mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 19:28:33 +00:00
bf16 add NPadding for library
This commit is contained in:
@@ -25,6 +25,11 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma1
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p1<
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p1<
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -25,6 +25,11 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma1
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p2<
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p2<
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -25,6 +25,11 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma1
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p3<
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p3<
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -25,6 +25,11 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma1
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p4<
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p4<
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -25,6 +25,11 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma1
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p5<
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p5<
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -25,6 +25,11 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma1
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p6<
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma16x16_mn_compute_instances_p6<
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -28,6 +28,7 @@ using MultiplyMultiply = element_wise::MultiplyMultiply;
|
||||
|
||||
static constexpr auto GemmDefault = GemmSpecialization::Default;
|
||||
static constexpr auto GemmKPadding = GemmSpecialization::KPadding;
|
||||
static constexpr auto GemmNPadding = GemmSpecialization::NPadding;
|
||||
static constexpr auto GemmNKPadding = GemmSpecialization::NKPadding;
|
||||
static constexpr auto GemmMNPadding = GemmSpecialization::MNPadding;
|
||||
static constexpr auto GemmMNKPadding = GemmSpecialization::MNKPadding;
|
||||
|
||||
@@ -25,6 +25,11 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_compute_instances_p1<
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_compute_instances_p1<
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -25,6 +25,11 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_compute_instances_p2<
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_compute_instances_p2<
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p1_instances<
|
||||
v1,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p1_instances<
|
||||
v1,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p1_instances<
|
||||
v2,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p1_instances<
|
||||
v2,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p2_instances<
|
||||
v1,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p2_instances<
|
||||
v1,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p2_instances<
|
||||
v2,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p2_instances<
|
||||
v2,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p3_instances<
|
||||
v1,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p3_instances<
|
||||
v1,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p3_instances<
|
||||
v2,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p3_instances<
|
||||
v2,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p4_instances<
|
||||
v1,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p4_instances<
|
||||
v1,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p4_instances<
|
||||
v2,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p4_instances<
|
||||
v2,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p5_instances<
|
||||
v1,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p5_instances<
|
||||
v1,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -26,6 +26,12 @@ void add_device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p5_instances<
|
||||
v2,
|
||||
GemmKPadding>{});
|
||||
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_multiply_multiply_weight_preshuffle_xdl_f8_f8_bf16_mk_mfma_mn_p5_instances<
|
||||
v2,
|
||||
GemmNPadding>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
Reference in New Issue
Block a user