Enable FP4 (pk_fp4_t) for universal GEMM, without any scaling

This commit is contained in:
Sami Remes
2026-02-03 03:10:35 -05:00
parent 4d241289c9
commit b47853d3fe
8 changed files with 205 additions and 113 deletions

View File

@@ -182,17 +182,23 @@ int run_gemm_example(ck_tile::ArgParser& arg_parser)
std::string a_layout = arg_parser.get_str("a_layout");
std::string b_layout = arg_parser.get_str("b_layout");
if(data_type == "fp16")
if(data_type == "fp4")
{
return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::half_t>, ck_tile::half_t>(
return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::pk_fp4_t>, ck_tile::pk_fp4_t, ck_tile::pk_fp4_t, ck_tile::half_t>(
a_layout, b_layout, arg_parser);
}
else if(data_type == "bf16")
{
return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::bf16_t>, ck_tile::bf16_t>(
a_layout, b_layout, arg_parser);
}
else if(data_type == "fp8")
// if(data_type == "fp16")
// {
// return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::half_t>, ck_tile::half_t>(
// a_layout, b_layout, arg_parser);
// }
// else if(data_type == "bf16")
// {
// return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::bf16_t>, ck_tile::bf16_t>(
// a_layout, b_layout, arg_parser);
// }
else
if(data_type == "fp8")
{
return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::fp8_t>,
ck_tile::fp8_t,
@@ -200,68 +206,68 @@ int run_gemm_example(ck_tile::ArgParser& arg_parser)
ck_tile::half_t>(
a_layout, b_layout, arg_parser);
}
else if(data_type == "bf8")
{
return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::bf8_t>,
ck_tile::bf8_t,
ck_tile::bf8_t,
ck_tile::half_t>(
a_layout, b_layout, arg_parser);
}
else if(data_type == "int8")
{
return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::int8_t>,
ck_tile::int8_t,
ck_tile::int8_t,
ck_tile::int32_t>(
a_layout, b_layout, arg_parser);
}
else if(data_type == "fp16i4")
{
// TODO: Add support for bhalf_t ADataType
if constexpr(GemmConfig<ck_tile::half_t>::Pipeline == ck_tile::GemmPipeline::COMPUTE_V3)
{
return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::half_t>,
ck_tile::half_t,
ck_tile::pk_int4_t,
ck_tile::half_t>(
a_layout, b_layout, arg_parser);
}
else
{
throw std::runtime_error("Unsupported pipeline for this operation !!!");
}
}
else if(data_type == "fp8i4")
{
if constexpr(GemmConfig<ck_tile::fp8_t>::Pipeline == ck_tile::GemmPipeline::COMPUTE_V3)
{
return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::fp8_t>,
ck_tile::fp8_t,
ck_tile::pk_int4_t,
ck_tile::half_t>(
a_layout, b_layout, arg_parser);
}
else
{
throw std::runtime_error("Unsupported pipeline for this operation !!!");
}
}
else if(data_type == "bf8i4")
{
if constexpr(GemmConfig<ck_tile::bf8_t>::Pipeline == ck_tile::GemmPipeline::COMPUTE_V3)
{
return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::bf8_t>,
ck_tile::bf8_t,
ck_tile::pk_int4_t,
ck_tile::half_t>(
a_layout, b_layout, arg_parser);
}
else
{
throw std::runtime_error("Unsupported pipeline for this operation !!!");
}
}
// else if(data_type == "bf8")
// {
// return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::bf8_t>,
// ck_tile::bf8_t,
// ck_tile::bf8_t,
// ck_tile::half_t>(
// a_layout, b_layout, arg_parser);
// }
// else if(data_type == "int8")
// {
// return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::int8_t>,
// ck_tile::int8_t,
// ck_tile::int8_t,
// ck_tile::int32_t>(
// a_layout, b_layout, arg_parser);
// }
// else if(data_type == "fp16i4")
// {
// // TODO: Add support for bhalf_t ADataType
// if constexpr(GemmConfig<ck_tile::half_t>::Pipeline == ck_tile::GemmPipeline::COMPUTE_V3)
// {
// return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::half_t>,
// ck_tile::half_t,
// ck_tile::pk_int4_t,
// ck_tile::half_t>(
// a_layout, b_layout, arg_parser);
// }
// else
// {
// throw std::runtime_error("Unsupported pipeline for this operation !!!");
// }
// }
// else if(data_type == "fp8i4")
// {
// if constexpr(GemmConfig<ck_tile::fp8_t>::Pipeline == ck_tile::GemmPipeline::COMPUTE_V3)
// {
// return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::fp8_t>,
// ck_tile::fp8_t,
// ck_tile::pk_int4_t,
// ck_tile::half_t>(
// a_layout, b_layout, arg_parser);
// }
// else
// {
// throw std::runtime_error("Unsupported pipeline for this operation !!!");
// }
// }
// else if(data_type == "bf8i4")
// {
// if constexpr(GemmConfig<ck_tile::bf8_t>::Pipeline == ck_tile::GemmPipeline::COMPUTE_V3)
// {
// return run_gemm_example_prec_type_universal<GemmConfig<ck_tile::bf8_t>,
// ck_tile::bf8_t,
// ck_tile::pk_int4_t,
// ck_tile::half_t>(
// a_layout, b_layout, arg_parser);
// }
// else
// {
// throw std::runtime_error("Unsupported pipeline for this operation !!!");
// }
// }
else
{
throw std::runtime_error("Unsupported data type for this operation !!!");
@@ -281,7 +287,8 @@ int main(int argc, char* argv[])
#if CK_TILE_USE_WMMA
return !run_gemm_example<GemmConfigComputeV3_WMMA>(arg_parser);
#else
return !run_gemm_example<GemmConfigComputeV3_2>(arg_parser);
return !run_gemm_example<GemmConfigComputeAsync>(arg_parser);
// return !run_gemm_example<GemmConfigComputeV3_3>(arg_parser);
#endif
}
catch(const std::runtime_error& e)