[CKTILE] Support A/B Quantization in Blockscale Grouped Gemm (#3452)

* update grouped_gemm blockwise kernel

* update config

* update kernel

* update examples

* remove test code for now

* sync test files with origin/develop

* update example

* fix code lint

* fix code-lint

* update test code

* run clang format

* run pre-commit

* update api
This commit is contained in:
kyle-256
2026-01-07 04:36:04 +08:00
committed by GitHub
parent 2309c86054
commit 76696ace44
11 changed files with 1798 additions and 3 deletions

View File

@@ -484,6 +484,17 @@ struct QuantGroupedGemmKernel
tail_num,
smem_ptr);
}
+else if constexpr(kQuantType == QuantType::ABQuantGrouped)
+{
+return GemmPipeline{}.template operator()(a_block_window,
+b_block_window,
+aq_block_window,
+bq_block_window,
+num_loop,
+has_hot_loop,
+tail_num,
+smem_ptr);
+}
else if constexpr(kQuantType == QuantType::RowColQuant ||
kQuantType == QuantType::TensorQuant)
{
@@ -499,7 +510,8 @@ struct QuantGroupedGemmKernel
c_ptr, kargs, block_idx_m, block_idx_n);
if constexpr(kQuantType == QuantType::AQuantGrouped ||
-kQuantType == QuantType::BQuantGrouped)
+kQuantType == QuantType::BQuantGrouped ||
+kQuantType == QuantType::ABQuantGrouped)
{
EpiloguePipeline{}(c_block_window, c_block_tile, c_block_window, smem_ptr);
}
@@ -527,7 +539,8 @@ struct QuantGroupedGemmKernel
c_ptr, kargs, block_idx_m, block_idx_n);
if constexpr(kQuantType == QuantType::AQuantGrouped ||
-kQuantType == QuantType::BQuantGrouped)
+kQuantType == QuantType::BQuantGrouped ||
+kQuantType == QuantType::ABQuantGrouped)
{
EpiloguePipeline{}(c_block_window, c_block_tile, c_block_window, smem_ptr);
}