[CKTILE] Support A/B Quantization in Blockscale Grouped Gemm (#3452)

* update grouped_gemm blockwise kernel
* update config
* update kernel
* update examples
* remove test code for now
* sync test files with origin/develop
* update example
* fix code lint
* fix code-lint
* update test code
* run clang format
* run pre-commit
* update api
2026-04-20 06:49:15 +00:00 · 2026-01-07 04:36:04 +08:00
parent 2309c86054
commit 76696ace44
11 changed files with 1798 additions and 3 deletions
--- a/include/ck_tile/ops/gemm_quant/kernel/grouped_gemm_quant_kernel.hpp
+++ b/include/ck_tile/ops/gemm_quant/kernel/grouped_gemm_quant_kernel.hpp
@@ -484,6 +484,17 @@ struct QuantGroupedGemmKernel
                                                          tail_num,
                                                          smem_ptr);
            }
+            else if constexpr(kQuantType == QuantType::ABQuantGrouped)
+            {
+                return GemmPipeline{}.template operator()(a_block_window,
+                                                          b_block_window,
+                                                          aq_block_window,
+                                                          bq_block_window,
+                                                          num_loop,
+                                                          has_hot_loop,
+                                                          tail_num,
+                                                          smem_ptr);
+            }
            else if constexpr(kQuantType == QuantType::RowColQuant ||
                              kQuantType == QuantType::TensorQuant)
            {
@@ -499,7 +510,8 @@ struct QuantGroupedGemmKernel
                c_ptr, kargs, block_idx_m, block_idx_n);

            if constexpr(kQuantType == QuantType::AQuantGrouped ||
-                         kQuantType == QuantType::BQuantGrouped)
+                         kQuantType == QuantType::BQuantGrouped ||
+                         kQuantType == QuantType::ABQuantGrouped)
            {
                EpiloguePipeline{}(c_block_window, c_block_tile, c_block_window, smem_ptr);
            }
@@ -527,7 +539,8 @@ struct QuantGroupedGemmKernel
                    c_ptr, kargs, block_idx_m, block_idx_n);

            if constexpr(kQuantType == QuantType::AQuantGrouped ||
-                         kQuantType == QuantType::BQuantGrouped)
+                         kQuantType == QuantType::BQuantGrouped ||
+                         kQuantType == QuantType::ABQuantGrouped)
            {
                EpiloguePipeline{}(c_block_window, c_block_tile, c_block_window, smem_ptr);
            }