From 61c79a8d0bdd9a22c7f2023944de543dcfd0b0a7 Mon Sep 17 00:00:00 2001 From: "assistant-librarian[bot]" Date: Wed, 13 Aug 2025 05:13:48 +0000 Subject: [PATCH] Merge commit '452791a3bacbadb95774c071cc1f9c3495b04187' into develop --- .../38_block_scale_gemm/CMakeLists.txt | 3 + .../38_block_scale_gemm/gemm_aquant_basic.cpp | 30 ++- .../gemm_aquant_preshuffle.cpp | 238 ++++++++++++++++++ .../38_block_scale_gemm/gemm_utils.hpp | 103 +++----- .../run_gemm_aquant_example.inc | 71 ++++-- .../block_universal_gemm_as_aquant_bs_cr.hpp | 198 ++++++++++----- .../kernel/gemm_aquant_kernel.hpp | 128 ++++++++-- .../gemm_aquant_pipeline_ag_bg_cr_base.hpp | 5 +- .../gemm_aquant_pipeline_ag_bg_cr_policy.hpp | 35 ++- .../gemm_aquant_pipeline_ag_bg_cr_v3.hpp | 10 +- .../pipeline/gemm_group_quant_utils.hpp | 61 +++-- .../pipeline/tile_gemm_aquant_traits.hpp | 2 + .../test_run_gemm_aquant_example.inc | 11 +- 13 files changed, 667 insertions(+), 228 deletions(-) create mode 100644 example/ck_tile/38_block_scale_gemm/gemm_aquant_preshuffle.cpp diff --git a/example/ck_tile/38_block_scale_gemm/CMakeLists.txt b/example/ck_tile/38_block_scale_gemm/CMakeLists.txt index bdcb6f50bd..914fdac0e4 100644 --- a/example/ck_tile/38_block_scale_gemm/CMakeLists.txt +++ b/example/ck_tile/38_block_scale_gemm/CMakeLists.txt @@ -8,6 +8,9 @@ list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -mllvm -enable-noalias-to-md-conversion if(GPU_TARGETS MATCHES "gfx94" OR GPU_TARGETS MATCHES "gfx95") add_executable(tile_example_gemm_aquant_basic EXCLUDE_FROM_ALL gemm_aquant_basic.cpp) target_compile_options(tile_example_gemm_aquant_basic PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) + + add_executable(tile_example_gemm_aquant_preshuffle EXCLUDE_FROM_ALL gemm_aquant_preshuffle.cpp) + target_compile_options(tile_example_gemm_aquant_preshuffle PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) else() message(DEBUG "Skipping ck_tile quant gemm tests for current target") endif() diff --git a/example/ck_tile/38_block_scale_gemm/gemm_aquant_basic.cpp b/example/ck_tile/38_block_scale_gemm/gemm_aquant_basic.cpp index 2667cae788..2ac08c7343 100644 --- a/example/ck_tile/38_block_scale_gemm/gemm_aquant_basic.cpp +++ b/example/ck_tile/38_block_scale_gemm/gemm_aquant_basic.cpp @@ -21,7 +21,8 @@ template + uint32_t QuantGroupSize, + bool Preshuffle = false> float gemm_calc_aquant(const ck_tile::AQuantGemmHostArgs& args, const ck_tile::stream_config& s) { constexpr bool kPadM = false; @@ -52,7 +53,7 @@ float gemm_calc_aquant(const ck_tile::AQuantGemmHostArgs& args, const ck_tile::s using TilePartitioner = ck_tile::GemmTile1DPartitioner; using CodegenGemmTraits = - ck_tile::TileGemmAQuantTraits; + ck_tile::TileGemmAQuantTraits; using GemmPipelineProblem = ck_tile::GemmPipelineProblemBase +template int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int argc, char* argv[]) { using Row = ck_tile::tensor_layout::gemm::RowMajor; @@ -156,7 +157,7 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a { if(a_layout == "R" && b_layout == "C") { - return run_gemm_example_with_layouts( + return run_gemm_example_with_layouts( argc, argv, Row{}, Row{}, Col{}, Row{}); } else @@ -172,6 +173,7 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a return 0; } +template