[rocm-libraries] ROCm/rocm-libraries#4268 (commit d2fca53)

[CK_TILE]: PreshuffleB + PreshuffleBQuant for ABQuant
 pipeline (#4268)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Proposed changes

Implement BQuantPreshuffle option for the ABQuant PreshuffleB pipeline.

## Checklist

Please put an `x` into the boxes that apply. You can also fill these out
after creating the PR. If you're not sure, please don't hesitate to ask.

- [X] I have added tests relevant to the introduced functionality, and
the unit tests are passing locally
- [X] I have added the test to REGRESSION_TESTS list defined at the top
of CMakeLists.txt in tests/CMakeLists.txt, **IF** the test takes more
than 30 seconds to run.
- [X] I have added inline documentation which enables the maintainers
with understanding the motivation
- [X] I have removed the stale documentation which is no longer relevant
after this pull request
- [ ] (If this change is user-facing) I have added release notes which
provide the end users with a brief summary of the improvement from this
pull request
- [X] I have run `clang-format` on all changed files
- [X] Any dependent changes have been merged
This commit is contained in:
Erwin Terpstra
2026-02-10 13:59:03 +00:00
committed by assistant-librarian[bot]
parent d5acfd8d52
commit b41bfece83
7 changed files with 128 additions and 7 deletions

View File

@@ -15,6 +15,7 @@ if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx12")
add_executable(${EXE_NAME}
gemm_quant.cpp
gemm_abquant_quantgrouped.cpp
gemm_abquant_quantgrouped_preshuffleb_preshufflequant.cpp
gemm_aquant_quantgrouped.cpp
gemm_aquant_quantgrouped_preshufflequant.cpp
gemm_bquant_quantgrouped_bf8i4.cpp

View File

@@ -0,0 +1,44 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include "38_block_scale_gemm/gemm_utils.hpp"
#include "run_gemm_quant_example.inc"
template <typename T>
using GemmConfigPreshuffleB_PreshuffleBQuant =
GemmConfigPreshuffleB_ABQuant_PreshuffleBQuant_Prefill<T>;
static auto _ = []() {
auto& lut = get_kernel_lut();
lut[hash_multiple_strings({"fp8",
"abquant",
"preshuffleb",
"preshufflequant",
"1x1x128"})] = [](const ck_tile::ArgParser& arg_parser) {
using AQuantGroupSize = ck_tile::QuantGroupShape<ck_tile::sequence<1, 1, 128>>;
using BQuantGroupSize = ck_tile::QuantGroupShape<ck_tile::sequence<1, 1, 128>>;
using TypeConfig =
decltype(GemmQuantTypeConfig<ck_tile::fp8_t, ck_tile::fp8_t, ck_tile::half_t, float>{});
return run_gemm_example_prec_type<GemmConfigPreshuffleB_PreshuffleBQuant<ck_tile::fp8_t>,
TypeConfig,
AQuantGroupSize,
BQuantGroupSize,
ck_tile::QuantType::ABQuantGrouped>(arg_parser);
};
lut[hash_multiple_strings({"fp8",
"abquant",
"preshuffleb",
"preshufflequant",
"1x128x128"})] = [](const ck_tile::ArgParser& arg_parser) {
using AQuantGroupSize = ck_tile::QuantGroupShape<ck_tile::sequence<1, 1, 128>>;
using BQuantGroupSize = ck_tile::QuantGroupShape<ck_tile::sequence<1, 128, 128>>;
using TypeConfig =
decltype(GemmQuantTypeConfig<ck_tile::fp8_t, ck_tile::fp8_t, ck_tile::half_t, float>{});
return run_gemm_example_prec_type<GemmConfigPreshuffleB_PreshuffleBQuant<ck_tile::fp8_t>,
TypeConfig,
AQuantGroupSize,
BQuantGroupSize,
ck_tile::QuantType::ABQuantGrouped>(arg_parser);
};
return 0;
}();

View File

@@ -234,6 +234,13 @@ struct GemmConfigPreshuffleB_ABQuant_Prefill : public GemmConfigPreshuffleB_BQua
static constexpr bool TransposeC = true;
};
template <typename PrecType>
struct GemmConfigPreshuffleB_ABQuant_PreshuffleBQuant_Prefill
: public GemmConfigPreshuffleB_ABQuant_Prefill<PrecType>
{
static constexpr bool BPreshuffleQuant = true;
};
template <typename PrecType>
struct GemmConfigPreshuffleB_ABQuant_Decode : public GemmConfigPreshuffleB_BQuant_Prefill<PrecType>
{