CK Tile: GEMM block-scale optimization (#3227)

* GEMM block scale optimization kernel

* GEMM block scale optimization kernel

* Fix: Apply clang-format for style consistency

* Fix: Apply clang-format for style consistency

---------

Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>
This commit is contained in:
kensclin
2025-12-04 14:07:23 +08:00
committed by GitHub
parent eb7f617713
commit ffc3120f63
2 changed files with 75 additions and 24 deletions

View File

@@ -211,6 +211,7 @@ struct GemmConfigPreshuffleB_BQuant_Prefill : public GemmConfigBase
static constexpr int N_Repeat = N_Tile / N_Warp_Tile / N_Warp;
static constexpr bool TiledMMAPermuteN = N_Repeat % 2 == 0;
static constexpr int kBlockPerCu = 2;
};
template <typename PrecType>