[CK_TILE] Refine Generic2dBlockShape to fix ck_tile examples 2, 10, 11, and 14 on RDNA3 and RDNA4 (#2795)

BlockWarps and WarpTile in Generic2dBlockShape are wave-size dependent, which causes a mangled-name mismatch between the host and device sides.

Solution: Replace them with ThreadPerBlock and move the BlockWarps and WarpTile calculation into Generic2dBlockShape.
This commit is contained in:
linqunAMD
2025-09-10 08:29:20 +08:00
committed by GitHub
parent df4ee556d6
commit c254f3d7b4
14 changed files with 103 additions and 453 deletions

View File

@@ -94,12 +94,11 @@ bool run(const ck_tile::ArgParser& arg_parser)
constexpr bool kTwoPass = true;
using BlockWarps = ck_tile::sequence<2, 2>;
using BlockTile = ck_tile::sequence<2, 128>;
using WarpTile = ck_tile::sequence<1, 64>;
using Vector = ck_tile::sequence<1, 1>;
using BlockTile = ck_tile::sequence<2, 128>;
using Vector = ck_tile::sequence<1, 1>;
using ThreadPerBlock = ck_tile::sequence<2, 128>;
using Shape = ck_tile::Generic2dBlockShape<BlockTile, BlockWarps, WarpTile, Vector>;
using Shape = ck_tile::Generic2dBlockShape<BlockTile, ThreadPerBlock, Vector>;
using Problem = ck_tile::SmoothquantPipelineProblem<XDataType,
SmoothScaleDataType,
ComputeDataType,