[CK-Tile] Add the API to load SGPR (#2878)

* Have a workable version for SGPR

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.

* substitute with the new sgpr read api

* update the CHANGELOG

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.

* change to static for logic

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.
This commit is contained in:
Thomas Ning
2025-09-23 01:23:56 -07:00
committed by GitHub
parent b6e8994386
commit 2cbbf5dcb3
40 changed files with 273 additions and 167 deletions

View File

@@ -84,9 +84,9 @@ struct BatchedTransposeKernel
static constexpr ck_tile::index_t VectorSizeOutput = Problem::VectorSizeOutput;
static constexpr ck_tile::index_t VectorStrideOutput = 1;
const auto iM = __builtin_amdgcn_readfirstlane(blockIdx.x * kMPerBlock);
const auto iN = __builtin_amdgcn_readfirstlane(blockIdx.y * kNPerBlock);
const auto offset = __builtin_amdgcn_readfirstlane(blockIdx.z * kargs.height * kargs.width);
const auto iM = amd_wave_read_first_lane(blockIdx.x * kMPerBlock);
const auto iN = amd_wave_read_first_lane(blockIdx.y * kNPerBlock);
const auto offset = amd_wave_read_first_lane(blockIdx.z * kargs.height * kargs.width);
const auto x_m_n = [&]() {
const auto x_dram_naive = make_naive_tensor_view<address_space_enum::global>(