[CK-Tile] Add the API to load SGPR (#2878)

* Have a workable version for SGPR

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.

* Substitute with the new SGPR read API

* update the CHANGELOG

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.

* change to static for logic

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.
This commit is contained in:
Thomas Ning
2025-09-23 01:23:56 -07:00
committed by GitHub
parent b6e8994386
commit 2cbbf5dcb3
40 changed files with 273 additions and 167 deletions

View File

@@ -175,9 +175,9 @@ struct ImageToColumn
{
const auto [M, K] = CalculateMKDims(kargs);
-const index_t iM = __builtin_amdgcn_readfirstlane(blockIdx.x * kMPerBlock);
-const index_t iK = __builtin_amdgcn_readfirstlane(blockIdx.y * kKPerBlock);
-const index_t iBatch = __builtin_amdgcn_readfirstlane(blockIdx.z);
+const index_t iM = amd_wave_read_first_lane(blockIdx.x * kMPerBlock);
+const index_t iK = amd_wave_read_first_lane(blockIdx.y * kKPerBlock);
+const index_t iBatch = amd_wave_read_first_lane(blockIdx.z);
const auto in_offset = iBatch * kargs.image_g_n_c_wis_strides[I0];
const auto out_offset = iBatch * kargs.gemm_g_m_k_strides[I0];