This commit is contained in:
Sami Remes
2026-01-16 08:22:11 -05:00
parent f6f9931541
commit 16ca5cb532
7 changed files with 135 additions and 6 deletions

View File

@@ -1713,8 +1713,9 @@ CK_TILE_DEVICE void amd_async_buffer_load(CK_TILE_LDS_ADDR T* smem,
ignore = src_immediate_addr_offset;
#if defined(__gfx950__)
static_assert(bytes == 4 || bytes == 12 || bytes == 16,
"wrong! only support in dword, dwordx3, dwordx4");
static_assert(bytes == 16, "wrong! not implemented vector size");
// static_assert(bytes == 4 || bytes == 12 || bytes == 16,
// "wrong! only support in dword, dwordx3, dwordx4");
src_wave_addr_offset = 0;
#else
static_assert(bytes == 4, "wrong! not implemented vector size");

View File

@@ -552,6 +552,8 @@ struct tile_window_with_static_distribution
using vector_t = typename Traits::vector_t;
using SFC_Ys = typename Traits::SFC_Ys;
// static_assert(sizeof(vector_t) == 16, "wrong! not implemented vector size");
// Precompute invariant values outside loops
const auto window_origin = lds_tile.get_window_origin();
const auto& bottom_tensor_view = lds_tile.get_bottom_tensor_view();