mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-05 14:11:29 +00:00
WIP
This commit is contained in:
@@ -1713,8 +1713,9 @@ CK_TILE_DEVICE void amd_async_buffer_load(CK_TILE_LDS_ADDR T* smem,
|
||||
ignore = src_immediate_addr_offset;
|
||||
|
||||
#if defined(__gfx950__)
|
||||
static_assert(bytes == 4 || bytes == 12 || bytes == 16,
|
||||
"wrong! only support in dword, dwordx3, dwordx4");
|
||||
static_assert(bytes == 16, "wrong! not implemented vector size");
|
||||
// static_assert(bytes == 4 || bytes == 12 || bytes == 16,
|
||||
// "wrong! only support in dword, dwordx3, dwordx4");
|
||||
src_wave_addr_offset = 0;
|
||||
#else
|
||||
static_assert(bytes == 4, "wrong! not implemented vector size");
|
||||
|
||||
@@ -552,6 +552,8 @@ struct tile_window_with_static_distribution
|
||||
using vector_t = typename Traits::vector_t;
|
||||
using SFC_Ys = typename Traits::SFC_Ys;
|
||||
|
||||
// static_assert(sizeof(vector_t) == 16, "wrong! not implemented vector size");
|
||||
|
||||
// Precompute invariant values outside loops
|
||||
const auto window_origin = lds_tile.get_window_origin();
|
||||
const auto& bottom_tensor_view = lds_tile.get_bottom_tensor_view();
|
||||
|
||||
Reference in New Issue
Block a user