diff --git a/include/ck_tile/core/tensor/load_tile.hpp b/include/ck_tile/core/tensor/load_tile.hpp index 4cdad8bb9f..fb751fea89 100644 --- a/include/ck_tile/core/tensor/load_tile.hpp +++ b/include/ck_tile/core/tensor/load_tile.hpp @@ -164,6 +164,7 @@ CK_TILE_DEVICE void async_load_tile_with_offset(LdsTileWindow_&& lds_tile, // gfx12 does not support the direct global-to-LDS async buffer load used by // tile_window::async_load*. Keep the architecture choice at the tile-load layer by using a // regular buffer load followed by an explicit LDS store instead of hiding it in the primitive. + static_cast(smy); static_assert(!static_move_ys, "gfx12 synchronous async_load_tile fallback does not support static_move_ys"); auto tile = load_tile_with_offset(tile_window, offset, number{}, occ); diff --git a/include/ck_tile/core/tensor/tile_window.hpp b/include/ck_tile/core/tensor/tile_window.hpp index 33d21737cb..bf0e4fc25d 100644 --- a/include/ck_tile/core/tensor/tile_window.hpp +++ b/include/ck_tile/core/tensor/tile_window.hpp @@ -495,7 +495,9 @@ struct tile_window_with_static_distribution { using LdsTileWindow = remove_cvref_t; // using LdsTensorView = typename LdsTileWindow::BottomTensorView; +#if !defined(__gfx12__) using LdsDataType = typename LdsTileWindow::DataType; +#endif // using LdsDescriptor = typename LdsTileWindow::BottomTensorDesc; // issues * warps * lanes diff --git a/include/ck_tile/core/tensor/tile_window_linear.hpp b/include/ck_tile/core/tensor/tile_window_linear.hpp index 905be46966..54fb3ca1d9 100644 --- a/include/ck_tile/core/tensor/tile_window_linear.hpp +++ b/include/ck_tile/core/tensor/tile_window_linear.hpp @@ -488,7 +488,9 @@ struct tile_window_linear bool_constant = {}) const { using LdsTileWindow = remove_cvref_t; +#if !defined(__gfx12__) using LdsDataType = typename LdsTileWindow::DataType; +#endif // currently we only support everything is non linear dim // actually it's not performant if we have linear dim(e.g. fast changing)