This commit is contained in:
zhimding
2025-11-28 08:07:30 +00:00
parent e31a7a4f29
commit bb3d2f5be6
3 changed files with 10 additions and 10 deletions

View File

@@ -125,7 +125,7 @@ struct tile_scatter_gather
static constexpr auto get_space_filling_curve()
{
constexpr auto tile_dstr = TileDstr{};
[[maybe_unused]] constexpr auto tile_dstr = TileDstr{};
constexpr auto thread_tensor_lengths_ys =
to_sequence(tile_dstr.get_ys_to_d_descriptor().get_lengths());
@@ -309,7 +309,7 @@ struct tile_scatter_gather
CK_TILE_DEVICE auto load(number<i_access_unsupport_> = {},
bool_constant<oob_conditional_check> = {}) const
{
constexpr auto tile_dstr = TileDstr{};
[[maybe_unused]] constexpr auto tile_dstr = TileDstr{};
auto dst_tensor = make_static_distributed_tensor<DataType>(tile_dstr);
load(dst_tensor, number<i_access_unsupport_>{}, bool_constant<oob_conditional_check>{});
return dst_tensor;
@@ -326,7 +326,7 @@ struct tile_scatter_gather
using vector_t = typename Traits::vector_t;
using SFC_Ys = typename Traits::SFC_Ys;
constexpr auto tile_dstr = TileDstr{};
[[maybe_unused]] constexpr auto tile_dstr = TileDstr{};
// loop over thread tensor space [y0, y1, ...]
static_for<0, NumCoord, 1>{}([&](auto iCoord) {
@@ -418,7 +418,7 @@ struct tile_scatter_gather
using vector_t = typename Traits::vector_t;
using SFC_Ys = typename Traits::SFC_Ys;
constexpr auto tile_dstr = TileDstr{};
[[maybe_unused]] constexpr auto tile_dstr = TileDstr{};
// Precompute invariant values outside loops
const auto window_origin = lds_tile.get_window_origin();
@@ -614,7 +614,7 @@ struct tile_scatter_gather
using vector_t = typename Traits::vector_t;
using SFC_Ys = typename Traits::SFC_Ys;
constexpr auto tile_dstr = TileDstr{};
[[maybe_unused]] constexpr auto tile_dstr = TileDstr{};
static_for<0, NumCoord, 1>{}([&](auto iCoord) {
auto window_adaptor_thread_coord = pre_computed_coords_[iCoord][I0];
@@ -696,7 +696,7 @@ struct tile_scatter_gather
using vector_t = typename Traits::vector_t;
using SFC_Ys = typename Traits::SFC_Ys;
constexpr auto tile_dstr = TileDstr{};
[[maybe_unused]] constexpr auto tile_dstr = TileDstr{};
// printf("off %d\n", page_idx_[I0]);
// loop over thread tensor space [y0, y1, ...]
static_for<0, NumCoord, 1>{}([&](auto iCoord) {

View File

@@ -444,7 +444,7 @@ struct F16xMXF4FlatmmPipelineAGmemBGmemCRegV1
typename BFlatBlockWindowTmp,
typename DequantBFlatWindow>
CK_TILE_HOST_DEVICE auto operator()(ADramBlockWindowTmp a_copy_dram_window_,
const AElementFunction& a_element_func,
[[maybe_unused]] const AElementFunction& a_element_func,
const BFlatBlockWindowTmp& b_flat_dram_block_window_tmp,
const DequantBFlatWindow& scale_b_flat_window,
const index_t num_loop,
@@ -606,7 +606,7 @@ struct F16xMXF4FlatmmPipelineAGmemBGmemCRegV1
scale_b_warp_tensor_pong;
using ABlockTile = decltype(load_tile(a_copy_dram_window));
ABlockTile a_block_tile;
[[maybe_unused]] ABlockTile a_block_tile;
enum
{
@@ -621,7 +621,7 @@ struct F16xMXF4FlatmmPipelineAGmemBGmemCRegV1
if constexpr(prefill_location & PrefillAfterGemm)
async_load_tile(lds_tile_a, dram_tile_a);
};
auto prefill_lds_a_stage2 = [&](auto lds_tile_a) {
auto prefill_lds_a_stage2 = [&]([[maybe_unused]] auto lds_tile_a) {
// async_load_fence();
// __builtin_amdgcn_s_waitcnt(0x03fc);
// data has already been stored in lds; no further operation is needed.

View File

@@ -7,7 +7,7 @@
namespace ck_tile {
#define CKTILE_FLATMM_USE_BUFFER_LOAD_LDS_AS_POSSIBLE 0
#define CKTILE_FLATMM_USE_BUFFER_LOAD_LDS_AS_POSSIBLE 1
#if defined(__gfx950__)
#define CKTILE_FLATMM_ARCH_SUPPORT_BUFFER_LOAD_LDS_DWORDx4 1