Update device_gemm_xdl_cshuffle_streamk_v3.hpp

This commit is contained in:
Muhammed Emin Ozturk
2025-06-22 16:43:26 -07:00
committed by GitHub
parent 256aec203e
commit 17dcb200e6

View File

@@ -169,25 +169,14 @@ struct DeviceGemm_Xdl_CShuffle_Streamk_V3 : public DeviceGemm_Streamk_V2<ALayout
hip_check_error(hipGetDeviceProperties(&dev_prop, dev));
num_cu = dev_prop.multiProcessorCount;
arg.Grid_size = num_cu * occupancy;
// grid_dim = arg.Grid_size;
grid_dim.x = num_cu * occupancy; // Set the x-dimension
// TODO: Set grid_dim.y and grid_dim.z appropriately if they are not 1.
// These values often come from block_2_ctile_map.CalculateGridSize(...).
// For now, assume they are 1, or are derived from block_2_ctile_map elsewhere
// if needed. If block_2_ctile_map.CalculateGridSize gives (N0, M0, k_split),
// then grid_dim might be (N0, M0, k_split) or (total_blocks, 1, 1). The current
// code sets grid_dim = arg.Grid_size (if positive) or an occupancy-based value
// (if negative), which implies a 1D grid of blocks. We stick to that
// interpretation for grid_dim.x.
grid_dim.y = 1;
grid_dim.z = 1;
}
else
{
// grid_dim = arg.Grid_size;
grid_dim.x = arg.Grid_size;
// TODO: As above, confirm y and z dimensions.
grid_dim.y = 1;
grid_dim.z = 1;
}