mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-30 19:57:40 +00:00
print hipOccupancyDefined Grid size for best in ckProfiler
This commit is contained in:
13
include/ck/tensor_operation/gpu/device/device_base.hpp
Executable file → Normal file
13
include/ck/tensor_operation/gpu/device/device_base.hpp
Executable file → Normal file
@@ -56,17 +56,10 @@ struct BaseArgument
|
||||
virtual ~BaseArgument() {}
|
||||
|
||||
void* p_workspace_ = nullptr;
|
||||
|
||||
virtual dim3 GetLaunchGridDims() const
|
||||
{
|
||||
return dim3{0, 0, 0};
|
||||
}
|
||||
|
||||
virtual bool HasLaunchGridDims() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual dim3 GetLaunchGridDims() const { return dim3{0, 0, 0}; }
|
||||
|
||||
virtual bool HasLaunchGridDims() const { return false; }
|
||||
};
|
||||
|
||||
struct BaseInvoker
|
||||
|
||||
17
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_streamk_v3.hpp
Executable file → Normal file
17
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_streamk_v3.hpp
Executable file → Normal file
@@ -168,17 +168,18 @@ struct DeviceGemm_Xdl_CShuffle_Streamk_V3 : public DeviceGemm_Streamk_V2<ALayout
|
||||
hip_check_error(hipGetDevice(&dev));
|
||||
hip_check_error(hipGetDeviceProperties(&dev_prop, dev));
|
||||
num_cu = dev_prop.multiProcessorCount;
|
||||
// arg.Grid_size = num_cu * occupancy;
|
||||
arg.Grid_size = num_cu * occupancy;
|
||||
// grid_dim = arg.Grid_size;
|
||||
grid_dim.x = num_cu * occupancy; // Set the x-dimension
|
||||
|
||||
// TODO: Set grid_dim.y and grid_dim.z appropriately if they are not 1.
|
||||
// This often comes from the block_2_ctile_map.CalculateGridSize(...)
|
||||
// For now, assuming they might be 1 or derived from block_2_ctile_map elsewhere if needed.
|
||||
// If block_2_ctile_map.CalculateGridSize gives (N0, M0, k_split), then
|
||||
// grid_dim might be (N0, M0, k_split) or (total_blocks, 1, 1).
|
||||
// The current code sets grid_dim = arg.Grid_size (if positive) or occupancy-based (if negative)
|
||||
// which implies a 1D grid of blocks. We'll stick to that interpretation for grid_dim.x
|
||||
// For now, assuming they might be 1 or derived from block_2_ctile_map elsewhere
|
||||
// if needed. If block_2_ctile_map.CalculateGridSize gives (N0, M0, k_split),
|
||||
// then grid_dim might be (N0, M0, k_split) or (total_blocks, 1, 1). The current
|
||||
// code sets grid_dim = arg.Grid_size (if positive) or occupancy-based (if
|
||||
// negative) which implies a 1D grid of blocks. We'll stick to that
|
||||
// interpretation for grid_dim.x
|
||||
grid_dim.y = 1;
|
||||
grid_dim.z = 1;
|
||||
}
|
||||
@@ -190,7 +191,7 @@ struct DeviceGemm_Xdl_CShuffle_Streamk_V3 : public DeviceGemm_Streamk_V2<ALayout
|
||||
grid_dim.y = 1;
|
||||
grid_dim.z = 1;
|
||||
}
|
||||
|
||||
|
||||
arg.SetLaunchGridDims(grid_dim); // Store the determined launch grid dimensions
|
||||
|
||||
if(stream_config.flush_cache)
|
||||
@@ -819,7 +820,7 @@ struct DeviceGemm_Xdl_CShuffle_Streamk_V3 : public DeviceGemm_Streamk_V2<ALayout
|
||||
<< BlkGemmPipelineVersionToString[BlkGemmPipelineVer] << ", "
|
||||
<< "BlkGemmPipelinePrefetchStages: "
|
||||
<< GridwiseGemm::BlockwiseGemmPipe::PrefetchStages ;
|
||||
|
||||
|
||||
// clang-format on
|
||||
|
||||
return str.str();
|
||||
|
||||
15
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_streamk_v3.hpp
Executable file → Normal file
15
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_streamk_v3.hpp
Executable file → Normal file
@@ -631,19 +631,10 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
|
||||
|
||||
mutable dim3 launch_grid_dims_;
|
||||
|
||||
void SetLaunchGridDims(dim3 dims) const
|
||||
{
|
||||
launch_grid_dims_ = dims;
|
||||
}
|
||||
void SetLaunchGridDims(dim3 dims) const { launch_grid_dims_ = dims; }
|
||||
|
||||
dim3 GetLaunchGridDims() const override
|
||||
{
|
||||
return launch_grid_dims_;
|
||||
}
|
||||
bool HasLaunchGridDims() const override
|
||||
{
|
||||
return launch_grid_dims_.x > 0;
|
||||
}
|
||||
dim3 GetLaunchGridDims() const override { return launch_grid_dims_; }
|
||||
bool HasLaunchGridDims() const override { return launch_grid_dims_.x > 0; }
|
||||
};
|
||||
|
||||
struct SplitKBatchOffset
|
||||
|
||||
Reference in New Issue
Block a user