mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-07-01 04:07:56 +00:00
Small fixes.
This commit is contained in:
@@ -306,6 +306,9 @@ struct DeviceGroupedConvBwdWeight_Explicit_Xdl
|
||||
if (arg.split_k_ < 0)
|
||||
{
|
||||
// TODO: Add split-K autodeduction.
|
||||
// This will probably require adding an interface to the GEMM operation for
|
||||
// querying the optimal split-K value, as we cannot easily access the actual GEMM kernel
|
||||
// from here.
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -655,7 +655,7 @@ struct DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle
|
||||
const auto gemmM = a_grid_desc_kbatch_k0_m_k1.GetLength(I1);
|
||||
const auto gemmN = b_grid_desc_kbatch_k0_n_k1.GetLength(I1);
|
||||
|
||||
const auto grid_size = GridwiseGemm::Block2CTileMap::CalculateGridSize(gemmM, gemmN) * Conv_G_;
|
||||
const auto grid_size = GridwiseGemm::Block2CTileMap::CalculateGridSize(gemmM, gemmN) * Conv_G_ / NumGroupsToMerge;
|
||||
k_batch_ = get_best_occupancy_k_batch_value(max_occupancy.value_, grid_size);
|
||||
|
||||
// Ensure that k_batch_ does not exceed the maximum value
|
||||
|
||||
Reference in New Issue
Block a user