Small fixes.

This commit is contained in:
Ville Pietilä
2025-07-11 07:21:11 +00:00
parent 727d578ca9
commit e19f337b9a
2 changed files with 4 additions and 1 deletions

View File

@@ -306,6 +306,9 @@ struct DeviceGroupedConvBwdWeight_Explicit_Xdl
if (arg.split_k_ < 0)
{
// TODO: Add split-K autodeduction.
// This will probably require adding an interface to the GEMM operation for
// querying the optimal split-K value, as we cannot easily access the actual GEMM kernel
// from here.
return false;
}

View File

@@ -655,7 +655,7 @@ struct DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle
const auto gemmM = a_grid_desc_kbatch_k0_m_k1.GetLength(I1);
const auto gemmN = b_grid_desc_kbatch_k0_n_k1.GetLength(I1);
const auto grid_size = GridwiseGemm::Block2CTileMap::CalculateGridSize(gemmM, gemmN) * Conv_G_;
const auto grid_size = GridwiseGemm::Block2CTileMap::CalculateGridSize(gemmM, gemmN) * Conv_G_ / NumGroupsToMerge;
k_batch_ = get_best_occupancy_k_batch_value(max_occupancy.value_, grid_size);
// Ensure that k_batch_ does not exceed the maximum value