[rocm-libraries] ROCm/rocm-libraries#4357 (commit ff3e982)

[CK_TILE] Add support and tests for V6 pipeline in conv fwd
 (#4357)

Added support for conv v6 pipeline in ck tile's convolution forward
kernel. CK Tile v6 pipeline is the equivalent to old ck's V5 pipeline
and should be faster than other pipelines for some cases. This PR also
adds tests inside profiler that's currently inside experimental
directory, so now we should be able to detect regressions easier.
This commit is contained in:
jakpiase
2026-02-08 19:57:53 +00:00
committed by assistant-librarian[bot]
parent 57d26db844
commit 731afe535a
13 changed files with 97 additions and 42 deletions

View File

@@ -763,6 +763,32 @@ struct GemmPipelineAgBgCrCompV6 : public BaseGemmPipelineAgBgCrCompV6<Problem>
return Base::TailHandler(RunPipeline, has_hot_loop, tail_number);
}
template <typename ADramBlockWindowTmp,
typename BDramBlockWindowTmp,
typename std::enable_if_t<!is_detected<is_tuple, ADramBlockWindowTmp>::value &&
!is_detected<is_tuple, BDramBlockWindowTmp>::value,
bool>* = nullptr>
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp& a_dram_block_window_tmp,
const BDramBlockWindowTmp& b_dram_block_window_tmp,
const index_t num_loop,
void* __restrict__ p_smem) const
{
const bool has_hot_loop = Base::BlockHasHotloop(num_loop);
const auto tail_number = Base::GetBlockLoopTailNum(num_loop);
const auto RunPipeline = [&](auto hot_loop_, auto tail_num_) {
return PipelineImpl<Scheduler>{}.template operator()<hot_loop_.value, tail_num_.value>(
ck_tile::make_tuple(a_dram_block_window_tmp),
[](auto& e, const ADataType& a) { e = a; },
ck_tile::make_tuple(b_dram_block_window_tmp),
[](auto& e, const BDataType& b) { e = b; },
num_loop,
p_smem);
};
return Base::TailHandler(RunPipeline, has_hot_loop, tail_number);
}
template <typename ADramBlockWindowTmp,
typename BDramBlockWindowTmp,
typename AElementFunction,