mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
[rocm-libraries] ROCm/rocm-libraries#4791 (commit 6cc17c6)
[CK][CK TILE] Improve oob check ## Motivation Improve OOB checks. Remove the permutes that were generated by the thread buffer zero clear; the assembly now contains only a condmask instead of permute + condmask. Change the number of KPack for generated instances. ## Technical Details Remove permute instructions from assembly. ## Test Plan test_grouped_convnd_fwd_tile ## Test Result passed ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
This commit is contained in:
committed by
assistant-librarian[bot]
parent
f3f4d7d842
commit
1a2c0d835a
@@ -96,7 +96,7 @@ int call_profiler(const ckt::Args<SIGNATURE>& args, bool time_kernel)
|
||||
std::string op_name;
|
||||
bool valid;
|
||||
std::tie(valid, avg_time, op_name) = ckp::run_grouped_conv_forward_tile_algs(
|
||||
args, inputs.get(), outputs.get(), ck_tile::stream_config{nullptr, time_kernel});
|
||||
args, inputs.get(), outputs.get(), ck_tile::stream_config{nullptr, time_kernel, 0, 5, 50});
|
||||
if(time_kernel)
|
||||
{
|
||||
std::cout << "Best configuration parameters:" << "\nname: " << op_name
|
||||
|
||||
Reference in New Issue
Block a user