mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
[rocm-libraries] ROCm/rocm-libraries#4791 (commit 6cc17c6)
[CK][CK TILE] Improve OOB check ## Motivation Improve OOB checks. Remove the permutes that were generated by the thread buffer zero clear; the assembly now contains only condmask instead of permute + condmask. Change the number of KPack for generated instances. ## Technical Details Remove permute instructions from the assembly. ## Test Plan test_grouped_convnd_fwd_tile ## Test Result Passed. ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
This commit is contained in:
committed by
assistant-librarian[bot]
parent
f3f4d7d842
commit
1a2c0d835a
@@ -197,7 +197,21 @@ def parse_fwd_instances(instances, problem_name):
|
||||
dtype = get_dtype(problem_name)
|
||||
# TODO: Make it more flexible
|
||||
# k_per_xdl = f"ck_tile::get_k_warp_tile<{dtype}, {m_per_xdl}>()"
|
||||
k_per_xdl = 8 if dtype == "float" else 16
|
||||
if dtype == "float":
|
||||
if m_per_xdl == 32:
|
||||
if instance.find("BlkGemmPipelineVersion") == -1:
|
||||
k_per_xdl = 4
|
||||
else:
|
||||
# Increase for universal gemm
|
||||
k_per_xdl = 8
|
||||
else:
|
||||
k_per_xdl = 8
|
||||
else:
|
||||
if m_per_xdl == 32:
|
||||
k_per_xdl = 16
|
||||
else:
|
||||
k_per_xdl = 32
|
||||
k_per_xdl = min(k_per_xdl, k_per_block)
|
||||
|
||||
conv = ConvInstanceTemplateParams(
|
||||
spec,
|
||||
|
||||
Reference in New Issue
Block a user