update 3.8 v2 (#2112)

* update 3.8 v2

* update 3.8

---------

Co-authored-by: yuzhai <yuzhai@nvidia.com>
This commit is contained in:
Yujia Zhai
2025-02-19 19:03:14 -08:00
committed by GitHub
parent e9627ce55b
commit b84e9802d8
166 changed files with 3986 additions and 4037 deletions

View File

@@ -489,7 +489,7 @@ def get_valid_schedules(tile_description, cuda_version, is_aligned, data_types,
if is_fp32 and (is_tn or is_nn) and (cta_n % cta_k != 0):
return [], []
- grouped = gemm_kind == GemmKind.GroupedGemmUniversal3x
+ grouped = is_grouped(gemm_kind)
if grouped:
# the following cases are unsupported by grouped GEMM
if not is_aligned: