v3.9 update (#2203)

* v3.9 update

* voidD

---------

Co-authored-by: yuzhai <yuzhai@nvidia.com>
This commit is contained in:
Yujia Zhai
2025-04-02 12:11:18 -07:00
committed by GitHub
parent 62750a2b75
commit 6f4921858b
129 changed files with 7719 additions and 2036 deletions

View File

@@ -10025,7 +10025,8 @@ def GenerateSM120_TensorOp_fp4_UMMA_gemm_with_block_scaled(manifest, cuda_versio
tile_sizes_cooperative = [
[128, 128, 128],
-[128, 128, 256]
+[128, 128, 256],
+[256, 128, 128]
]
tile_sizes_pingpong = [