mirror of
https://github.com/NVIDIA/cutlass.git
synced 2026-05-12 01:10:08 +00:00
Small Tile N BlockScaled GEMM + Grouped GEMM (#3176)
Co-authored-by: dePaul Miller <23461061+depaulmillz@users.noreply.github.com>
This commit is contained in:
@@ -11192,6 +11192,8 @@ def GenerateSM120_TensorOp_mixed_8bits_UMMA_gemm_with_block_scaled(manifest, cud
|
||||
]
|
||||
|
||||
tile_sizes = [
|
||||
[128, 32, 128],
|
||||
[128, 64, 128],
|
||||
[128, 128, 128]
|
||||
]
|
||||
|
||||
@@ -11324,12 +11326,20 @@ def GenerateSM120_TensorOp_fp4_UMMA_gemm_with_block_scaled(manifest, cuda_versio
|
||||
]
|
||||
|
||||
tile_sizes_cooperative = [
|
||||
[128, 32, 128],
|
||||
[128, 32, 256],
|
||||
[128, 64, 128],
|
||||
[128, 64, 256],
|
||||
[128, 128, 128],
|
||||
[128, 128, 256],
|
||||
[256, 128, 128]
|
||||
]
|
||||
|
||||
tile_sizes_pingpong = [
|
||||
[128, 32, 128],
|
||||
[128, 32, 256],
|
||||
[128, 64, 128],
|
||||
[128, 64, 256],
|
||||
[128, 128, 128],
|
||||
[128, 128, 256]
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user