Ck tile engine gemm unit tests expand test coverage (#3025)

* initial commit for testing datatypes, layouts and traits

* correct warp tile size for small datatype config to make a valid instance for fp16, bf16, fp8

* add tile size coverage test

* Cover more tests, parallel instance generation, documentation

* update cmakelist to run more tests

* initial code to support adding test params in json file

* add configurable problem sizes for different tests

* modify README.md

* clean test_gemm_simple code

* correct padding coverage test

* Add comprehensive and quick tile size config files

* remove fp64 from datatypes

* update documents. manage selecting tile_size config (quick or Comprehensive)

* correct padding test problem sizes

* update comprehensive test and correct documents

* Skip GEMM tests with unsupported arguments instead of failing

* use gen_single instead of gen_individual because of an issue. add splitk tests to tile_size_quick_config

* clean CMakeList, remod py file

* Refactor test configs: Rename tile_size to coverage, remove separate traits config, clean cmakefile, readme

* update fp32, fp8 to test all layouts, clean documents and comments

* limit fp32 test layouts to rcr because of compilation error on some gpus

* remove fp32 because it was removed from gemm_instance_builder, make quick test smaller, update comments

* Fix fp8/bf8 test failures on gfx950 by adding OCP FP8 format support

* Reduce quick_coverage test count from ~250 to ~144 for faster CI
This commit is contained in:
msaffari-amd
2025-11-03 10:29:16 +01:00
committed by GitHub
parent 3ae3992c18
commit d405641f06
11 changed files with 545 additions and 173 deletions

View File

@@ -0,0 +1,37 @@
{
"problem": {
"description": "Comprehensive coverage testing - extensive tile size coverage (16-256, step 16) with multiple warp configurations and all trait combinations. Several thousand kernels."
},
"test_params": {
"problem_sizes": [
{"m": 512, "n": 512, "k": 256, "split_k": 1},
{"m": 1024, "n": 512, "k": 512, "split_k": 1},
{"m": 512, "n": 1024, "k": 512, "split_k": 1},
{"m": 1024, "n": 1024, "k": 256, "split_k": 1},
{"m": 1024, "n": 1024, "k": 256, "split_k": 2},
{"m": 1024, "n": 1024, "k": 256, "split_k": 4}
]
},
"tile_config": {
"tile_m": {"values": [16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256]},
"tile_n": {"values": [16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256]},
"tile_k": {"values": [16, 32, 64]},
"warp_m": {"values": [1, 2, 4]},
"warp_n": {"values": [1, 2, 4]},
"warp_k": {"values": [1]},
"warp_tile_m": {"values": [16, 32]},
"warp_tile_n": {"values": [16, 32]},
"warp_tile_k": {"values": [8, 16, 32, 64, 128]}
},
"trait_config": {
"pipeline": {"values": ["mem", "compv3", "compv4"]},
"epilogue": {"values": ["default", "cshuffle"]},
"scheduler": {"values": ["intrawave", "interwave"]},
"pad_m": {"values": [false]},
"pad_n": {"values": [false]},
"pad_k": {"values": [false]},
"persistent": {"values": [true, false]}
},
"k_block_per_cu": 1,
"permute_n": false
}

View File

@@ -0,0 +1,34 @@
{
"problem": {
"description": "Configuration optimized for large data types (fp32) with smaller warp tiles due to memory constraints"
},
"test_params": {
"problem_sizes": [
{"m": 512, "n": 512, "k": 128, "split_k": 1},
{"m": 512, "n": 256, "k": 192, "split_k": 1},
{"m": 256, "n": 384, "k": 192, "split_k": 1}
]
},
"tile_config": {
"tile_m": {"values": [256]},
"tile_n": {"values": [128]},
"tile_k": {"values": [32]},
"warp_m": {"values": [2]},
"warp_n": {"values": [2]},
"warp_k": {"values": [1]},
"warp_tile_m": {"values": [16]},
"warp_tile_n": {"values": [16]},
"warp_tile_k": {"values": [16]}
},
"trait_config": {
"pipeline": {"values": ["compv3"]},
"epilogue": {"values": ["default"]},
"scheduler": {"values": ["intrawave"]},
"pad_m": {"values": [false]},
"pad_n": {"values": [false]},
"pad_k": {"values": [false]},
"persistent": {"values": [false]}
},
"k_block_per_cu": 1,
"permute_n": false
}

View File

@@ -0,0 +1,34 @@
{
"problem": {
"description": "Padding coverage testing - fixed config with fp16/rcr, varying only padding combinations"
},
"test_params": {
"problem_sizes": [
{"m": 104, "n": 104, "k": 56, "split_k": 1},
{"m": 200, "n": 152, "k": 80, "split_k": 1},
{"m": 152, "n": 200, "k": 64, "split_k": 1}
]
},
"tile_config": {
"tile_m": {"values": [64]},
"tile_n": {"values": [64]},
"tile_k": {"values": [32]},
"warp_m": {"values": [2]},
"warp_n": {"values": [2]},
"warp_k": {"values": [1]},
"warp_tile_m": {"values": [32]},
"warp_tile_n": {"values": [32]},
"warp_tile_k": {"values": [16]}
},
"trait_config": {
"pipeline": {"values": ["compv3"]},
"epilogue": {"values": ["default"]},
"scheduler": {"values": ["intrawave"]},
"pad_m": {"values": [true]},
"pad_n": {"values": [true]},
"pad_k": {"values": [true]},
"persistent": {"values": [false]}
},
"k_block_per_cu": 1,
"permute_n": false
}

View File

@@ -0,0 +1,34 @@
{
"problem": {
"description": "Quick coverage testing - tests multiple tile sizes with all trait combinations (pipelines, epilogues, schedulers). Approximately 144 kernels."
},
"test_params": {
"problem_sizes": [
{"m": 512, "n": 1024, "k": 512, "split_k": 1},
{"m": 1024, "n": 1024, "k": 256, "split_k": 2},
{"m": 1024, "n": 1024, "k": 256, "split_k": 4}
]
},
"tile_config": {
"tile_m": {"values": [32, 64, 256]},
"tile_n": {"values": [32, 64, 256]},
"tile_k": {"values": [16, 32]},
"warp_m": {"values": [2]},
"warp_n": {"values": [2]},
"warp_k": {"values": [1]},
"warp_tile_m": {"values": [16]},
"warp_tile_n": {"values": [16]},
"warp_tile_k": {"values": [16]}
},
"trait_config": {
"pipeline": {"values": ["mem", "compv3", "compv4"]},
"epilogue": {"values": ["default", "cshuffle"]},
"scheduler": {"values": ["intrawave", "interwave"]},
"pad_m": {"values": [false]},
"pad_n": {"values": [false]},
"pad_k": {"values": [false]},
"persistent": {"values": [false]}
},
"k_block_per_cu": 1,
"permute_n": false
}

View File

@@ -1,88 +1,33 @@
{
"problem": {
"description": "Basic functionality validation with moderate problem sizes"
},
"test_params": {
"problem_sizes": [
{"m": 256, "n": 256, "k": 128, "split_k": 1},
{"m": 512, "n": 256, "k": 256, "split_k": 1},
{"m": 256, "n": 512, "k": 256, "split_k": 1}
]
},
"tile_config": {
"tile_m": {
"values": [
128
]
},
"tile_n": {
"values": [
128
]
},
"tile_k": {
"values": [
64
]
},
"warp_m": {
"values": [
2
]
},
"warp_n": {
"values": [
2
]
},
"warp_k": {
"values": [
1
]
},
"warp_tile_m": {
"values": [
16
]
},
"warp_tile_n": {
"values": [
16
]
},
"warp_tile_k": {
"values": [
16
]
}
"tile_m": {"values": [128]},
"tile_n": {"values": [128]},
"tile_k": {"values": [64]},
"warp_m": {"values": [2]},
"warp_n": {"values": [2]},
"warp_k": {"values": [1]},
"warp_tile_m": {"values": [16]},
"warp_tile_n": {"values": [16]},
"warp_tile_k": {"values": [16]}
},
"trait_config": {
"pipeline": {
"values": [
"compv3",
"compv4"
]
},
"scheduler": {
"values": [
"intrawave"
]
},
"epilogue": {
"values": [
"default"
]
},
"pad_m": {
"values": [
false
]
},
"pad_n": {
"values": [
false
]
},
"pad_k": {
"values": [
false
]
},
"persistent": {
"values": [
false
]
}
"pipeline": {"values": ["compv3", "compv4"]},
"epilogue": {"values": ["default"]},
"scheduler": {"values": ["intrawave"]},
"pad_m": {"values": [false]},
"pad_n": {"values": [false]},
"pad_k": {"values": [false]},
"persistent": {"values": [false]}
},
"k_block_per_cu": 1,
"permute_n": false

View File

@@ -0,0 +1,35 @@
{
"problem": {
"description": "Configuration optimized for small data types (fp8, fp16, bf16) with larger warp tiles"
},
"test_params": {
"problem_sizes": [
{"m": 512, "n": 512, "k": 256, "split_k": 1},
{"m": 1024, "n": 512, "k": 512, "split_k": 1},
{"m": 512, "n": 1024, "k": 512, "split_k": 1},
{"m": 1024, "n": 1024, "k": 256, "split_k": 1}
]
},
"tile_config": {
"tile_m": {"values": [128]},
"tile_n": {"values": [128]},
"tile_k": {"values": [32]},
"warp_m": {"values": [2]},
"warp_n": {"values": [2]},
"warp_k": {"values": [1]},
"warp_tile_m": {"values": [32]},
"warp_tile_n": {"values": [32]},
"warp_tile_k": {"values": [16]}
},
"trait_config": {
"pipeline": {"values": ["compv3"]},
"epilogue": {"values": ["default"]},
"scheduler": {"values": ["intrawave"]},
"pad_m": {"values": [false]},
"pad_n": {"values": [false]},
"pad_k": {"values": [false]},
"persistent": {"values": [false]}
},
"k_block_per_cu": 1,
"permute_n": false
}