mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
CK tile engine GEMM unit tests: expand test coverage (#3025)
* initial commit for testing datatypes, layouts and traits * correct warp tile size for small datatype config to make a valid instance for fp16, bf16, fp8 * add tile size coverage test * Cover more tests, parallel instance generation, documentation * update cmakelist to run more tests * initial code to support adding test params in json file * add configurable problem sizes for different tests * modify README.md * clean test_gemm_simple code * correct padding coverage test * Add comprehensive and quick tile size config files * remove fp64 from datatypes * update documents. manage selecting tile_size config (quick or Comprehensive) * correct padding test problem sizes * update comprehensive test and correct documents * Skip GEMM tests with unsupported arguments instead of failing * use gen_single instead of gen_individual because of an issue. add splitk tests to tile_size_quick_config * clean CMakeList, remod py file * Refactor test configs: Rename tile_size to coverage, remove separate traits config, clean cmakefile, readme * update fp32, fp8 to test all layouts, clean documents and comments * limit fp32 test layouts to rcr because of compilation error on some gpus * remove fp32 because of its removal from gemm_instance_builder, make quick test smaller, updating comments * Fix fp8/bf8 test failures on gfx950 by adding OCP FP8 format support * Reduce quick_coverage test count from ~250 to ~144 for faster CI
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Comprehensive coverage testing - extensive tile size coverage (16-256, step 16) with multiple warp configurations and all trait combinations. Several thousand kernels."
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 512, "n": 512, "k": 256, "split_k": 1},
|
||||
{"m": 1024, "n": 512, "k": 512, "split_k": 1},
|
||||
{"m": 512, "n": 1024, "k": 512, "split_k": 1},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 1},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 2},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 4}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256]},
|
||||
"tile_n": {"values": [16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256]},
|
||||
"tile_k": {"values": [16, 32, 64]},
|
||||
"warp_m": {"values": [1, 2, 4]},
|
||||
"warp_n": {"values": [1, 2, 4]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [16, 32]},
|
||||
"warp_tile_n": {"values": [16, 32]},
|
||||
"warp_tile_k": {"values": [8, 16, 32, 64, 128]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["mem", "compv3", "compv4"]},
|
||||
"epilogue": {"values": ["default", "cshuffle"]},
|
||||
"scheduler": {"values": ["intrawave", "interwave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [true, false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Configuration optimized for large data types (fp32) with smaller warp tiles due to memory constraints"
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 512, "n": 512, "k": 128, "split_k": 1},
|
||||
{"m": 512, "n": 256, "k": 192, "split_k": 1},
|
||||
{"m": 256, "n": 384, "k": 192, "split_k": 1}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [256]},
|
||||
"tile_n": {"values": [128]},
|
||||
"tile_k": {"values": [32]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [16]},
|
||||
"warp_tile_n": {"values": [16]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["compv3"]},
|
||||
"epilogue": {"values": ["default"]},
|
||||
"scheduler": {"values": ["intrawave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Padding coverage testing - fixed config with fp16/rcr, varying only padding combinations"
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 104, "n": 104, "k": 56, "split_k": 1},
|
||||
{"m": 200, "n": 152, "k": 80, "split_k": 1},
|
||||
{"m": 152, "n": 200, "k": 64, "split_k": 1}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [64]},
|
||||
"tile_n": {"values": [64]},
|
||||
"tile_k": {"values": [32]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [32]},
|
||||
"warp_tile_n": {"values": [32]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["compv3"]},
|
||||
"epilogue": {"values": ["default"]},
|
||||
"scheduler": {"values": ["intrawave"]},
|
||||
"pad_m": {"values": [true]},
|
||||
"pad_n": {"values": [true]},
|
||||
"pad_k": {"values": [true]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Quick coverage testing - tests multiple tile sizes with all trait combinations (pipelines, epilogues, schedulers). Approximately 144 kernels."
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 512, "n": 1024, "k": 512, "split_k": 1},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 2},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 4}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [32, 64, 256]},
|
||||
"tile_n": {"values": [32, 64, 256]},
|
||||
"tile_k": {"values": [16, 32]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [16]},
|
||||
"warp_tile_n": {"values": [16]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["mem", "compv3", "compv4"]},
|
||||
"epilogue": {"values": ["default", "cshuffle"]},
|
||||
"scheduler": {"values": ["intrawave", "interwave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
@@ -1,88 +1,33 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Basic functionality validation with moderate problem sizes"
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 256, "n": 256, "k": 128, "split_k": 1},
|
||||
{"m": 512, "n": 256, "k": 256, "split_k": 1},
|
||||
{"m": 256, "n": 512, "k": 256, "split_k": 1}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {
|
||||
"values": [
|
||||
128
|
||||
]
|
||||
},
|
||||
"tile_n": {
|
||||
"values": [
|
||||
128
|
||||
]
|
||||
},
|
||||
"tile_k": {
|
||||
"values": [
|
||||
64
|
||||
]
|
||||
},
|
||||
"warp_m": {
|
||||
"values": [
|
||||
2
|
||||
]
|
||||
},
|
||||
"warp_n": {
|
||||
"values": [
|
||||
2
|
||||
]
|
||||
},
|
||||
"warp_k": {
|
||||
"values": [
|
||||
1
|
||||
]
|
||||
},
|
||||
"warp_tile_m": {
|
||||
"values": [
|
||||
16
|
||||
]
|
||||
},
|
||||
"warp_tile_n": {
|
||||
"values": [
|
||||
16
|
||||
]
|
||||
},
|
||||
"warp_tile_k": {
|
||||
"values": [
|
||||
16
|
||||
]
|
||||
}
|
||||
"tile_m": {"values": [128]},
|
||||
"tile_n": {"values": [128]},
|
||||
"tile_k": {"values": [64]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [16]},
|
||||
"warp_tile_n": {"values": [16]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {
|
||||
"values": [
|
||||
"compv3",
|
||||
"compv4"
|
||||
]
|
||||
},
|
||||
"scheduler": {
|
||||
"values": [
|
||||
"intrawave"
|
||||
]
|
||||
},
|
||||
"epilogue": {
|
||||
"values": [
|
||||
"default"
|
||||
]
|
||||
},
|
||||
"pad_m": {
|
||||
"values": [
|
||||
false
|
||||
]
|
||||
},
|
||||
"pad_n": {
|
||||
"values": [
|
||||
false
|
||||
]
|
||||
},
|
||||
"pad_k": {
|
||||
"values": [
|
||||
false
|
||||
]
|
||||
},
|
||||
"persistent": {
|
||||
"values": [
|
||||
false
|
||||
]
|
||||
}
|
||||
"pipeline": {"values": ["compv3", "compv4"]},
|
||||
"epilogue": {"values": ["default"]},
|
||||
"scheduler": {"values": ["intrawave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Configuration optimized for small data types (fp8, fp16, bf16) with larger warp tiles"
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 512, "n": 512, "k": 256, "split_k": 1},
|
||||
{"m": 1024, "n": 512, "k": 512, "split_k": 1},
|
||||
{"m": 512, "n": 1024, "k": 512, "split_k": 1},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 1}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [128]},
|
||||
"tile_n": {"values": [128]},
|
||||
"tile_k": {"values": [32]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [32]},
|
||||
"warp_tile_n": {"values": [32]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["compv3"]},
|
||||
"epilogue": {"values": ["default"]},
|
||||
"scheduler": {"values": ["intrawave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
Reference in New Issue
Block a user