mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 20:51:23 +00:00
* initial commit -m benchmark * only support profile * fix * fix doc * add default config * add ci * fix cmake * tmp save for gen blobs * fix bug * merge * range config * test success * fix * fix * move struct * remove config property * fix config * remove comment * add cmake option & modify * add changelog * fix * format * add pydantic module to the docker image * fix * add benchmark for cold and warm up * python format * add asm cache control * fix README * remove pydantic module * modify changelog * fix config * recover benchmark_gemm and fix * format python * refactor profiler * fix csv bug * fix codegen bug * add kernel instance object * add benchmark gemm executable * fix jenkins & delete extra header * disable warning output & enable default config * Disable sparsity for invalid warp tile combinations * fix gemm host template func * refactor gemm profiler * filter out some instances * default config test & fix codegen bug * add sparse flag to gen more instances --------- Co-authored-by: illsilin <Illia.Silin@amd.com> Co-authored-by: khuagarw <khuagarw@amd.com> Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>
130 lines
2.4 KiB
JSON
130 lines
2.4 KiB
JSON
{
    "problem": {
        "layout_a": {
            "values": [
                "r"
            ]
        },
        "layout_b": {
            "values": [
                "c"
            ]
        },
        "layout_c": {
            "values": [
                "r"
            ]
        },
        "datatype_a": {
            "values": [
                "fp16"
            ]
        },
        "datatype_b": {
            "values": [
                "fp16"
            ]
        },
        "datatype_c": {
            "values": [
                "fp16"
            ]
        }
    },
    "tile_config": {
        "tile_m": {
            "max": 512,
            "min": 64,
            "step": 64,
            "exclude": []
        },
        "tile_n": {
            "max": 512,
            "min": 64,
            "step": 32,
            "exclude": []
        },
        "tile_k": {
            "max": 512,
            "min": 64,
            "step": 64,
            "exclude": []
        },
        "warp_m": {
            "values": [
                4,
                2,
                1
            ]
        },
        "warp_n": {
            "values": [
                4,
                2,
                1
            ]
        },
        "warp_k": {
            "values": [
                1
            ]
        },
        "warp_tile_m": {
            "values": [
                16,
                32
            ]
        },
        "warp_tile_n": {
            "values": [
                16,
                32
            ]
        },
        "warp_tile_k": {
            "values": [
                8,
                16,
                32,
                64,
                128
            ]
        }
    },
    "trait_config": {
        "pipeline": {
            "values": [
                "compv4",
                "compv3",
                "mem"
            ]
        },
        "scheduler": {
            "values": [
                "intrawave",
                "interwave"
            ]
        },
        "epilogue": {
            "values": [
                "default",
                "cshuffle"
            ]
        },
        "pad_m": {
            "values": [
                false
            ]
        },
        "pad_n": {
            "values": [
                false
            ]
        },
        "pad_k": {
            "values": [
                false
            ]
        }
    }
}