mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 19:09:59 +00:00
* Making edits to identify individual compilation issues.
* Minor fix for blob txt files not being created.
* Fixing compilation issues.
* Fixing ordering bug.
* Adding python profiling functionality.
* Setting individual build as default.
* Setting gpu target filtering for tile engine to gfx90a, gfx942 and gfx950.
* Update the default running parameters and settings.
* Fixing bug with benchmarking, shifting file generation to build instead of config.
* Updating fixes.
* Fixing json output and parsing.
* Disable ccache for tile engine gemm ops because we don't need it.
* Removing duplicate type definition.
* Improving json printing.
* Add flexibility for different layouts and support for more warp tiles.
* Fix extra flag in name of individual kernels.
* Fixing bug with booleans.
* Solve the first patch of the post-merge conflict.
* Compilation fixes, and cosmetic improvements.
* Yet again compilation fixes after latest changes from develop.
* Fixing python benchmarking script.
---------
Co-authored-by: Vidyasagar Ananthan <vidyasagar.ananthan@amd.com>
Co-authored-by: Vidyasagar Ananthan <vanantha@amd.com>
[ROCm/composable_kernel commit: 705804d9bf]
105 lines
1.9 KiB
JSON
105 lines
1.9 KiB
JSON
{
    "problem": {
    },
    "tile_config": {
        "tile_m": {
            "max": 256,
            "min": 64,
            "step": 64
        },
        "tile_n": {
            "max": 256,
            "min": 64,
            "step": 64
        },
        "tile_k": {
            "max": 256,
            "min": 64,
            "step": 64
        },
        "warp_m": {
            "values": [
                4,
                2,
                1
            ]
        },
        "warp_n": {
            "values": [
                4,
                2,
                1
            ]
        },
        "warp_k": {
            "values": [
                1
            ]
        },
        "warp_tile_m": {
            "values": [
                4,
                16,
                32
            ]
        },
        "warp_tile_n": {
            "values": [
                16,
                32,
                64
            ]
        },
        "warp_tile_k": {
            "values": [
                8,
                16,
                32,
                64,
                128
            ]
        }
    },
    "trait_config": {
        "pipeline": {
            "values": [
                "compv3",
                "compv4",
                "mem"
            ]
        },
        "scheduler": {
            "values": [
                "intrawave",
                "interwave"
            ]
        },
        "epilogue": {
            "values": [
                "cshuffle",
                "default"
            ]
        },
        "pad_m": {
            "values": [
                false
            ]
        },
        "pad_n": {
            "values": [
                false
            ]
        },
        "pad_k": {
            "values": [
                false
            ]
        },
        "persistent": {
            "values": [
                false,
                true
            ]
        }
    }
}