mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 12:00:07 +00:00
* merging recent changes to universal gemm to tile_engine
* Reducing Linking time by generating less intermediate files
* make small libs to build faster
* Reducing the instances
* reducing instances
* Restoring default config
* Restoring default config
* warp_n reverted in default config
* Adding diff json files for fp8 and fp16, cmake changes for fp8
* Restructure the CMake File
* Added more granularity for build and some debugging code
* removed some of debugging statements
* added fp8 instances
* take datatype from command line to enable both types of json files
* updated README file
* code cleanup
* code cleanup
* updated jenkinsfile
* enable tile_engine daily builds
* updating cmake file
* updated CMakeLists.txt
* Updating CMake code fixing gfx12 build
* Updating CMake code fixing gfx12 build
* Fix CMake file null checks
* fixed traces of rebase
* Update tile_engine/ops/gemm/README.md
Co-authored-by: spolifroni-amd <Sandra.Polifroni@amd.com>
* Update tile_engine/ops/gemm/README.md
Co-authored-by: spolifroni-amd <Sandra.Polifroni@amd.com>
* Update tile_engine/ops/gemm/README.md
Co-authored-by: spolifroni-amd <Sandra.Polifroni@amd.com>
* fixing rebase issue
---------
Co-authored-by: khushbu <khuagarw@gmail.com>
Co-authored-by: ThomasNing <thomas.ning@amd.com>
Co-authored-by: illsilin_amdeng <Illia.Silin@amd.com>
Co-authored-by: AviralGoelAMD <aviral.goel@amd.com>
Co-authored-by: spolifroni-amd <Sandra.Polifroni@amd.com>
[ROCm/composable_kernel commit: a14753b86f]
115 lines
1.4 KiB
JSON
115 lines
1.4 KiB
JSON
{
  "problem": {
    "layout_a": {
      "values": [
        "r"
      ]
    },
    "layout_b": {
      "values": [
        "c"
      ]
    },
    "layout_c": {
      "values": [
        "r"
      ]
    }
  },
  "tile_config": {
    "tile_m": {
      "values": [
        256
      ]
    },
    "tile_n": {
      "values": [
        128,
        256
      ]
    },
    "tile_k": {
      "values": [
        32
      ]
    },
    "warp_m": {
      "values": [
        1,
        2,
        4
      ]
    },
    "warp_n": {
      "values": [
        1,
        2,
        4
      ]
    },
    "warp_k": {
      "values": [
        1
      ]
    },
    "warp_tile_m": {
      "values": [
        4,
        16,
        32
      ]
    },
    "warp_tile_n": {
      "values": [
        16,
        32,
        64
      ]
    },
    "warp_tile_k": {
      "values": [
        8,
        16,
        32,
        64,
        128
      ]
    }
  },
  "trait_config": {
    "pipeline": {
      "values": [
        "compv3",
        "compv4",
        "mem"
      ]
    },
    "scheduler": {
      "values": [
        "intrawave",
        "interwave"
      ]
    },
    "epilogue": {
      "values": [
        "cshuffle",
        "default"
      ]
    },
    "pad_m": {
      "values": [
        false
      ]
    },
    "pad_n": {
      "values": [
        false
      ]
    },
    "pad_k": {
      "values": [
        false
      ]
    }
  }
}