mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-03-24 00:57:39 +00:00
* WIP POC of dispatcher * Dispatcher python workflow setup. * Dispatcher cleanup and updates. Further dispatcher cleanup and updates. Build fixes Improvements and python to CK example Improvements to readme * Fixes to python paths * Cleaning up code * Improving dispatcher support for different arch Fixing typos * Fix formatting errors * Cleaning up examples * Improving codegeneration * Improving and fixing C++ examples * Adding conv functionality (fwd,bwd,bwdw) and examples. * Fixes based on feedback. * Further fixes based on feedback. * Adding stress test for autogeneration and autocorrection, and fixing preshuffle bug. * Another round of improvements based on feedback. * Trimming out unnecessary code. * Fixing the multi-D implementation. * Using gpu verification for gemms and fixing convolutions tflops calculation. * Fix counter usage issue and arch filtering per ops. * Adding changelog and other fixes. * Improve examples and resolve critical bugs. * Reduce build time for python examples. * Fixing minor bug. * Fix compilation error. * Improve installation instructions for dispatcher. * Add docker based installation instructions for dispatcher. * Fixing arch-based filtering to match tile engine. * Remove dead code and fix arch filtering. * Minor bugfix. * Updates after rebase. * Trimming code. * Fix copyright headers. * Consolidate examples, cut down code. * Minor fixes. * Improving python examples. * Update readmes. * Remove conv functionality. * Cleanup following conv removable.
80 lines
1.6 KiB
JSON
80 lines
1.6 KiB
JSON
{
|
|
"registry": "export_demo",
|
|
"kernel_count": 3,
|
|
"kernels": [
|
|
{
|
|
"tile": "128x128x32",
|
|
"dtypes": {
|
|
"A": "fp16",
|
|
"B": "fp16",
|
|
"C": "fp16"
|
|
},
|
|
"layout": "rcr",
|
|
"pipeline": "compv4",
|
|
"target": "gfx942"
|
|
},
|
|
{
|
|
"tile": "256x256x64",
|
|
"dtypes": {
|
|
"A": "fp16",
|
|
"B": "fp16",
|
|
"C": "fp16"
|
|
},
|
|
"layout": "rcr",
|
|
"pipeline": "compv4",
|
|
"target": "gfx942"
|
|
},
|
|
{
|
|
"tile": "64x64x32",
|
|
"dtypes": {
|
|
"A": "fp16",
|
|
"B": "fp16",
|
|
"C": "fp16"
|
|
},
|
|
"layout": "rcr",
|
|
"pipeline": "compv4",
|
|
"target": "gfx942"
|
|
}
|
|
],
|
|
"cpp_registry": {
|
|
"metadata": {
|
|
"timestamp": "Dec 4 2025 06:23:15",
|
|
"total_kernels": 1,
|
|
"export_version": "1.0",
|
|
"dispatcher_version": "1.0.0"
|
|
},
|
|
"statistics": {
|
|
"by_datatype": {},
|
|
"by_pipeline": {},
|
|
"by_scheduler": {}
|
|
},
|
|
"kernels": [
|
|
{
|
|
"identifier": "128x128x32_2x2x1_32x32x16_nopers",
|
|
"name": "gemm_fp16_rcrr_compv4_cshuffle_intrawave_False_False_False_False_128x128x32_2x2x1_32x32x16",
|
|
"algorithm": {
|
|
"tile_shape": {
|
|
"m": 128,
|
|
"n": 128,
|
|
"k": 32
|
|
},
|
|
"wave_shape": {
|
|
"m": 2,
|
|
"n": 2,
|
|
"k": 1
|
|
},
|
|
"warp_tile_shape": {
|
|
"m": 32,
|
|
"n": 32,
|
|
"k": 16
|
|
},
|
|
"block_size": 256,
|
|
"persistent": false,
|
|
"double_buffer": true,
|
|
"preshuffle": false,
|
|
"transpose_c": false
|
|
}
|
|
}
|
|
]
|
|
}
|
|
} |