mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-01 20:21:23 +00:00
* updates to support int8 in 03_gemm example
* added comments, using aliases, helper functions
* test(gemm_universal): add test cases for int8 gemm pipeline
* fix(test_gemm): fix for failing test unit test for int8
* test(ck_tile): add int8 unit test for gemm universal
* refactor(gemm_universal): GPU reference verification for GEMM code improved
* style(gemm_universal): removed extra comments and did clang format
* merging recent changes to universal gemm to tile_engine
* ck tile engine integration work
* feat(tile_engine): add int8 support to tile engine ops/gemm
* feat(tile_engine): added 32 32 16 mfma instances to tile engine for int8
* style: Format code with clang-format-12
* refactor(tile_engine): address review comments
* style: removed unhelpful comments & unused variables.
* build: tile engine uses default config
* feat: add int8 support for CK_TILE GEMM
* style: added trailing commas to codegen_utils.py
* refactor: tile engine
* refactor: formatting and code review
* refactor: code formatting for python files
* fix: suppress build warning
* add support for gfx950
* refactor:KWarpTile size in gemms util
* Fix the branch and wrap up the k warp tile
* Add bf8 integration
* refactor: clang format and rebase

---------

Co-authored-by: zjli2013 <leezhengjiang@gmail.com>
Co-authored-by: AviralGoelAMD <aviral.goel@amd.com>
Co-authored-by: Khushbu Agarwal <khuagarw@amd.com>
136 lines
2.5 KiB
JSON
136 lines
2.5 KiB
JSON
{
|
|
"problem": {
|
|
"layout_a": {
|
|
"values": [
|
|
"r"
|
|
]
|
|
},
|
|
"layout_b": {
|
|
"values": [
|
|
"c"
|
|
]
|
|
},
|
|
"layout_c": {
|
|
"values": [
|
|
"r"
|
|
]
|
|
},
|
|
"datatype_a": {
|
|
"values": [
|
|
"fp16"
|
|
]
|
|
},
|
|
"datatype_b": {
|
|
"values": [
|
|
"fp16"
|
|
]
|
|
},
|
|
"datatype_c": {
|
|
"values": [
|
|
"fp16"
|
|
]
|
|
}
|
|
},
|
|
"tile_config": {
|
|
"tile_m": {
|
|
"max": 256,
|
|
"min": 64,
|
|
"step": 64,
|
|
"exclude": []
|
|
},
|
|
"tile_n": {
|
|
"max": 256,
|
|
"min": 64,
|
|
"step": 32,
|
|
"exclude": []
|
|
},
|
|
"tile_k": {
|
|
"max": 256,
|
|
"min": 64,
|
|
"step": 64,
|
|
"exclude": [192]
|
|
},
|
|
"warp_m": {
|
|
"values": [
|
|
4,
|
|
2,
|
|
1
|
|
]
|
|
},
|
|
"warp_n": {
|
|
"values": [
|
|
4,
|
|
2,
|
|
1
|
|
]
|
|
},
|
|
"warp_k": {
|
|
"values": [
|
|
1
|
|
]
|
|
},
|
|
"warp_tile_m": {
|
|
"values": [
|
|
4,
|
|
8,
|
|
16,
|
|
32,
|
|
64
|
|
]
|
|
},
|
|
"warp_tile_n": {
|
|
"values": [
|
|
4,
|
|
8,
|
|
16,
|
|
32,
|
|
64
|
|
]
|
|
},
|
|
"warp_tile_k": {
|
|
"values": [
|
|
8,
|
|
16,
|
|
32,
|
|
64,
|
|
128
|
|
]
|
|
}
|
|
},
|
|
"trait_config": {
|
|
"pipeline": {
|
|
"values": [
|
|
"compv4",
|
|
"compv3",
|
|
"mem"
|
|
]
|
|
},
|
|
"scheduler": {
|
|
"values": [
|
|
"intrawave",
|
|
"interwave"
|
|
]
|
|
},
|
|
"epilogue": {
|
|
"values": [
|
|
"default",
|
|
"cshuffle"
|
|
]
|
|
},
|
|
"pad_m": {
|
|
"values": [
|
|
false
|
|
]
|
|
},
|
|
"pad_n": {
|
|
"values": [
|
|
false
|
|
]
|
|
},
|
|
"pad_k": {
|
|
"values": [
|
|
false
|
|
]
|
|
}
|
|
}
|
|
} |