[Tile Engine] Add benchmark for tile engine gemm. (#2193)

* initial commit -m benchmark

* only support profile

* fix

* fix doc

* add default config

* add ci

* fix cmake

* tmp save for gen blobs

* fix bug

* merge

* range config

* test success

* fix

* fix

* move struct

* remove config property

* fix config

* remove comment

* add cmake option & modify

* add changelog

* fix

* format

* add pydantic module to the docker image

* fix

* add benchmark for cold and warm-up

* python format

* add asm cache control

* fix README

* remove pydantic module

* modify changelog

* fix config

* recover benchmark_gemm and fix

* format python

* refactor profiler

* fix csv bug

* fix codegen bug

* add kernel instance object

* add benchmark gemm executable

* fix jenkins & delete extra header

* disable warning output & enable default config

* Disable sparsity for invalid warp tile combinations

* fix gemm host template func

* refactor gemm profiler

* filter out some instances

* default config test & fix codegen bug

* add sparse flag to gen more instances

---------

Co-authored-by: illsilin <Illia.Silin@amd.com>
Co-authored-by: khuagarw <khuagarw@amd.com>
Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>
This commit is contained in:
Casey-Shi
2025-05-27 13:32:36 +08:00
committed by GitHub
parent c42b957d65
commit 128f5a1eab
16 changed files with 1911 additions and 914 deletions

View File

@@ -0,0 +1,130 @@
{
"problem": {
"layout_a": {
"values": [
"r"
]
},
"layout_b": {
"values": [
"c"
]
},
"layout_c": {
"values": [
"r"
]
},
"datatype_a": {
"values": [
"fp16"
]
},
"datatype_b": {
"values": [
"fp16"
]
},
"datatype_c": {
"values": [
"fp16"
]
}
},
"tile_config": {
"tile_m": {
"max": 512,
"min": 64,
"step": 64,
"exclude": []
},
"tile_n": {
"max": 512,
"min": 64,
"step": 32,
"exclude": []
},
"tile_k": {
"max": 512,
"min": 64,
"step": 64,
"exclude": []
},
"warp_m": {
"values": [
4,
2,
1
]
},
"warp_n": {
"values": [
4,
2,
1
]
},
"warp_k": {
"values": [
1
]
},
"warp_tile_m": {
"values": [
16,
32
]
},
"warp_tile_n": {
"values": [
16,
32
]
},
"warp_tile_k": {
"values": [
8,
16,
32,
64,
128
]
}
},
"trait_config": {
"pipeline": {
"values": [
"compv4",
"compv3",
"mem"
]
},
"scheduler": {
"values": [
"intrawave",
"interwave"
]
},
"epilogue": {
"values": [
"default",
"cshuffle"
]
},
"pad_m": {
"values": [
false
]
},
"pad_n": {
"values": [
false
]
},
"pad_k": {
"values": [
false
]
}
}
}