Merge commit 'f5f795c4d6cdfa86e282ba077839aad409ca3103' into develop

This commit is contained in:
assistant-librarian[bot]
2025-08-28 01:39:48 +00:00
parent a4d13272e3
commit d279e73d10
5 changed files with 125 additions and 49 deletions

View File

@@ -486,7 +486,7 @@ auto create_args(int argc, char* argv[])
.insert("stride_b", "0", "Tensor B stride")
.insert("stride_c", "0", "Tensor C stride")
.insert("v", "2", "0. No validation, 1. Validation on CPU, 2. Validation on GPU")
-.insert("prec", "fp16", "data type. fp16/bf16/fp8/bf8")
+.insert("prec", "fp16", "data type. fp16/bf16/fp8/bf8/pk_int4_t")
.insert("warmup", "50", "number of iterations before benchmark the kernel")
.insert("repeat", "100", "number of iterations to benchmark the kernel")
.insert("timer", "gpu", "gpu:gpu timer, cpu:cpu timer")