mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 19:28:33 +00:00
fix doc
This commit is contained in:
@@ -20,25 +20,24 @@ make tile_engine_gemm -j
|
||||
## tile_engine_gemm inputs
|
||||
```
|
||||
|
||||
-m m dimension (default:3840)
|
||||
-n n dimension (default:4096)
|
||||
-k k dimension (default:2048)
|
||||
-stride_a Tensor A stride (default:0)
|
||||
-stride_b Tensor B stride (default:0)
|
||||
-stride_c Tensor C stride (default:0)
|
||||
-split_k SplitK value (default:1)
|
||||
-v No validation: 0, Validation on CPU: 1, Validation on GPU: 2 (default:2)
|
||||
-metric The metric value of kernel performance - latency: 0, tflops: 1, bandwidth: 2 (default:0)
|
||||
-warmup Number of iterations before benchmark the kernel (default:50)
|
||||
-repeat Number of iterations to benchmark the kernel (default:100)
|
||||
-timer gpu:gpu timer, cpu:cpu timer (default:gpu)
|
||||
-init Value for initializing tensor - random: 0, linear: 1, constant(1): 2 (default:0)
|
||||
-pipeline possible values are: compv3, compv4, mem (default:compv3)
|
||||
-scheduler possible values are: intrawave, interwave (default:intrawave)
|
||||
-epilogue possible values are: cshuffle, default (default:cshuffle)
|
||||
-pad_m Pad in m direction - true/false (default:false)
|
||||
-pad_n Pad in n direction - true/false (default:false)
|
||||
-pad_k Pad in k direction - true/false (default:false)
|
||||
-m The value for m dimension. Default is 3840.
|
||||
-n The value for n dimension. Default is 4096.
|
||||
-k The value for k dimension. Default is 2048.
|
||||
-stride_a The stride value for tensor A. Default is 0.
|
||||
-stride_b The stride value for tensor B. Default is 0.
|
||||
-stride_c The stride value for tensor C Default is 0.
|
||||
-split_k The split value for k dimension. Default is 1.
|
||||
-v The type of validation. Set to 0 for no validation, 1 for validation on CPU, or 2 for validation on GPU. Default is 2, validation on GPU.
|
||||
-warmup The number of iterations before benchmark the kernel. Default is 50.
|
||||
-repeat The number of iterations to benchmark the kernel. Default is 100.
|
||||
-timer The type of timer. Possible values are gpu timer or cpu timer. Default is gpu timer.
|
||||
-init The method of tensor initialization. Set to 0 for random, to 1 for linear, or 2 for constant(1). Default is 0, random.
|
||||
-metric Metric with which to measure kernel performance. Set to 0 for latency, 1 for tflops, or 2 for bandwidth. Default is 0, latency.
|
||||
-pipeline The type of pipeline. Possible values are compv3, compv4 or mem. Default is compv3.
|
||||
-epilogue The type of epilogue. Possible values are cshuffle or default. Default is csshuffle.
|
||||
-pad_m Whether pad or not in m direction. Possible values are true or false. Default is false.
|
||||
-pad_n Whether pad or not in n direction. Possible values are true or false. Default is false.
|
||||
-pad_k Whether pad or not in k direction. Possible values are true or false. Default is false.
|
||||
|
||||
Note: pipeline, scheduler, epilogue, pad_m, pad_n, pad_k should be one of the options specified in instance_combination.json
|
||||
```
|
||||
|
||||
@@ -225,25 +225,56 @@ auto calculate_rtol_atol(const ck_tile::index_t K,
|
||||
inline auto create_args(int argc, char* argv[])
|
||||
{
|
||||
ck_tile::ArgParser arg_parser;
|
||||
arg_parser.insert("m", "3840", "m dimension")
|
||||
.insert("n", "4096", "n dimension")
|
||||
.insert("k", "2048", "k dimension")
|
||||
.insert("stride_a", "0", "Tensor A stride")
|
||||
.insert("stride_b", "0", "Tensor B stride")
|
||||
.insert("stride_c", "0", "Tensor C stride")
|
||||
.insert("split_k", "1", "splitK value")
|
||||
.insert("v", "2", "0. No validation, 1. Validation on CPU, 2. Validation on GPU")
|
||||
.insert("warmup", "50", "number of iterations before benchmark the kernel")
|
||||
.insert("repeat", "100", "number of iterations to benchmark the kernel")
|
||||
.insert("timer", "gpu", "gpu:gpu timer, cpu:cpu timer")
|
||||
.insert("init", "0", "0:random, 1:linear, 2:constant(1)")
|
||||
.insert("metric", "0", "0:latency, 1:tflops, 2:bandwidth")
|
||||
.insert("pipeline", "compv3", "compv3, compv4, mem")
|
||||
.insert("scheduler", "intrawave", "intrawave, interwave")
|
||||
.insert("epilogue", "cshuffle", "cshuffle, default")
|
||||
.insert("pad_m", "false", "true, false")
|
||||
.insert("pad_n", "false", "true, false")
|
||||
.insert("pad_k", "false", "true, false");
|
||||
arg_parser.insert("m", "3840", "The value for m dimension. Default is 3840.")
|
||||
.insert("n", "4096", "The value for n dimension. Default is 4096.")
|
||||
.insert("k", "2048", "The value for k dimension. Default is 2048.")
|
||||
.insert("stride_a", "0", "The stride value for tensor A. Default is 0.")
|
||||
.insert("stride_b", "0", "The stride value for tensor B. Default is 0.")
|
||||
.insert("stride_c", "0", "The stride value for tensor C Default is 0.")
|
||||
.insert("split_k", "1", "The split value for k dimension. Default is 1.")
|
||||
.insert("v",
|
||||
"2",
|
||||
"The type of validation. Set to 0 for no validation, 1 for validation on CPU, or 2 "
|
||||
"for validation on GPU. Default is 2, validation on GPU.")
|
||||
.insert(
|
||||
"warmup", "50", "The number of iterations before benchmark the kernel. Default is 50.")
|
||||
.insert(
|
||||
"repeat", "100", "The number of iterations to benchmark the kernel. Default is 100.")
|
||||
.insert(
|
||||
"timer",
|
||||
"gpu",
|
||||
"The type of timer. Possible values are gpu timer or cpu timer. Default is gpu timer.")
|
||||
.insert("init",
|
||||
"0",
|
||||
"The method of tensor initialization. Set to 0 for random, to 1 for linear, or 2 "
|
||||
"for constant(1). Default is 0, random.")
|
||||
.insert("metric",
|
||||
"0",
|
||||
"Metric with which to measure kernel performance. Set to 0 for latency, 1 for "
|
||||
"tflops, or 2 for bandwidth. Default is 0, latency.")
|
||||
.insert(
|
||||
"pipeline",
|
||||
"compv3",
|
||||
"The type of pipeline. Possible values are compv3, compv4 or mem. Default is compv3.")
|
||||
.insert("scheduler",
|
||||
"The type of pipeline. Possible values are compv3, compv4 or mem. Default is "
|
||||
"compv3.")
|
||||
.insert(
|
||||
"epilogue",
|
||||
"cshuffle",
|
||||
"The type of epilogue. Possible values are cshuffle or default. Default is csshuffle.")
|
||||
.insert("pad_m",
|
||||
"false",
|
||||
"Whether pad or not in m direction. Possible values are true or false. Default is "
|
||||
"false.")
|
||||
.insert("pad_n",
|
||||
"false",
|
||||
"Whether pad or not in n direction. Possible values are true or false. Default is "
|
||||
"false.")
|
||||
.insert("pad_k",
|
||||
"false",
|
||||
"Whether pad or not in k direction. Possible values are true or false. Default is "
|
||||
"false.");
|
||||
|
||||
bool result = arg_parser.parse(argc, argv);
|
||||
return std::make_tuple(result, arg_parser);
|
||||
|
||||
Reference in New Issue
Block a user