mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 01:36:06 +00:00
Standalone sweep once softmax kernel w/ ckProfiler (#295)
* use 'sweep once' softmax kernel where applicable
* threadwise copy's dst buffer can specify invalid element value
* add int8 in/out float compute softmax support
  (give a bit of leeway for int absolute tolerance, as there's a single data point of all test cases showing an off-by-1 error)
* format
* softmax inherits DeviceNormalization
* softmax profiler stub
* tighten up reference softmax interface
* example prints tensor dimension
* add fp32 to softmax profiler
* rename header
* hook with ckProfiler
* format
* resolve merge conflict
* resolve merge conflicts
* update normalization profiler help string
* resolve conflict
* typo
* remove residual
* softmax profiler: address feedback
* test for mixed precision input/output
* fully qualify ck::math::isnan
* add comment for device normalization interface
* revise wording
* constness for alpha/beta scalar pointer
This commit is contained in:
@@ -20,6 +20,7 @@ int profile_conv_fwd_bias_relu_add(int, char*[]);
|
||||
int profile_convnd_fwd(int argc, char* argv[]);
|
||||
int profile_convnd_bwd_data(int, char*[], int);
|
||||
int profile_conv_bwd_weight(int, char*[]);
|
||||
int profile_normalization(int, char*[]);
|
||||
int profile_reduce(int, char*[]);
|
||||
|
||||
static void print_helper_message()
|
||||
@@ -130,6 +131,11 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
return profile_gemm_add_add_fastgelu(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "batchnorm") == 0 || strcmp(argv[1], "layernorm") == 0 ||
|
||||
strcmp(argv[1], "softmax") == 0)
|
||||
{
|
||||
return profile_normalization(argc, argv);
|
||||
}
|
||||
else
|
||||
{
|
||||
print_helper_message();
|
||||
|
||||
Reference in New Issue
Block a user