mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 18:17:44 +00:00
Group norm (#417)
* Add groupnorm example by layernorm
1. Reference is not ready
2. Shape of gamma and beta needs to be fixed
* Allow the shape of gamma and beta to be the same as x
* Modify test, instance and client example
* [What] Fix bug of layernorm for greater than 2 dimension.
[Why] We need to get upper length from merge transform instead of embed transform.
* Add reference for groupnorm
* Fuse sigmoid after groupnorm
* [What] Rename original layernorm into layernorm2d
[Why] Prepare to add groupnorm using layernorm5d
* clang-format
* Add groupnorm test
* Refine error message
* Add groupnorm ckProfiler
* Test groupnorm kernel from device_instance
* update example
* update profiler
* Fix test naming
* Fix argc number
* Move descriptor and sweeponce to argument for quick debugging
Co-authored-by: Chao Liu <chao.liu2@amd.com>
[ROCm/composable_kernel commit: 4eba345f6e]
This commit is contained in:
@@ -3,26 +3,27 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
int profile_gemm(int, char*[]);
|
||||
int profile_gemm_splitk(int, char*[]);
|
||||
int profile_gemm_bilinear(int, char*[]);
|
||||
int profile_gemm_add_add_fastgelu(int, char*[]);
|
||||
int profile_gemm_reduce(int, char*[]);
|
||||
int profile_gemm_bias_add_reduce(int, char*[]);
|
||||
int profile_batched_gemm(int, char*[]);
|
||||
int profile_batched_gemm_gemm(int, char*[]);
|
||||
int profile_batched_gemm_add_relu_gemm_add(int, char*[]);
|
||||
int profile_batched_gemm_reduce(int, char*[]);
|
||||
int profile_grouped_gemm(int, char*[]);
|
||||
int profile_conv_fwd(int, char*[]);
|
||||
int profile_conv_fwd_bias_relu(int, char*[]);
|
||||
int profile_conv_fwd_bias_relu_add(int, char*[]);
|
||||
int profile_conv_bwd_data(int, char*[]);
|
||||
int profile_conv_bwd_weight(int, char*[]);
|
||||
int profile_grouped_conv_fwd(int, char*[]);
|
||||
int profile_normalization(int, char*[]);
|
||||
// int profile_gemm(int, char*[]);
|
||||
// int profile_gemm_splitk(int, char*[]);
|
||||
// int profile_gemm_bilinear(int, char*[]);
|
||||
// int profile_gemm_add_add_fastgelu(int, char*[]);
|
||||
// int profile_gemm_reduce(int, char*[]);
|
||||
// int profile_gemm_bias_add_reduce(int, char*[]);
|
||||
// int profile_batched_gemm(int, char*[]);
|
||||
// int profile_batched_gemm_gemm(int, char*[]);
|
||||
// int profile_batched_gemm_add_relu_gemm_add(int, char*[]);
|
||||
// int profile_batched_gemm_reduce(int, char*[]);
|
||||
// int profile_grouped_gemm(int, char*[]);
|
||||
// int profile_conv_fwd(int, char*[]);
|
||||
// int profile_conv_fwd_bias_relu(int, char*[]);
|
||||
// int profile_conv_fwd_bias_relu_add(int, char*[]);
|
||||
// int profile_conv_bwd_data(int, char*[]);
|
||||
// int profile_conv_bwd_weight(int, char*[]);
|
||||
// int profile_grouped_conv_fwd(int, char*[]);
|
||||
// int profile_normalization(int, char*[]);
|
||||
int profile_layernorm(int, char*[]);
|
||||
int profile_reduce(int, char*[]);
|
||||
int profile_groupnorm(int, char*[]);
|
||||
// int profile_reduce(int, char*[]);
|
||||
|
||||
static void print_helper_message()
|
||||
{
|
||||
@@ -56,6 +57,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
return 0;
|
||||
}
|
||||
#if 0
|
||||
else if(strcmp(argv[1], "gemm") == 0)
|
||||
{
|
||||
return profile_gemm(argc, argv);
|
||||
@@ -132,10 +134,15 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
return profile_normalization(argc, argv);
|
||||
}
|
||||
#endif
|
||||
else if(strcmp(argv[1], "layernorm") == 0)
|
||||
{
|
||||
return profile_layernorm(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "groupnorm") == 0)
|
||||
{
|
||||
return profile_groupnorm(argc, argv);
|
||||
}
|
||||
else
|
||||
{
|
||||
print_helper_message();
|
||||
|
||||
Reference in New Issue
Block a user