Add xdlops v4r4r4 into online compilation (#48)

* init for v4r4 xdlops olc
* refactor wrap
* init impl of v4r4 nchw xdlops olc
* tuning
* test perf
* fixed v4r4 nhwc
* tuned v4r4 nhwc
* use gridwise_gemm_xdlops_v2r3
* swap a/b
* add pointer support into offline v2r3
* debugging v4r4r4 transform for olc
* change timer of olc
* refactor v4r4 xdlops nchw olc
* remove transform fun in v4r4 xdlops nhwc olc

Co-authored-by: Chao Liu <chao.liu2@amd.com>
2026-05-12 09:16:52 +00:00 · 2021-07-16 23:27:08 -05:00
parent 0a72e4df94
commit fbdf4332c7
28 changed files with 1851 additions and 3909 deletions
--- a/driver/conv_driver_v2.cpp
+++ b/driver/conv_driver_v2.cpp
@@ -19,8 +19,6 @@
 #include "device_dynamic_convolution_forward_implicit_gemm_v4r5r2_nchw_kcyx_nkhw.hpp"
 #include "device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp"
 #include "device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nchw_kcyx_nkhw.hpp"
-#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nhwk.hpp"
-#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r3_xdlops_nhwc_kyxc_nhwk.hpp"
 #include "device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp"

 #define USE_DYNAMIC_MODE 1
@@ -30,9 +28,7 @@
 #define USE_CONV_FWD_V4R5_NCHW 0
 #define USE_CONV_FWD_V4R5R2_NCHW 0
 #define USE_CONV_FWD_V5R1_NCHW 0
-#define USE_CONV_FWD_V4R4_XDL_NCHW 1
-#define USE_CONV_FWD_V4R4R2_XDL_NHWC 0
-#define USE_CONV_FWD_V4R4R3_XDL_NHWC 0
+#define USE_CONV_FWD_V4R4R2_XDL_NCHW 1
 #define USE_CONV_FWD_V4R4R4_XDL_NHWC 1

 enum ConvForwardAlgo
@@ -43,10 +39,8 @@ enum ConvForwardAlgo
    V4R5NCHW,      // 3
    V4R5R2NCHW,    // 4
    V5R1NCHW,      // 5
-    V4R4XDLNCHW,   // 6
-    V4R4R2XDLNHWC, // 7
-    V4R4R3XDLNHWC, // 8
-    V4R4R4XDLNHWC  // 9
+    V4R4R2XDLNCHW, // 6
+    V4R4R4XDLNHWC  // 7
 };

 int main(int argc, char* argv[])
@@ -462,8 +456,8 @@ int main(int argc, char* argv[])
    }
 #endif

-#if USE_CONV_FWD_V4R4_XDL_NCHW
-    if(algo == ConvForwardAlgo::V4R4XDLNCHW)
+#if USE_CONV_FWD_V4R4R2_XDL_NCHW
+    if(algo == ConvForwardAlgo::V4R4R2XDLNCHW)
    {
        if(layout != ConvTensorLayout::NCHW)
        {
@@ -489,60 +483,6 @@ int main(int argc, char* argv[])
    }
 #endif

-#if USE_CONV_FWD_V4R4R2_XDL_NHWC
-    if(algo == ConvForwardAlgo::V4R4R2XDLNHWC)
-    {
-        if(layout != ConvTensorLayout::NHWC)
-        {
-            throw std::runtime_error("wrong! layout");
-        }
-
-        const auto tmp = f_make_for_device_nhwc();
-
-        device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nhwk<in_data_t,
-                                                                                      acc_data_t,
-                                                                                      out_data_t>(
-            tmp[I0],
-            tmp[I1],
-            tmp[I2],
-            tmp[I3],
-            tmp[I4],
-            tmp[I5],
-            tmp[I6],
-            in,
-            wei,
-            out_device,
-            nrepeat);
-    }
-#endif
-
-#if USE_CONV_FWD_V4R4R3_XDL_NHWC
-    if(algo == ConvForwardAlgo::V4R4R3XDLNHWC)
-    {
-        if(layout != ConvTensorLayout::NHWC)
-        {
-            throw std::runtime_error("wrong! layout");
-        }
-
-        const auto tmp = f_make_for_device_nhwc();
-
-        device_dynamic_convolution_forward_implicit_gemm_v4r4r3_xdlops_nhwc_kyxc_nhwk<in_data_t,
-                                                                                      acc_data_t,
-                                                                                      out_data_t>(
-            tmp[I0],
-            tmp[I1],
-            tmp[I2],
-            tmp[I3],
-            tmp[I4],
-            tmp[I5],
-            tmp[I6],
-            in,
-            wei,
-            out_device,
-            nrepeat);
-    }
-#endif
-
 #if USE_CONV_FWD_V4R4R4_XDL_NHWC
    if(algo == ConvForwardAlgo::V4R4R4XDLNHWC)
    {