diff --git a/host/driver_offline/src/conv_fwd_driver_offline.cpp b/host/driver_offline/src/conv_fwd_driver_offline.cpp index d7811cef3b..e0da35c7ba 100644 --- a/host/driver_offline/src/conv_fwd_driver_offline.cpp +++ b/host/driver_offline/src/conv_fwd_driver_offline.cpp @@ -20,10 +20,10 @@ #define USE_DYNAMIC_MODE 0 #define USE_CONV_FWD_V4R4_NCHW 0 -#define USE_CONV_FWD_V4R4R2_NHWC 1 -#define USE_CONV_FWD_V6R1_NCHW 1 +#define USE_CONV_FWD_V4R4R2_NHWC 0 +#define USE_CONV_FWD_V6R1_NCHW 0 #define USE_CONV_FWD_V4R4R2_XDL_NCHW 0 -#define USE_CONV_FWD_V4R4R4_XDL_NHWC 0 +#define USE_CONV_FWD_V4R4R4_XDL_NHWC 1 enum ConvTensorLayout { diff --git a/profiler/conv_profiler.cpp b/profiler/conv_profiler.cpp index 98121ec507..1d39d59e75 100644 --- a/profiler/conv_profiler.cpp +++ b/profiler/conv_profiler.cpp @@ -34,14 +34,14 @@ int conv_profiler(int argc, char* argv[]) { if(argc != 25) { - printf("arg1: tensor operation (conv=Convolution)\n"); - printf("arg2: data type (0=fp32, 1=fp16)\n"); - printf("arg3: input tensor layout (0=NCHW, 1=NHWC)\n"); - printf("arg4: weight tensor layout (0=KCYX, 1=KYXC)\n"); - printf("arg5: output tensor layout (0=NKHW, 1=NHWK)\n"); - printf("arg6: verification (0=no, 1=yes)\n"); - printf("arg7: initialization (0=no init, 1=integer value, 2=decimal value)\n"); - printf("arg8: print matrix value (0=no, 1=yes)\n"); + printf("arg1: tensor operation (conv: Convolution)\n"); + printf("arg2: data type (0: fp32; 1: fp16)\n"); + printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n"); + printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n"); + printf("arg5: output tensor layout (0: NKHW; 1: NHWK)\n"); + printf("arg6: verification (0: no; 1: yes)\n"); + printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n"); + printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg9: run kernel # of times (>1)\n"); printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, " "RightPx\n"); diff --git 
a/profiler/gemm_profiler.cpp b/profiler/gemm_profiler.cpp index 31b2d84c53..018fe872d0 100644 --- a/profiler/gemm_profiler.cpp +++ b/profiler/gemm_profiler.cpp @@ -37,12 +37,15 @@ int gemm_profiler(int argc, char* argv[]) { if(argc != 14) { - printf("arg1: tensor operation (gemm=GEMM)\n"); - printf("arg2: data type (0=fp32, 1=fp16)\n"); - printf("arg3: matrix layout (0=NN, 1=NT, 2=TN, 3=TT)\n"); - printf("arg4: verification (0=no, 1=yes)\n"); - printf("arg5: initialization (0=no init, 1=integer value, 2=decimal value)\n"); - printf("arg6: print matrix value (0=no, 1=yes)\n"); + printf("arg1: tensor operation (gemm: GEMM)\n"); + printf("arg2: data type (0: fp32; 1: fp16)\n"); + printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n"); + printf(" 1: A[m, k] * B[n, k] = C[m, n];\n"); + printf(" 2: A[k, m] * B[k, n] = C[m, n];\n"); + printf(" 3: A[k, m] * B[n, k] = C[m, n])\n"); + printf("arg4: verification (0: no; 1: yes)\n"); + printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n"); + printf("arg6: print tensor value (0: no; 1: yes)\n"); printf("arg7: run kernel # of times (>1)\n"); printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n"); exit(1);