diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/common.hpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/common.hpp index 98f41dc7fb..9b0ee27f38 100644 --- a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/common.hpp +++ b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/common.hpp @@ -24,6 +24,8 @@ #include "ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp" #include "ck/library/utility/host_tensor.hpp" +using ::ck::HostTensorDescriptor; + using PassThrough = ck::tensor_operation::element_wise::PassThrough; using ConvScaleRelu = ck::tensor_operation::element_wise::ScaleScaleRelu; using ConvScale = ck::tensor_operation::element_wise::ScaleScalePass; diff --git a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_bias_fastgelu_xdl_bf16_i8.cpp b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_bias_fastgelu_xdl_bf16_i8.cpp index 0bf748cdbb..b43f89698c 100644 --- a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_bias_fastgelu_xdl_bf16_i8.cpp +++ b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_bias_fastgelu_xdl_bf16_i8.cpp @@ -15,6 +15,8 @@ #include "ck/library/tensor_operation_instance/gpu/grouped_gemm_multi_abd_fixed_nk.hpp" +using ::ck::hip_check_error; + template using S = ck::Sequence; diff --git a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_fastgelu_xdl_bf16_i8.cpp b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_fastgelu_xdl_bf16_i8.cpp index f300583d13..cf12ace19c 100644 --- a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_fastgelu_xdl_bf16_i8.cpp +++ b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_fastgelu_xdl_bf16_i8.cpp @@ -17,6 +17,8 @@ #include "ck/host_utility/hip_check_error.hpp" +using ::ck::hip_check_error; + template using S = ck::Sequence; diff --git a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_bias_fastgelu_xdl_bf16_i8.cpp b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_bias_fastgelu_xdl_bf16_i8.cpp index 47d3e0abf9..d69233fef1 100644 --- a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_bias_fastgelu_xdl_bf16_i8.cpp +++ b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_bias_fastgelu_xdl_bf16_i8.cpp @@ -17,6 +17,8 @@ #include "ck/host_utility/hip_check_error.hpp" +using ::ck::hip_check_error; + template using S = ck::Sequence; diff --git a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_xdl_bf16_i8.cpp b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_xdl_bf16_i8.cpp index 8c705d3bcc..9f20133689 100644 --- a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_xdl_bf16_i8.cpp +++ b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_xdl_bf16_i8.cpp @@ -17,6 +17,8 @@ #include "ck/host_utility/hip_check_error.hpp" +using ::ck::hip_check_error; + template using S = ck::Sequence; diff --git a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_xdl_bf16_i8.cpp b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_xdl_bf16_i8.cpp index 557dea7676..87dfc2a5f9 100644 --- a/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_xdl_bf16_i8.cpp +++ b/client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_xdl_bf16_i8.cpp @@ -17,6 +17,8 @@ #include "ck/host_utility/hip_check_error.hpp" +using ::ck::hip_check_error; + template using S = ck::Sequence; diff --git a/example/01_gemm/common.hpp b/example/01_gemm/common.hpp index e482953e46..300f33b164 100644 --- a/example/01_gemm/common.hpp +++ b/example/01_gemm/common.hpp @@ -25,6 +25,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp" +#include "ck/host_utility/kernel_launch.hpp" + +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; struct ProblemSize final { diff --git a/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp index 10dd4eaa1f..08c186856e 100644 --- a/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp +++ b/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp @@ -19,6 +19,10 @@ #include "ck/library/utility/check_err.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + struct AlphaBetaAdd { AlphaBetaAdd(float alpha, float beta) : alpha_(alpha), beta_(beta){}; diff --git a/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp index 556aa90f3d..27b166ad80 100644 --- a/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp +++ b/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp @@ -19,6 +19,10 @@ #include "ck/library/utility/check_err.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + struct AlphaBetaAdd { AlphaBetaAdd(int alpha, int beta) : alpha_(alpha), beta_(beta){}; diff --git a/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp index 8f8b2e80fe..0f27e95cb1 100644 --- a/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp +++ b/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + struct AlphaBetaAdd { AlphaBetaAdd(float alpha, float beta) : alpha_(alpha), beta_(beta){}; diff --git a/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp b/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp index 17e9ceccec..becbed5fd2 100644 --- a/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp +++ b/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/04_gemm_add_add_fastgelu/common.hpp b/example/04_gemm_add_add_fastgelu/common.hpp index 91d17df95f..55837f6eab 100644 --- a/example/04_gemm_add_add_fastgelu/common.hpp +++ b/example/04_gemm_add_add_fastgelu/common.hpp @@ -23,6 +23,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/09_convnd_fwd/convnd_fwd_common.hpp b/example/09_convnd_fwd/convnd_fwd_common.hpp index 2a972c13eb..001e2ea72f 100644 --- a/example/09_convnd_fwd/convnd_fwd_common.hpp +++ b/example/09_convnd_fwd/convnd_fwd_common.hpp @@ -19,6 +19,10 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + void print_helper_msg() { std::cout << "arg1: verification (0=no, 1=yes)\n" diff --git a/example/09_convnd_fwd/convnd_fwd_dl_common.hpp b/example/09_convnd_fwd/convnd_fwd_dl_common.hpp index aeddd4fc59..3b43dfa4be 100644 --- a/example/09_convnd_fwd/convnd_fwd_dl_common.hpp +++ b/example/09_convnd_fwd/convnd_fwd_dl_common.hpp @@ -19,6 +19,10 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + void print_helper_msg() { std::cout << "arg1: verification (0=no, 1=yes)\n" diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp index 7142521c55..30a2297a28 100644 --- a/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp +++ b/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp @@ -26,6 +26,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using BF16 = ck::bhalf_t; using FP16 = ck::half_t; using FP32 = float; diff --git a/example/12_reduce/reduce_blockwise_two_call.cpp b/example/12_reduce/reduce_blockwise_two_call.cpp index 9e125c4e5d..5e09229663 100644 --- a/example/12_reduce/reduce_blockwise_two_call.cpp +++ b/example/12_reduce/reduce_blockwise_two_call.cpp @@ -20,6 +20,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/host_common_util.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using namespace ck; using namespace ck::tensor_operation::device; diff --git a/example/13_pool2d_fwd/pool2d_fwd_common.hpp b/example/13_pool2d_fwd/pool2d_fwd_common.hpp index abbf1b29f7..0d3a60db53 100644 --- a/example/13_pool2d_fwd/pool2d_fwd_common.hpp +++ b/example/13_pool2d_fwd/pool2d_fwd_common.hpp @@ -19,6 +19,10 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/14_gemm_quantization/gemm_wmma_quantization_int8.cpp b/example/14_gemm_quantization/gemm_wmma_quantization_int8.cpp index a3023997a1..f06bf7304c 100644 --- a/example/14_gemm_quantization/gemm_wmma_quantization_int8.cpp +++ b/example/14_gemm_quantization/gemm_wmma_quantization_int8.cpp @@ -20,6 +20,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp index 8f68ac6b05..221fbb0dbc 100644 --- a/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp +++ b/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp index db7f3cb091..e9a8e89625 100644 --- a/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp +++ b/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp index 3e1f7f0893..b9fca2df87 100644 --- a/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp +++ b/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp @@ -25,6 +25,11 @@ #include "ck/utility/tuple.hpp" #include "ck/utility/sequence.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp index 117a18e3bd..248252ded9 100644 --- a/example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp +++ b/example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp @@ -23,6 +23,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm_multiple_d.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp index c8de51f550..5940c820d8 100644 --- a/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp +++ b/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp @@ -23,6 +23,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm_multiple_d.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp index ac64a468a4..5bfdbd3c83 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp @@ -19,6 +19,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp index 2fcc0e3cb1..abfa8fd491 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp @@ -20,6 +20,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp index fb611fd444..8aa73e491d 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp @@ -20,6 +20,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16_fp8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16_fp8.cpp index 47eb6637bd..fd40aa0410 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16_fp8.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16_fp8.cpp @@ -20,6 +20,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp index 85ea8c2f2c..3856372eb2 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp @@ -19,6 +19,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp index 236e5e4fa2..ce57c01fab 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp @@ -19,6 +19,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fp32_tf32.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp32_tf32.cpp index c9a3ede151..ad6a1bf4e3 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_fp32_tf32.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_fp32_tf32.cpp @@ -21,6 +21,11 @@ #define EXAMPLE_WITH_COMPUTE_DATATYPE +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp index 60c4a71a35..3bfacb210b 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp @@ -19,6 +19,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp index 6b8de05f73..b69f0b3878 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp @@ -19,6 +19,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp index 16d018936b..10f9f9db31 100644 --- a/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp +++ b/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp @@ -19,6 +19,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp index 3cc38b381b..9268b64bed 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp index 3de32e9a6d..273de555e4 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp @@ -8,6 +8,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + // DataType using ADataType = INT8; using BDataType = INT8; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp index 0290c1829d..6a2935746f 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp @@ -7,6 +7,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + // DataType using ADataType = BF16; using BDataType = BF16; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp index e211a63b0b..98fa0ba861 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp @@ -7,6 +7,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + // DataType using ADataType = F16; using BDataType = F16; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp index 90c2cdcdaa..f7573ca7d6 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp @@ -7,6 +7,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + // DataType using ADataType = F32; using BDataType = F32; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp index e8594e206b..69bf7e431c 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp @@ -7,6 +7,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using ADataType = INT4; using ADataKernelType = INT8; using BDataType = INT4; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp index ee274f3a55..ba23a41049 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp @@ -7,6 +7,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using ADataType = INT8; using BDataType = INT8; using GemmAccDataType = INT32; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp index 3ee3037179..af5002f9f1 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp @@ -7,6 +7,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + // DataType using ADataType = BF16; using BDataType = BF16; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp index 9ce1e76cf5..3eec9b81c1 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp @@ -7,6 +7,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + // DataType using ADataType = F16; using BDataType = F16; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp index 7815d2beea..0247beb976 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp @@ -7,6 +7,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + // DataType using ADataType = F32; using BDataType = F32; diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp b/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp index 3e3c586dba..d95fe4b716 100644 --- a/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp +++ b/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp @@ -18,6 +18,10 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp b/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp index d219df0245..80918c4a1e 100644 --- a/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp +++ b/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp @@ -18,6 +18,10 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + void print_helper_msg() { std::cout << "arg1: verification (0=no, 1=yes)\n" diff --git a/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp index f4e6b4d6e3..0806a245c0 100644 --- a/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp +++ b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp @@ -19,6 +19,10 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp b/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp index 1e7899b35d..007c6d7037 100644 --- a/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp +++ b/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp @@ -14,6 +14,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp b/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp index 5f321e6284..52b8bda3f1 100644 --- a/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp +++ b/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp @@ -14,6 +14,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/19_binary_elementwise/elementwise_add_1d.cpp b/example/19_binary_elementwise/elementwise_add_1d.cpp index 90e2d28d95..040809e7c1 100644 --- a/example/19_binary_elementwise/elementwise_add_1d.cpp +++ b/example/19_binary_elementwise/elementwise_add_1d.cpp @@ -12,6 +12,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/19_binary_elementwise/elementwise_add_4d.cpp b/example/19_binary_elementwise/elementwise_add_4d.cpp index 797521dcb4..fc34bd714f 100644 --- a/example/19_binary_elementwise/elementwise_add_4d.cpp +++ b/example/19_binary_elementwise/elementwise_add_4d.cpp @@ -14,6 +14,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/20_grouped_conv_bwd_weight/common.hpp b/example/20_grouped_conv_bwd_weight/common.hpp index 9159e51eaf..6b603d2e48 100644 --- a/example/20_grouped_conv_bwd_weight/common.hpp +++ b/example/20_grouped_conv_bwd_weight/common.hpp @@ -20,6 +20,10 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using BF16 = ck::bhalf_t; using F16 = ck::half_t; using F32 = float; diff --git a/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp index abbc7a946c..eed3ef2e73 100644 --- a/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp +++ b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp index ae5e3f36ad..918b66ec2d 100644 --- a/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp +++ b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp index 23c602c39e..b3ac78d3c0 100644 --- a/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp +++ b/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp b/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp index 10d90b795c..92aa74371e 100644 --- a/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp +++ b/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + // This example demonstrate a single kernel that runs GEMM layer and laynorm in one fused kernel // // The GEMM + Layernorm implementation is a specialized kernel which allows fusing both layers diff --git a/example/22_cgemm/cgemm_xdl_common.hpp b/example/22_cgemm/cgemm_xdl_common.hpp index 26137a7c2e..1caad311e7 100644 --- a/example/22_cgemm/cgemm_xdl_common.hpp +++ b/example/22_cgemm/cgemm_xdl_common.hpp @@ -14,6 +14,10 @@ #include "ck/library/utility/literals.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/23_softmax/softmax_blockwise.cpp b/example/23_softmax/softmax_blockwise.cpp index d09e434bcf..8c52e0d184 100644 --- a/example/23_softmax/softmax_blockwise.cpp +++ b/example/23_softmax/softmax_blockwise.cpp @@ -18,6 +18,10 @@ #include "ck/library/utility/host_common_util.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using namespace ck::tensor_operation::device; using InDataType = ck::half_t; diff --git a/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp b/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp index d155b2c411..a1be6d7e75 100644 --- a/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp +++ b/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp @@ -18,6 +18,11 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/24_batched_gemm/batched_gemm_xdl_bf16_v3.cpp b/example/24_batched_gemm/batched_gemm_xdl_bf16_v3.cpp index bd4be91103..e9bbcbce76 100644 --- a/example/24_batched_gemm/batched_gemm_xdl_bf16_v3.cpp +++ b/example/24_batched_gemm/batched_gemm_xdl_bf16_v3.cpp @@ -18,6 +18,11 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp index 80f6b1d663..05136daa61 100644 --- a/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp +++ b/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp @@ -18,6 +18,11 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/24_batched_gemm/batched_gemm_xdl_fp16int4_b_scale_v3.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp16int4_b_scale_v3.cpp index 46e452005d..7b8e50b12b 100644 --- a/example/24_batched_gemm/batched_gemm_xdl_fp16int4_b_scale_v3.cpp +++ b/example/24_batched_gemm/batched_gemm_xdl_fp16int4_b_scale_v3.cpp @@ -18,6 +18,11 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp index 19a63ff841..fd650d9666 100644 --- a/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp +++ b/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp @@ -18,6 +18,11 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/24_batched_gemm/batched_gemm_xdl_fp8_rowwise_v3.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp8_rowwise_v3.cpp index 2a4610bf90..12fe9bc803 100644 --- a/example/24_batched_gemm/batched_gemm_xdl_fp8_rowwise_v3.cpp +++ b/example/24_batched_gemm/batched_gemm_xdl_fp8_rowwise_v3.cpp @@ -18,6 +18,11 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/24_batched_gemm/batched_gemm_xdl_int4.cpp b/example/24_batched_gemm/batched_gemm_xdl_int4.cpp index 4adb79ed29..cb42f0c775 100644 --- a/example/24_batched_gemm/batched_gemm_xdl_int4.cpp +++ b/example/24_batched_gemm/batched_gemm_xdl_int4.cpp @@ -18,6 +18,11 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/24_batched_gemm/batched_gemm_xdl_int8.cpp b/example/24_batched_gemm/batched_gemm_xdl_int8.cpp index 06b8c3c0cd..9d95e02720 100644 --- a/example/24_batched_gemm/batched_gemm_xdl_int8.cpp +++ b/example/24_batched_gemm/batched_gemm_xdl_int8.cpp @@ -18,6 +18,11 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp index 4f4003809b..0ede6c40cc 100644 --- a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp +++ b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp @@ -19,6 +19,11 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::make_ParallelTensorFunctor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; using Bypass = ck::tensor_layout::BypassLayoutVerification; diff --git a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp index e4881da0f5..f1e2bf76f2 100644 --- a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp +++ b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp @@ -17,6 +17,11 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/numeric.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::make_ParallelTensorFunctor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; using Bypass = ck::tensor_layout::BypassLayoutVerification; diff --git a/example/26_contraction/run_contraction_bilinear_example.inc b/example/26_contraction/run_contraction_bilinear_example.inc index ddb6d678e2..a71476043f 100644 --- a/example/26_contraction/run_contraction_bilinear_example.inc +++ b/example/26_contraction/run_contraction_bilinear_example.inc @@ -15,6 +15,10 @@ #include "ck/library/utility/numeric.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_contraction.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; int run_contraction_bilinear_example(int argc, char* argv[]) diff --git a/example/26_contraction/run_contraction_scale_example.inc b/example/26_contraction/run_contraction_scale_example.inc index 0a7287edf9..2c7c849601 100644 --- a/example/26_contraction/run_contraction_scale_example.inc +++ b/example/26_contraction/run_contraction_scale_example.inc @@ -15,6 +15,10 @@ #include "ck/library/utility/numeric.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_contraction.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; int run_contraction_scale_example(int argc, char* argv[]) diff --git a/example/27_layernorm2d_fwd/layernorm2d_fwd_fp16.cpp b/example/27_layernorm2d_fwd/layernorm2d_fwd_fp16.cpp index 20db24f56d..2952c93fd1 100644 --- a/example/27_layernorm2d_fwd/layernorm2d_fwd_fp16.cpp +++ b/example/27_layernorm2d_fwd/layernorm2d_fwd_fp16.cpp @@ -3,6 +3,10 @@ #include "common.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using XDataType = ck::half_t; using GammaDataType = ck::half_t; using BetaDataType = ck::half_t; diff --git a/example/27_layernorm2d_fwd/layernorm2d_fwd_splitk_fp16.cpp b/example/27_layernorm2d_fwd/layernorm2d_fwd_splitk_fp16.cpp index 5a57082c79..909fd2c687 100644 --- a/example/27_layernorm2d_fwd/layernorm2d_fwd_splitk_fp16.cpp +++ b/example/27_layernorm2d_fwd/layernorm2d_fwd_splitk_fp16.cpp @@ -3,6 +3,10 @@ #include "common.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using XDataType = ck::half_t; using GammaDataType = ck::half_t; using BetaDataType = ck::half_t; diff --git a/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp index 70c4a01185..a94bae4f30 100644 --- a/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp +++ b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp @@ -18,6 +18,11 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/numeric.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::make_ParallelTensorFunctor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; using Bypass = ck::tensor_layout::BypassLayoutVerification; diff --git a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp index c4cb7a13a2..60b2569c13 100644 --- a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp +++ b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp @@ -17,6 +17,11 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/numeric.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::make_ParallelTensorFunctor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; using Bypass = ck::tensor_layout::BypassLayoutVerification; diff --git a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp index 2b9afc342e..aea466626c 100644 --- a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp +++ b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp @@ -17,6 +17,11 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/numeric.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::make_ParallelTensorFunctor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; using Bypass = ck::tensor_layout::BypassLayoutVerification; diff --git a/example/30_grouped_conv_fwd_multiple_d/common.hpp b/example/30_grouped_conv_fwd_multiple_d/common.hpp index 92fdaa7f33..4e8f08ca36 100644 --- a/example/30_grouped_conv_fwd_multiple_d/common.hpp +++ b/example/30_grouped_conv_fwd_multiple_d/common.hpp @@ -24,6 +24,11 @@ #include "ck/library/utility/convolution_parameter.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using BF16 = ck::bhalf_t; using FP16 = ck::half_t; using FP32 = float; diff --git a/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp b/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp index 56b0e6d1dc..78de7ccc0b 100644 --- a/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp +++ b/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp @@ -24,6 +24,11 @@ #include "ck/library/utility/convolution_parameter.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using BF16 = ck::bhalf_t; using FP16 = ck::half_t; using FP32 = float; diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_bf16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_bf16.cpp index b1d732d95b..db65cf524a 100644 --- a/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_bf16.cpp +++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_bf16.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using BF16 = ck::bhalf_t; using F32 = float; diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_fp16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_fp16.cpp index e3f22a3186..ad8dc90376 100644 --- a/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_fp16.cpp +++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_fp16.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_fp8.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_fp8.cpp index e58333b2f3..a59012f69f 100644 --- a/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_fp8.cpp +++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_fp8.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using PassThrough = ck::tensor_operation::element_wise::PassThrough; using ADataType = ck::f8_t; diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_int8.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_int8.cpp index d534306115..37ba7c8496 100644 --- a/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_int8.cpp +++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_wmma_cshuffle_v3_int8.cpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using PassThrough = ck::tensor_operation::element_wise::PassThrough; using ADataType = int8_t; diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp index 9afd199f24..0dc2944e07 100644 --- a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp +++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp @@ -26,6 +26,10 @@ Gemm + Gemm fused operation. Computes C_m_o = A_m_k * B0_k_n * B1_n_o #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp index 6a8aa8a721..ed0e513388 100644 --- a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp +++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp @@ -26,6 +26,10 @@ Gemm + Gemm fused operation. Computes C_m_o = A_m_k * B0_k_n * B1_n_o #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp index 8a27fb2b3c..bff8bdedbe 100644 --- a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp +++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp @@ -26,6 +26,10 @@ Gemm + Gemm fused operation. Computes C_m_o = A_m_k * B0_k_n * B1_n_o #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp index bf55e2fd84..d6fbf970e5 100644 --- a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp +++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp @@ -28,6 +28,10 @@ Gemm + Gemm fused operation. Computes C_m_o = A_m_k * B0_k_n * B1_n_o #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp index e5789ba4b0..3145c6d324 100644 --- a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp +++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp @@ -26,6 +26,10 @@ Gemm + Gemm fused operation. Computes C_m_o = A_m_k * B0_k_n * B1_n_o #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16.cpp index 69ab5c5c0b..5e9fbba935 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16.cpp @@ -29,6 +29,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_n = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp index 2604a50a76..e4191577bc 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp @@ -28,6 +28,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_wmma_fp16.cpp index f5cedb14c9..2cc1ba94c8 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_wmma_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_wmma_fp16.cpp @@ -29,6 +29,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_n = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp index 331bfe99c2..53a9db1029 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp @@ -27,6 +27,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp index cd321c0da3..c87e718ce3 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp @@ -28,6 +28,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp index f30ec3fd03..74781e2d45 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp @@ -26,6 +26,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp index e403ba7f66..54365aae5b 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp @@ -27,6 +27,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/cross_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/cross_attention_forward_wmma_fp16.cpp index 41c6dff2df..90dbc67e92 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/cross_attention_forward_wmma_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/cross_attention_forward_wmma_fp16.cpp @@ -29,6 +29,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_n = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp index 7738a6b6d4..36aef659b8 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp @@ -27,6 +27,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp index b59498829e..7b88e0c530 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp @@ -28,6 +28,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/grouped_query_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_query_attention_forward_wmma_fp16.cpp index 955c25f0d1..65fa1885ac 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/grouped_query_attention_forward_wmma_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/grouped_query_attention_forward_wmma_fp16.cpp @@ -30,6 +30,10 @@ Example is GQA-4 #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/multi_query_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/multi_query_attention_forward_wmma_fp16.cpp index 112be07c49..4fe9ddc3eb 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/multi_query_attention_forward_wmma_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/multi_query_attention_forward_wmma_fp16.cpp @@ -28,6 +28,10 @@ Shazeer, Noam. “Fast Transformer Decoding: One Write-Head Is All You Need.” #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/32_batched_gemm_scale_softmax_gemm/self_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/self_attention_forward_wmma_fp16.cpp index 9ec1bc933f..8a45f4b5bb 100644 --- a/example/32_batched_gemm_scale_softmax_gemm/self_attention_forward_wmma_fp16.cpp +++ b/example/32_batched_gemm_scale_softmax_gemm/self_attention_forward_wmma_fp16.cpp @@ -29,6 +29,10 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_n = Softmax(A_g_m_k * B0_g #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" #include "ck/host_utility/device_prop.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/33_multiple_reduce/dual_reduce_common.hpp b/example/33_multiple_reduce/dual_reduce_common.hpp index cd21790be6..66f877ffb4 100644 --- a/example/33_multiple_reduce/dual_reduce_common.hpp +++ b/example/33_multiple_reduce/dual_reduce_common.hpp @@ -19,6 +19,11 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/host_common_util.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::joinable_thread; +using ::ck::Tensor; + static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'}, {"verify", required_argument, nullptr, 'v'}, {"help", no_argument, nullptr, '?'}, diff --git a/example/34_batchnorm/batchnorm_backward_nhwc.cpp b/example/34_batchnorm/batchnorm_backward_nhwc.cpp index 9737b0d99b..4344ec5525 100644 --- a/example/34_batchnorm/batchnorm_backward_nhwc.cpp +++ b/example/34_batchnorm/batchnorm_backward_nhwc.cpp @@ -14,6 +14,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + static struct option long_options[] = {{"inOutLengths", required_argument, nullptr, 'D'}, {"verify", required_argument, nullptr, 'v'}, {"help", no_argument, nullptr, '?'}, diff --git a/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp index 1ffbabd04b..5f65de1b2c 100644 --- a/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp +++ b/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp @@ -20,6 +20,10 @@ #include "batchnorm_infer_impl.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + static struct option long_options[] = {{"inOutLengths", required_argument, nullptr, 'D'}, {"verify", required_argument, nullptr, 'v'}, {"help", no_argument, nullptr, '?'}, diff --git a/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp index 06441be860..a29b58412d 100644 --- a/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp +++ b/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp @@ -20,6 +20,10 @@ #include "ck/library/utility/host_common_util.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + static struct option long_options[] = {{"inOutLengths", required_argument, nullptr, 'D'}, {"verify", required_argument, nullptr, 'v'}, {"help", no_argument, nullptr, '?'}, diff --git a/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp index 8f2b7613b5..2e5f7a3086 100644 --- a/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp +++ b/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp @@ -20,6 +20,10 @@ #include "ck/library/utility/host_common_util.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + static struct option long_options[] = {{"inOutLengths", required_argument, nullptr, 'D'}, {"verify", required_argument, nullptr, 'v'}, {"help", no_argument, nullptr, '?'}, diff --git a/example/35_splitK_gemm/common.hpp b/example/35_splitK_gemm/common.hpp index 325cc37731..6ed9214a55 100644 --- a/example/35_splitK_gemm/common.hpp +++ b/example/35_splitK_gemm/common.hpp @@ -23,6 +23,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm_multiple_d.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + struct ProblemSizeSplitK final { ck::index_t M = 256; diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp index 1b8194f838..97608418f9 100644 --- a/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp +++ b/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp index 8628e8770c..d7240396c5 100644 --- a/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp +++ b/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp index 8091a5b448..f3b9f66346 100644 --- a/example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp +++ b/example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp index 4257451754..66ceb55a79 100644 --- a/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp +++ b/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp index f0d4e28ad2..229d2b8709 100644 --- a/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp +++ b/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp index d800443932..17bb675d34 100644 --- a/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp +++ b/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp index ef27c7bb9f..41596019ca 100644 --- a/example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp +++ b/example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp @@ -26,6 +26,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp b/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp index ff7acea48d..232a0fcf86 100644 --- a/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp +++ b/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp @@ -19,6 +19,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + // clang-format off using EmbType = ck::half_t; using IndexType = int64_t; diff --git a/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp b/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp index 4934f74393..51344f5572 100644 --- a/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp +++ b/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp @@ -22,6 +22,10 @@ Computes C_m_o = Relu(A0[m, k] * B0[n, k] + D00[m, n] + D01[mn]) * B1[n, o] + D1 #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/38_grouped_conv_bwd_data_multiple_d/common.hpp b/example/38_grouped_conv_bwd_data_multiple_d/common.hpp index 1823d4fc0a..3297e818b8 100644 --- a/example/38_grouped_conv_bwd_data_multiple_d/common.hpp +++ b/example/38_grouped_conv_bwd_data_multiple_d/common.hpp @@ -22,6 +22,11 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/39_permute/common.hpp b/example/39_permute/common.hpp index b23128a536..be1b824341 100644 --- a/example/39_permute/common.hpp +++ b/example/39_permute/common.hpp @@ -26,6 +26,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; using F64 = double; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp index f9a7d9f638..2b89958342 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using BiasDataType = int32_t; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp index 333987edd6..06fa50689c 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using BiasDataType = int32_t; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp index 4b94045421..aa31cb2de8 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using BiasDataType = int32_t; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp index b74e06b10a..48e27370cd 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using BiasDataType = int32_t; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp index c3ac40a1bc..8ce49d86db 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using RequantScaleDataType = float; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp index 437fd6f4c2..b58e25cb53 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using AccDataType = int32_t; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp index d9cfae2898..8aa0a43356 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using BiasDataType = int32_t; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp index 9d3024fce7..22a5521dd3 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using BiasDataType = int32_t; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp index 2d4ae1f837..c01e9d9bb0 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using RequantScaleDataType = float; diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp index 79b0c00fa5..5091065c30 100644 --- a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp +++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp @@ -4,6 +4,11 @@ #include "common.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = int8_t; using WeiDataType = int8_t; using AccDataType = int32_t; diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp index ba589ec044..fd746cecfa 100644 --- a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp +++ b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp @@ -20,6 +20,10 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using In0DataType = ck::bhalf_t; using Wei0DataType = ck::bhalf_t; using Acc0DataType = float; diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp index 847859068f..541967fc8e 100644 --- a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp +++ b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp @@ -20,6 +20,10 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using In0DataType = ck::half_t; using Wei0DataType = ck::half_t; using Acc0DataType = float; diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp index 9a104dbfab..f8d6827ed0 100644 --- a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp +++ b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp @@ -20,6 +20,10 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using In0DataType = float; using Wei0DataType = float; using Acc0DataType = float; diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp index cf7b1ce3a8..a010a88b19 100644 --- a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp +++ b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp @@ -22,6 +22,10 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using In0DataType = ck::int4_t; using Wei0DataType = ck::int4_t; using KernelIn0DataType = int8_t; diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp index 3ade6c811a..2e59a36b01 100644 --- a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp +++ b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp @@ -20,6 +20,10 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using In0DataType = int8_t; using Wei0DataType = int8_t; using Acc0DataType = int32_t; diff --git a/example/42_groupnorm_fwd/groupnorm_fwd_sigmoid_mul_fp16.cpp b/example/42_groupnorm_fwd/groupnorm_fwd_sigmoid_mul_fp16.cpp index 15c02b8213..35aaa21e38 100644 --- a/example/42_groupnorm_fwd/groupnorm_fwd_sigmoid_mul_fp16.cpp +++ b/example/42_groupnorm_fwd/groupnorm_fwd_sigmoid_mul_fp16.cpp @@ -3,6 +3,10 @@ #include "common.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr int Rank = 5; constexpr int NumReduceDim = 3; diff --git a/example/42_groupnorm_fwd/groupnorm_fwd_splitk_fp16.cpp b/example/42_groupnorm_fwd/groupnorm_fwd_splitk_fp16.cpp index 37fdf1b253..18e3fdf9ba 100644 --- a/example/42_groupnorm_fwd/groupnorm_fwd_splitk_fp16.cpp +++ b/example/42_groupnorm_fwd/groupnorm_fwd_splitk_fp16.cpp @@ -3,6 +3,10 @@ #include "common.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr int Rank = 5; constexpr int NumReduceDim = 3; diff --git a/example/42_groupnorm_fwd/groupnorm_fwd_swish_fp16.cpp b/example/42_groupnorm_fwd/groupnorm_fwd_swish_fp16.cpp index 6d17264cef..3259fdf78d 100644 --- a/example/42_groupnorm_fwd/groupnorm_fwd_swish_fp16.cpp +++ b/example/42_groupnorm_fwd/groupnorm_fwd_swish_fp16.cpp @@ -3,6 +3,10 @@ #include "common.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr int Rank = 5; constexpr int NumReduceDim = 3; diff --git a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp index 873982227b..03f9812557 100644 --- a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp +++ b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp @@ -16,6 +16,11 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::make_ParallelTensorFunctor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp index 32a7e4a76e..9462f95e8a 100644 --- a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp +++ b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp @@ -16,6 +16,11 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::make_ParallelTensorFunctor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/44_elementwise_permute/elementwise_binary_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_binary_4D_fp16.cpp index bd9a8c151b..8688451dec 100644 --- a/example/44_elementwise_permute/elementwise_binary_4D_fp16.cpp +++ b/example/44_elementwise_permute/elementwise_binary_4D_fp16.cpp @@ -16,6 +16,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp index 2d689648f2..8a7b4a5e45 100644 --- a/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp +++ b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp @@ -16,6 +16,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp index 6e70a306d3..84e97f0a62 100644 --- a/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp +++ b/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp @@ -17,6 +17,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp index 632d88e88a..50ebcd25f8 100644 --- a/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp +++ b/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp @@ -16,6 +16,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp index bd54f1c19c..72891a2249 100644 --- a/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp +++ b/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp @@ -16,6 +16,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp index 9621d591a9..84b593e9ed 100644 --- a/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp +++ b/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp @@ -16,6 +16,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp b/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp index 1ce797e4dd..6785ccefb8 100644 --- a/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp +++ b/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp @@ -19,6 +19,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/utility/reduction_enums.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; using F8 = ck::f8_t; diff --git a/example/44_elementwise_permute/elementwise_trinary_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_trinary_4D_fp16.cpp index be4014f636..764b11dfa7 100644 --- a/example/44_elementwise_permute/elementwise_trinary_4D_fp16.cpp +++ b/example/44_elementwise_permute/elementwise_trinary_4D_fp16.cpp @@ -16,6 +16,10 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using F16 = ck::half_t; using F32 = float; diff --git a/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp b/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp index 21c5ff8d5a..4dcac14a8c 100644 --- a/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp +++ b/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp @@ -19,6 +19,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using ADataType = ck::half_t; // Input 1 using BDataType = ck::half_t; // Input 2 using XDataType = ck::half_t; diff --git a/example/46_gemm_add_multiply/common.hpp b/example/46_gemm_add_multiply/common.hpp index 2c656cf441..83796a4424 100644 --- a/example/46_gemm_add_multiply/common.hpp +++ b/example/46_gemm_add_multiply/common.hpp @@ -22,6 +22,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute_xdl.cpp b/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute_xdl.cpp index 3e69caf51e..464e3b2cf3 100644 --- a/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute_xdl.cpp +++ b/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute_xdl.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; using Col = ck::tensor_layout::gemm::ColumnMajor; using Bypass = ck::tensor_layout::BypassLayoutVerification; diff --git a/example/48_pool3d_fwd/pool3d_fwd_common.hpp b/example/48_pool3d_fwd/pool3d_fwd_common.hpp index ef64dd167d..cfdc366ba0 100644 --- a/example/48_pool3d_fwd/pool3d_fwd_common.hpp +++ b/example/48_pool3d_fwd/pool3d_fwd_common.hpp @@ -18,6 +18,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template std::vector f_tensor_strides_ncdhw(ck::index_t N_, ck::index_t C_, diff --git a/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp index 51b7f0015d..abfbc39e1a 100644 --- a/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp +++ b/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp @@ -19,6 +19,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template std::vector f_tensor_strides_ncdhw(ck::index_t N_, ck::index_t C_, diff --git a/example/52_im2col_col2im/column_to_image_f32.cpp b/example/52_im2col_col2im/column_to_image_f32.cpp index 047f2a2118..76a13ab42c 100644 --- a/example/52_im2col_col2im/column_to_image_f32.cpp +++ b/example/52_im2col_col2im/column_to_image_f32.cpp @@ -3,6 +3,10 @@ #include "common.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = FP32; // ck::bhalf_t;//FP32; using OutDataType = FP32; // ck::bhalf_t;//FP32; diff --git a/example/52_im2col_col2im/image_to_column_f32.cpp b/example/52_im2col_col2im/image_to_column_f32.cpp index 140bdf8062..90157bdc52 100644 --- a/example/52_im2col_col2im/image_to_column_f32.cpp +++ b/example/52_im2col_col2im/image_to_column_f32.cpp @@ -3,6 +3,10 @@ #include "common.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using InDataType = FP32; using OutDataType = FP32; diff --git a/example/53_layernorm2d_bwd/layernorm2d_bwd_fp32.cpp b/example/53_layernorm2d_bwd/layernorm2d_bwd_fp32.cpp index 0b0a3e72ad..79c9c627dd 100644 --- a/example/53_layernorm2d_bwd/layernorm2d_bwd_fp32.cpp +++ b/example/53_layernorm2d_bwd/layernorm2d_bwd_fp32.cpp @@ -19,6 +19,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_normalization_bwd_gamma_beta_impl.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_layernorm_bwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using DYDataType = float; using XDataType = float; using GammaDataType = float; diff --git a/example/54_groupnorm_bwd/groupnorm_bwd_fp32.cpp b/example/54_groupnorm_bwd/groupnorm_bwd_fp32.cpp index dcbb472118..b5245a5785 100644 --- a/example/54_groupnorm_bwd/groupnorm_bwd_fp32.cpp +++ b/example/54_groupnorm_bwd/groupnorm_bwd_fp32.cpp @@ -19,6 +19,10 @@ #include "ck/tensor_operation/gpu/device/impl/device_normalization_bwd_gamma_beta_impl.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_groupnorm_bwd.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using DYDataType = float; using XDataType = float; using GammaDataType = float; diff --git a/example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_bf16_i8.cpp b/example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_bf16_i8.cpp index 6f30bdaa73..bad76fc4f4 100644 --- a/example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_bf16_i8.cpp +++ b/example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_bf16_i8.cpp @@ -20,6 +20,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_fp16.cpp b/example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_fp16.cpp index 78f7d954f0..83f7eab9b2 100644 --- a/example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_fp16.cpp +++ b/example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_fp16.cpp @@ -20,6 +20,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_bias_fastgelu_bf16_i8.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_bias_fastgelu_bf16_i8.cpp index d40d09540f..45c61a7a79 100644 --- a/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_bias_fastgelu_bf16_i8.cpp +++ b/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_bias_fastgelu_bf16_i8.cpp @@ -20,6 +20,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_fastgelu_bf16_i8.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_fastgelu_bf16_i8.cpp index 102b7f50de..9a245f574f 100644 --- a/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_fastgelu_bf16_i8.cpp +++ b/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_fastgelu_bf16_i8.cpp @@ -20,6 +20,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_fp16.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_fp16.cpp index aeaa5fe776..edadaac3db 100644 --- a/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_fp16.cpp +++ b/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_fp16.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_multiply_bias_fastgelu_bf16_i8.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_multiply_bias_fastgelu_bf16_i8.cpp index 9363953a6e..0cecbf8cc7 100644 --- a/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_multiply_bias_fastgelu_bf16_i8.cpp +++ b/example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_multiply_bias_fastgelu_bf16_i8.cpp @@ -20,6 +20,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_bias_fastgelu_bf16_i8.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_bias_fastgelu_bf16_i8.cpp index a599f9d032..1240be87d2 100644 --- a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_bias_fastgelu_bf16_i8.cpp +++ b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_bias_fastgelu_bf16_i8.cpp @@ -20,6 +20,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fastgelu_bf16_i8.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fastgelu_bf16_i8.cpp index d7e316e1e0..6530d8cb45 100644 --- a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fastgelu_bf16_i8.cpp +++ b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fastgelu_bf16_i8.cpp @@ -20,6 +20,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp index 2a44c8ad2a..6bae806b99 100644 --- a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp +++ b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp @@ -18,6 +18,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" #include "ck/library/utility/check_err.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_multiply_bias_fastgelu_bf16_i8.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_multiply_bias_fastgelu_bf16_i8.cpp index 83cc61284e..199cb6288f 100644 --- a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_multiply_bias_fastgelu_bf16_i8.cpp +++ b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_multiply_bias_fastgelu_bf16_i8.cpp @@ -20,6 +20,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp b/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp index a9a30b4c27..c522fd56fa 100644 --- a/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp +++ b/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp @@ -19,6 +19,10 @@ #include "ck/library/utility/check_err.hpp" #include "ck/library/utility/numeric.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; using Bypass = ck::tensor_layout::BypassLayoutVerification; diff --git a/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp8.cpp b/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp8.cpp index 4f7414abfa..7eed258191 100644 --- a/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp8.cpp +++ b/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp8.cpp @@ -19,6 +19,10 @@ #include "ck/library/utility/check_err.hpp" #include "ck/library/utility/numeric.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using Row = ck::tensor_layout::gemm::RowMajor; using Bypass = ck::tensor_layout::BypassLayoutVerification; diff --git a/example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp b/example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp index 2710dd6b63..125d592835 100644 --- a/example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp +++ b/example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp @@ -22,6 +22,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr ck::index_t NDimSpatial = 3; using InDataType = ck::half_t; using WeiDataType = ck::half_t; diff --git a/example/62_convnd_activ/binary/convnd_bwd_weight_xdl_bilinear_residual_fp16.cpp b/example/62_convnd_activ/binary/convnd_bwd_weight_xdl_bilinear_residual_fp16.cpp index cb37ebf575..382640fdc3 100644 --- a/example/62_convnd_activ/binary/convnd_bwd_weight_xdl_bilinear_residual_fp16.cpp +++ b/example/62_convnd_activ/binary/convnd_bwd_weight_xdl_bilinear_residual_fp16.cpp @@ -21,6 +21,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr ck::index_t NDimSpatial = 3; using InDataType = ck::half_t; using WeiDataType = ck::half_t; diff --git a/example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp b/example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp index 616d0cc9e8..610a805816 100644 --- a/example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp +++ b/example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp @@ -21,6 +21,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr ck::index_t NDimSpatial = 3; using InDataType = ck::half_t; using WeiDataType = ck::half_t; @@ -263,4 +267,8 @@ bool run_grouped_conv(bool do_verification, #include "../run_convnd_activ_example.inc" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + int main(int argc, char* argv[]) { return !run_convnd_example(argc, argv); } diff --git a/example/62_convnd_activ/convinvscale/convnd_fwd_convinvscale_common.hpp b/example/62_convnd_activ/convinvscale/convnd_fwd_convinvscale_common.hpp index 4b2ebf8484..91a73b56a3 100644 --- a/example/62_convnd_activ/convinvscale/convnd_fwd_convinvscale_common.hpp +++ b/example/62_convnd_activ/convinvscale/convnd_fwd_convinvscale_common.hpp @@ -22,6 +22,10 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough; using ConvInvscale = ck::tensor_operation::element_wise::ConvInvscale; +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + void print_helper_msg() { std::cout << "arg1: verification (0=no, 1=yes)\n" diff --git a/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp b/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp index 0a802ee27d..2aae48d994 100644 --- a/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp +++ b/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp @@ -22,6 +22,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr ck::index_t NDimSpatial = 3; using InDataType = ck::half_t; using WeiDataType = ck::half_t; diff --git a/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp b/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp index 3266c55d7c..17be982924 100644 --- a/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp +++ b/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp @@ -21,6 +21,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr ck::index_t NDimSpatial = 3; using InDataType = ck::half_t; using WeiDataType = ck::half_t; diff --git a/example/62_convnd_activ/convscale/convnd_fwd_convscale_common.hpp b/example/62_convnd_activ/convscale/convnd_fwd_convscale_common.hpp index bf560f8a43..79f14992c2 100644 --- a/example/62_convnd_activ/convscale/convnd_fwd_convscale_common.hpp +++ b/example/62_convnd_activ/convscale/convnd_fwd_convscale_common.hpp @@ -22,6 +22,10 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough; using ConvScale = ck::tensor_operation::element_wise::ConvScale; +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + void print_helper_msg() { std::cout << "arg1: verification (0=no, 1=yes)\n" diff --git a/example/62_convnd_activ/convscale_add/convnd_fwd_convscale_add_common.hpp b/example/62_convnd_activ/convscale_add/convnd_fwd_convscale_add_common.hpp index b588d71087..c9b59c1e6d 100644 --- a/example/62_convnd_activ/convscale_add/convnd_fwd_convscale_add_common.hpp +++ b/example/62_convnd_activ/convscale_add/convnd_fwd_convscale_add_common.hpp @@ -17,6 +17,10 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough; using ConvScaleAdd = ck::tensor_operation::element_wise::ConvScaleAdd; +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + void print_helper_msg() { std::cout << "arg1: verification (0=no, 1=yes)\n" diff --git a/example/62_convnd_activ/convscale_reduce/convnd_fwd_convscale_reduce_common.hpp b/example/62_convnd_activ/convscale_reduce/convnd_fwd_convscale_reduce_common.hpp index f521c51d67..b99c91de1c 100644 --- a/example/62_convnd_activ/convscale_reduce/convnd_fwd_convscale_reduce_common.hpp +++ b/example/62_convnd_activ/convscale_reduce/convnd_fwd_convscale_reduce_common.hpp @@ -23,6 +23,10 @@ #include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp" #include "ck/utility/type.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + namespace ew = ck::tensor_operation::element_wise; using PassThrough = ew::PassThrough; diff --git a/example/62_convnd_activ/convscale_relu/convnd_fwd_convscale_relu_common.hpp b/example/62_convnd_activ/convscale_relu/convnd_fwd_convscale_relu_common.hpp index d2dacc2052..17c162305e 100644 --- a/example/62_convnd_activ/convscale_relu/convnd_fwd_convscale_relu_common.hpp +++ b/example/62_convnd_activ/convscale_relu/convnd_fwd_convscale_relu_common.hpp @@ -15,6 +15,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" #include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using PassThrough = ck::tensor_operation::element_wise::PassThrough; using ConvScaleRelu = ck::tensor_operation::element_wise::ConvScaleRelu; diff --git a/example/62_convnd_activ/dynamic_unary/convnd_fwd_activ_dynamic_unary_common.hpp b/example/62_convnd_activ/dynamic_unary/convnd_fwd_activ_dynamic_unary_common.hpp index 4af7f4535a..de5cd96951 100644 --- a/example/62_convnd_activ/dynamic_unary/convnd_fwd_activ_dynamic_unary_common.hpp +++ b/example/62_convnd_activ/dynamic_unary/convnd_fwd_activ_dynamic_unary_common.hpp @@ -23,6 +23,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr ck::index_t NDimSpatial = 3; using InDataType = ck::half_t; using WeiDataType = ck::half_t; diff --git a/example/62_convnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp b/example/62_convnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp index 566aa50d23..edbca0c7a1 100644 --- a/example/62_convnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp +++ b/example/62_convnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp @@ -21,6 +21,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr ck::index_t NDimSpatial = 3; template diff --git a/example/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp b/example/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp index dd171d9ed3..67f8b3e1a7 100644 --- a/example/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp +++ b/example/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp @@ -23,6 +23,10 @@ #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + constexpr ck::index_t NDimSpatial = 3; using InDataType = ck::half_t; using WeiDataType = ck::half_t; diff --git a/example/63_layernorm4d_fwd/layernorm4d_fwd_fp16.cpp b/example/63_layernorm4d_fwd/layernorm4d_fwd_fp16.cpp index 659cc3554d..4453cb1ddf 100644 --- a/example/63_layernorm4d_fwd/layernorm4d_fwd_fp16.cpp +++ b/example/63_layernorm4d_fwd/layernorm4d_fwd_fp16.cpp @@ -3,6 +3,10 @@ #include "common.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using XDataType = ck::half_t; using GammaDataType = ck::half_t; using BetaDataType = ck::half_t; diff --git a/example/63_layernorm4d_fwd/layernorm4d_fwd_splitk_fp16.cpp b/example/63_layernorm4d_fwd/layernorm4d_fwd_splitk_fp16.cpp index 415635a0e3..e9bd64d9ec 100644 --- a/example/63_layernorm4d_fwd/layernorm4d_fwd_splitk_fp16.cpp +++ b/example/63_layernorm4d_fwd/layernorm4d_fwd_splitk_fp16.cpp @@ -3,6 +3,10 @@ #include "common.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + using XDataType = ck::half_t; using GammaDataType = ck::half_t; using BetaDataType = ck::half_t; diff --git a/example/64_fpAintB_gemm/common.hpp b/example/64_fpAintB_gemm/common.hpp index 4fb4c41d05..65b2b1b6a3 100644 --- a/example/64_fpAintB_gemm/common.hpp +++ b/example/64_fpAintB_gemm/common.hpp @@ -22,6 +22,11 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_fpAintB_gemm.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::make_ParallelTensorFunctor; +using ::ck::Tensor; + struct ProblemSize final { ck::index_t M = 3840; diff --git a/example/65_gemm_multiply_multiply/gemm_add_add_wmma_fp16.cpp b/example/65_gemm_multiply_multiply/gemm_add_add_wmma_fp16.cpp index 54abab2f60..8b71caa5cd 100644 --- a/example/65_gemm_multiply_multiply/gemm_add_add_wmma_fp16.cpp +++ b/example/65_gemm_multiply_multiply/gemm_add_add_wmma_fp16.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/gemm_add_add_xdl_fp16.cpp b/example/65_gemm_multiply_multiply/gemm_add_add_xdl_fp16.cpp index fe8fd9c100..6c8c711da1 100644 --- a/example/65_gemm_multiply_multiply/gemm_add_add_xdl_fp16.cpp +++ b/example/65_gemm_multiply_multiply/gemm_add_add_xdl_fp16.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp16_bpreshuffle.cpp b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp16_bpreshuffle.cpp index 8b8cee9e52..24b69fdd8d 100644 --- a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp16_bpreshuffle.cpp +++ b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp16_bpreshuffle.cpp @@ -22,6 +22,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8.cpp b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8.cpp index 43637e4a1f..0982ef9830 100644 --- a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8.cpp +++ b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp index 3b21f95119..6fde02553a 100644 --- a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp +++ b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_blockscale_bpreshuffle.cpp b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_blockscale_bpreshuffle.cpp index d64266bccf..3fd2a2ef4f 100644 --- a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_blockscale_bpreshuffle.cpp +++ b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_blockscale_bpreshuffle.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_bpreshuffle.cpp b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_bpreshuffle.cpp index 3ee4955ae4..3c2499eafa 100644 --- a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_bpreshuffle.cpp +++ b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_bpreshuffle.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_int8.cpp b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_int8.cpp index cc01d01e64..3530ca828b 100644 --- a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_int8.cpp +++ b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_int8.cpp @@ -21,6 +21,11 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::hip_check_error; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8.cpp b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8.cpp index 2cb2dc17f4..aeb34d88fd 100644 --- a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8.cpp +++ b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8_blockscale.cpp b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8_blockscale.cpp index bca5ffec78..eb568950c3 100644 --- a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8_blockscale.cpp +++ b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8_blockscale.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp index d14885e7f2..f31ecae7e7 100644 --- a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp +++ b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8.cpp b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8.cpp index d80c75abe8..974b0ba466 100644 --- a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8.cpp +++ b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8_blockscale.cpp b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8_blockscale.cpp index 02369f344e..66118fb1d1 100644 --- a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8_blockscale.cpp +++ b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8_blockscale.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_pk_i4.cpp b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_pk_i4.cpp index cafea72559..617bb2461d 100644 --- a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_pk_i4.cpp +++ b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_pk_i4.cpp @@ -21,6 +21,10 @@ #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/66_complex_contraction_bilinear/run_complex_contraction_bilinear_example.inc b/example/66_complex_contraction_bilinear/run_complex_contraction_bilinear_example.inc index b08d12de86..9a0736fe33 100644 --- a/example/66_complex_contraction_bilinear/run_complex_contraction_bilinear_example.inc +++ b/example/66_complex_contraction_bilinear/run_complex_contraction_bilinear_example.inc @@ -15,6 +15,10 @@ #include "ck/library/utility/numeric.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_contraction.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + int run_complex_contraction_bilinear_example(int argc, char* argv[]) { bool do_verification = true; diff --git a/example/67_gemm_microscaling/gemm_mx_common.hpp b/example/67_gemm_microscaling/gemm_mx_common.hpp index 2d0585c880..b056e7358d 100644 --- a/example/67_gemm_microscaling/gemm_mx_common.hpp +++ b/example/67_gemm_microscaling/gemm_mx_common.hpp @@ -20,6 +20,10 @@ #include "ck/library/utility/fill.hpp" #include "ck/library/utility/host_tensor.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4.cpp b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4.cpp index 0c51a24679..9962e80fca 100644 --- a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4.cpp +++ b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4.cpp @@ -21,6 +21,10 @@ #include "ck/library/utility/fill.hpp" #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp index b6d5d8f211..8354c25e22 100644 --- a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp +++ b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp @@ -21,6 +21,10 @@ #include "ck/library/utility/fill.hpp" #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bpreshuffle.cpp b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bpreshuffle.cpp index ebb73ca7e0..74006909ee 100644 --- a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bpreshuffle.cpp +++ b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bpreshuffle.cpp @@ -21,6 +21,10 @@ #include "ck/library/utility/fill.hpp" #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4.cpp b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4.cpp index 61a63b47ac..d3ecbd226a 100644 --- a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4.cpp +++ b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4.cpp @@ -21,6 +21,10 @@ #include "ck/library/utility/fill.hpp" #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp index 2670468c4b..62d9868881 100644 --- a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp +++ b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp @@ -21,6 +21,10 @@ #include "ck/library/utility/fill.hpp" #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bpreshuffle.cpp b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bpreshuffle.cpp index c3454be84a..310737eaa8 100644 --- a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bpreshuffle.cpp +++ b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bpreshuffle.cpp @@ -21,6 +21,10 @@ #include "ck/library/utility/fill.hpp" #include "ck/utility/blkgemmpipe_scheduler.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/68_gemm_add/common.hpp b/example/68_gemm_add/common.hpp index 38e77a160f..a83e540cc6 100644 --- a/example/68_gemm_add/common.hpp +++ b/example/68_gemm_add/common.hpp @@ -29,6 +29,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/example/69_gemm_add_relu/common.hpp b/example/69_gemm_add_relu/common.hpp index 311cbb2dfb..9d2bb0ba5c 100644 --- a/example/69_gemm_add_relu/common.hpp +++ b/example/69_gemm_add_relu/common.hpp @@ -29,6 +29,10 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/literals.hpp" +using ::ck::DeviceMem; +using ::ck::HostTensorDescriptor; +using ::ck::Tensor; + template using S = ck::Sequence; diff --git a/experimental/builder/test/conv/test_conv_traits.cpp b/experimental/builder/test/conv/test_conv_traits.cpp index ca453d2ad4..b666db0836 100644 --- a/experimental/builder/test/conv/test_conv_traits.cpp +++ b/experimental/builder/test/conv/test_conv_traits.cpp @@ -1,5 +1,5 @@ +// Copyright (C) Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. #include #include diff --git a/include/ck/host_utility/hip_check_error.hpp b/include/ck/host_utility/hip_check_error.hpp index 92ee97e8fe..0035e75d52 100644 --- a/include/ck/host_utility/hip_check_error.hpp +++ b/include/ck/host_utility/hip_check_error.hpp @@ -6,6 +6,8 @@ #include #include +namespace ck { + // To be removed, which really does not tell the location of failed HIP functional call inline void hip_check_error(hipError_t x) { @@ -30,3 +32,5 @@ inline void hip_check_error(hipError_t x) throw std::runtime_error(ostr.str()); \ } \ } while(0) + +} // namespace ck diff --git a/include/ck/host_utility/io.hpp b/include/ck/host_utility/io.hpp index 9608052001..db45199b17 100644 --- a/include/ck/host_utility/io.hpp +++ b/include/ck/host_utility/io.hpp @@ -10,6 +10,8 @@ #include "ck/tensor_description/tensor_descriptor.hpp" +namespace ck { + template std::ostream& operator<<(std::ostream& os, const std::vector& v) { @@ -25,17 +27,19 @@ std::ostream& operator<<(std::ostream& os, const std::array& v) } template -std::ostream& operator<<(std::ostream& os, const ck::TensorDescriptor& desc) +std::ostream& operator<<(std::ostream& os, const TensorDescriptor& desc) { - constexpr ck::index_t nDim = ck::remove_cvref_t::GetNumOfDimension(); + constexpr index_t nDim = remove_cvref_t::GetNumOfDimension(); os << "{"; - ck::static_for<0, nDim - 1, 1>{}([&](auto i) { os << desc.GetLength(i) << ", "; }); + static_for<0, nDim - 1, 1>{}([&](auto i) { os << desc.GetLength(i) << ", "; }); - os << desc.GetLength(ck::Number{}); + os << desc.GetLength(Number{}); os << "}"; return os; } + +} // namespace ck diff --git a/include/ck/host_utility/kernel_launch.hpp b/include/ck/host_utility/kernel_launch.hpp index 80322b66be..c923ee0cbe 100644 --- a/include/ck/host_utility/kernel_launch.hpp +++ b/include/ck/host_utility/kernel_launch.hpp @@ -10,6 +10,8 @@ #include "ck/stream_config.hpp" #include "ck/host_utility/hip_check_error.hpp" +namespace ck { + template float launch_and_time_kernel(const StreamConfig& stream_config, F kernel, @@ -167,4 +169,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config, return 0; #endif } + +} // namespace ck + #endif diff --git a/include/ck/host_utility/stream_utility.hpp b/include/ck/host_utility/stream_utility.hpp index 2304921108..7dad8eadac 100644 --- a/include/ck/host_utility/stream_utility.hpp +++ b/include/ck/host_utility/stream_utility.hpp @@ -8,6 +8,8 @@ #include "ck/stream_config.hpp" #include "ck/host_utility/hip_check_error.hpp" +namespace ck { + static inline int getAvailableComputeUnitCount(const StreamConfig& stream_config) { constexpr int MAX_MASK_DWORDS = 64; @@ -41,3 +43,5 @@ static inline int getAvailableComputeUnitCount(const StreamConfig& stream_config return (ret); }; + +} // namespace ck diff --git a/include/ck/library/utility/device_memory.hpp b/include/ck/library/utility/device_memory.hpp index 54ecdc1972..b0ee766ff5 100644 --- a/include/ck/library/utility/device_memory.hpp +++ b/include/ck/library/utility/device_memory.hpp @@ -5,6 +5,8 @@ #include +namespace ck { + template __global__ void set_buffer_value(T* p, T x, uint64_t buffer_element_size) { @@ -48,3 +50,5 @@ void DeviceMem::SetValue(T x) const set_buffer_value<<<1, 1024>>>(static_cast(mpDeviceBuf), x, mMemSize / sizeof(T)); } + +} // namespace ck diff --git a/include/ck/library/utility/host_tensor.hpp b/include/ck/library/utility/host_tensor.hpp index 4e7ea80219..05bc4ded12 100644 --- a/include/ck/library/utility/host_tensor.hpp +++ b/include/ck/library/utility/host_tensor.hpp @@ -23,6 +23,8 @@ #include "ck/tensor_operation/gpu/device/tensor_layout.hpp" +namespace ck { + template std::ostream& LogRange(std::ostream& os, Range&& range, std::string delim) { @@ -1159,3 +1161,5 @@ struct Tensor Descriptor mDesc; Data mData; }; + +} // namespace ck diff --git a/library/include/ck/library/reference_tensor_operation/gpu/reference_gemm.hpp b/library/include/ck/library/reference_tensor_operation/gpu/reference_gemm.hpp index 7b8437f022..c05c0605bf 100644 --- a/library/include/ck/library/reference_tensor_operation/gpu/reference_gemm.hpp +++ b/library/include/ck/library/reference_tensor_operation/gpu/reference_gemm.hpp @@ -8,6 +8,7 @@ #include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp" #include "ck/tensor_operation/gpu/device/device_base.hpp" +#include "ck/host_utility/kernel_launch.hpp" namespace ck { @@ -209,20 +210,20 @@ struct ReferenceGemm : public device::BaseOperator ComputeTypeA, ComputeTypeB>; - return launch_and_time_kernel(stream_config, - kernel, - grid_dim, - block_dim, - 0, - arg.p_a_grid_, - arg.p_b_grid_, - arg.p_c_grid_, - arg.m_, - arg.n_, - arg.k_, - arg.a_element_op_, - arg.b_element_op_, - arg.c_element_op_); + return ck::launch_and_time_kernel(stream_config, + kernel, + grid_dim, + block_dim, + 0, + arg.p_a_grid_, + arg.p_b_grid_, + arg.p_c_grid_, + arg.m_, + arg.n_, + arg.k_, + arg.a_element_op_, + arg.b_element_op_, + arg.c_element_op_); }; return launch_kernel(); diff --git a/library/src/utility/convolution_parameter.cpp b/library/src/utility/convolution_parameter.cpp index 634b7f0890..edc6e13826 100644 --- a/library/src/utility/convolution_parameter.cpp +++ b/library/src/utility/convolution_parameter.cpp @@ -1,5 +1,5 @@ +// Copyright (C) Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #include "ck/host_utility/io.hpp" @@ -215,6 +215,7 @@ ck::utils::conv::ConvParam parse_conv_param(int num_dim_spatial, int arg_idx, ch std::ostream& operator<<(std::ostream& os, const ck::utils::conv::ConvParam& p) { + using ck::operator<<; os << "ConvParam {" << "\nnum_dim_spatial: " << p.num_dim_spatial_ << "\nG: " << p.G_ << "\nN: " << p.N_ << "\nK: " << p.K_ << "\nC: " << p.C_ << "\nfilter_spatial_lengths: " << p.filter_spatial_lengths_ diff --git a/library/src/utility/device_memory.cpp b/library/src/utility/device_memory.cpp index 61b6326b57..6e7b58029a 100644 --- a/library/src/utility/device_memory.cpp +++ b/library/src/utility/device_memory.cpp @@ -1,10 +1,12 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. #include "ck/host_utility/hip_check_error.hpp" #include "ck/library/utility/device_memory.hpp" +namespace ck { + DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size) { hip_check_error(hipMalloc(static_cast(&mpDeviceBuf), mMemSize)); @@ -74,3 +76,5 @@ DeviceMem::~DeviceMem() hip_check_error(hipFree(mpDeviceBuf)); } } + +} // namespace ck diff --git a/library/src/utility/host_tensor.cpp b/library/src/utility/host_tensor.cpp index cc394f2535..4f6cfaa346 100644 --- a/library/src/utility/host_tensor.cpp +++ b/library/src/utility/host_tensor.cpp @@ -1,10 +1,12 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. #include #include "ck/library/utility/host_tensor.hpp" +namespace ck { + std::size_t HostTensorDescriptor::GetNumOfDimension() const { return mLens.size(); } std::size_t HostTensorDescriptor::GetElementSize() const @@ -56,3 +58,5 @@ std::ostream& operator<<(std::ostream& os, HostTensorDescriptor::ChosenLayout ta } return os; } + +} // namespace ck diff --git a/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp index 36d31d53fa..5de0cc13a7 100644 --- a/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp +++ b/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp @@ -19,6 +19,8 @@ #include +using ::ck::HostTensorDescriptor; + using DataType = float; using ImLayout = ck::tensor_layout::convolution::GNWC; diff --git a/test/data_type/test_bf6.cpp b/test/data_type/test_bf6.cpp index 904cd302dc..51361ddbb6 100644 --- a/test/data_type/test_bf6.cpp +++ b/test/data_type/test_bf6.cpp @@ -8,6 +8,8 @@ #include "ck/utility/scaled_type_convert.hpp" #include "ck/library/utility/device_memory.hpp" +using ::ck::DeviceMem; + using ck::bf6_convert_rne; using ck::bf6_convert_sr; using ck::bf6_t; diff --git a/test/data_type/test_bf8_ocp.cpp b/test/data_type/test_bf8_ocp.cpp index 285e7e69fc..9b9b6e3b53 100644 --- a/test/data_type/test_bf8_ocp.cpp +++ b/test/data_type/test_bf8_ocp.cpp @@ -6,6 +6,8 @@ #include "ck/utility/data_type.hpp" #include "ck/utility/type_convert.hpp" +using ::ck::DeviceMem; + using ck::bf8_ocp_t; using ck::bf8x2_ocp_t; using ck::bhalf2_t; diff --git a/test/data_type/test_bhalf.cpp b/test/data_type/test_bhalf.cpp index ad31e194b8..6d3d9e0f0b 100644 --- a/test/data_type/test_bhalf.cpp +++ b/test/data_type/test_bhalf.cpp @@ -9,6 +9,8 @@ #include "ck/utility/type_convert.hpp" #include "ck/host_utility/hip_check_error.hpp" +using ::ck::hip_check_error; + using ck::bhalf_t; using ck::type_convert; diff --git a/test/data_type/test_fp6.cpp b/test/data_type/test_fp6.cpp index 14afe3e2e4..7a51ac514f 100644 --- a/test/data_type/test_fp6.cpp +++ b/test/data_type/test_fp6.cpp @@ -8,6 +8,8 @@ #include "ck/utility/scaled_type_convert.hpp" #include "ck/library/utility/device_memory.hpp" +using ::ck::DeviceMem; + using ck::e8m0_bexp_t; using ck::f6_convert_rne; using ck::f6_convert_sr; diff --git a/test/data_type/test_fp8_ocp.cpp b/test/data_type/test_fp8_ocp.cpp index bf562112c8..552b4ca871 100644 --- a/test/data_type/test_fp8_ocp.cpp +++ b/test/data_type/test_fp8_ocp.cpp @@ -6,6 +6,8 @@ #include "ck/utility/data_type.hpp" #include "ck/utility/type_convert.hpp" +using ::ck::DeviceMem; + using ck::bhalf2_t; using ck::bhalf_t; using ck::f8_convert_rne; diff --git a/test/data_type/test_int4.cpp b/test/data_type/test_int4.cpp index 07549c1c48..b0ec1a6646 100644 --- a/test/data_type/test_int4.cpp +++ b/test/data_type/test_int4.cpp @@ -14,6 +14,8 @@ #include "ck/utility/get_id.hpp" #include "ck/library/utility/device_memory.hpp" +using ::ck::DeviceMem; + using ck::int4_t; TEST(Int4, BaseArithmetic) diff --git a/test/data_type/test_mx_bf8.cpp b/test/data_type/test_mx_bf8.cpp index 9f947e1d78..cc0cbb2b3b 100644 --- a/test/data_type/test_mx_bf8.cpp +++ b/test/data_type/test_mx_bf8.cpp @@ -5,6 +5,8 @@ #include "ck/library/utility/device_memory.hpp" #include "ck/utility/scaled_type_convert.hpp" +using ::ck::DeviceMem; + using ck::bf8_ocp_t; using ck::bf8x16_ocp_t; using ck::bf8x2_ocp_t; diff --git a/test/data_type/test_mx_fp4.cpp b/test/data_type/test_mx_fp4.cpp index c8059fa097..1a31e10fac 100644 --- a/test/data_type/test_mx_fp4.cpp +++ b/test/data_type/test_mx_fp4.cpp @@ -5,6 +5,8 @@ #include "ck/library/utility/device_memory.hpp" #include "ck/utility/scaled_type_convert.hpp" +using ::ck::DeviceMem; + using ck::e8m0_bexp_t; using ck::float16_t; using ck::float2_t; diff --git a/test/data_type/test_mx_fp8.cpp b/test/data_type/test_mx_fp8.cpp index 3a2bd4b88b..7d6c660bc8 100644 --- a/test/data_type/test_mx_fp8.cpp +++ b/test/data_type/test_mx_fp8.cpp @@ -19,6 +19,8 @@ using ck::scaled_type_convert; using ck::type_convert; using ck::fp8_impl::fp8x2_storage_t; +using ::ck::DeviceMem; + constexpr uint64_t test_size = 256 * 256 + 2 + 4 + 6; /** diff --git a/test/data_type/test_pk_i4.cpp b/test/data_type/test_pk_i4.cpp index 52273d45de..101c9f926f 100644 --- a/test/data_type/test_pk_i4.cpp +++ b/test/data_type/test_pk_i4.cpp @@ -16,6 +16,8 @@ #include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp" +using ::ck::DeviceMem; + using ck::bhalf2_t; using ck::bhalf_t; using ck::float2_t; diff --git a/test/grouped_convnd_bwd_weight/test_grouped_conv_bwd_weight_xdl_bilinear.cpp b/test/grouped_convnd_bwd_weight/test_grouped_conv_bwd_weight_xdl_bilinear.cpp index b3ed49ed8c..2c4b847a5d 100644 --- a/test/grouped_convnd_bwd_weight/test_grouped_conv_bwd_weight_xdl_bilinear.cpp +++ b/test/grouped_convnd_bwd_weight/test_grouped_conv_bwd_weight_xdl_bilinear.cpp @@ -23,6 +23,9 @@ #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp" +using ::ck::DeviceMem; +using ::ck::Tensor; + static ck::index_t param_mask = 0xffff; static ck::index_t instance_index = -1; template diff --git a/test/magic_number_division/magic_number_division.cpp b/test/magic_number_division/magic_number_division.cpp index d21d57a157..1311f40d32 100644 --- a/test/magic_number_division/magic_number_division.cpp +++ b/test/magic_number_division/magic_number_division.cpp @@ -13,6 +13,8 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +using ::ck::DeviceMem; + __global__ void gpu_magic_number_division(uint32_t magic_multiplier, uint32_t magic_shift, const int32_t* p_dividend, diff --git a/test/reference_conv_fwd/reference_conv_fwd.cpp b/test/reference_conv_fwd/reference_conv_fwd.cpp index 45345cccfa..298cb96af4 100644 --- a/test/reference_conv_fwd/reference_conv_fwd.cpp +++ b/test/reference_conv_fwd/reference_conv_fwd.cpp @@ -22,6 +22,8 @@ namespace { +using ::ck::Tensor; + using InElementOp = ck::tensor_operation::element_wise::PassThrough; using WeiElementOp = ck::tensor_operation::element_wise::PassThrough; using OutElementOp = ck::tensor_operation::element_wise::PassThrough; diff --git a/test/wrapper/test_wrapper_copy.cpp b/test/wrapper/test_wrapper_copy.cpp index 4721006435..b25bf8f1f0 100644 --- a/test/wrapper/test_wrapper_copy.cpp +++ b/test/wrapper/test_wrapper_copy.cpp @@ -16,6 +16,8 @@ #include "ck/wrapper/tensor.hpp" #include "ck/wrapper/operations/copy.hpp" +using ::ck::DeviceMem; + // Test copy from Global to Global through LDS and VGPR template void CheckResult(const std::vector& a_data, const std::vector& b_data, diff --git a/test/wrapper/test_wrapper_tensor.cpp b/test/wrapper/test_wrapper_tensor.cpp index 3c7d877528..25b8626cac 100644 --- a/test/wrapper/test_wrapper_tensor.cpp +++ b/test/wrapper/test_wrapper_tensor.cpp @@ -16,6 +16,9 @@ #include "ck/wrapper/layout.hpp" #include "ck/wrapper/tensor.hpp" +using ::ck::DeviceMem; +using ::ck::launch_and_time_kernel; + // Compare data in tensor with offset from layout. // Data and offset should match if physical memory has been initialized with // sequentially increasing values from 0.