diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v1_ab_scale.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v1_ab_scale.hpp index 121593d3cc..821bbb0051 100644 --- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v1_ab_scale.hpp +++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v1_ab_scale.hpp @@ -308,7 +308,7 @@ struct BlockwiseGemmXdlops_pipeline_v1_ab_scale::type; - xdlops_gemm.template Run( + xdlops_gemm.template Run<>( a_thread_vec.template AsType(), b_thread_vec.template AsType(), c_thread_buf_per_scale.GetVectorTypeReference(I0)); @@ -390,9 +390,10 @@ struct BlockwiseGemmXdlops_pipeline_v1_ab_scale::type; - xdlops_gemm.template Run(a_thread_vec.template AsType(), - b_thread_vec.template AsType(), - c_thread_buf_per_scale.GetVectorTypeReference(I0)); + xdlops_gemm.template Run<>( + a_thread_vec.template AsType(), + b_thread_vec.template AsType(), + c_thread_buf_per_scale.GetVectorTypeReference(I0)); }); static_for<0, xdlops_gemm.GetRegSizePerXdlops(), 1>{}([&](auto t) { constexpr index_t c_offset = diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v2_ab_scale.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v2_ab_scale.hpp index cb7cf605be..40fa776484 100644 --- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v2_ab_scale.hpp +++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v2_ab_scale.hpp @@ -350,7 +350,7 @@ struct BlockwiseGemmXdlops_pipeline_v2_ab_scale::type; - xdlops_gemm.template Run( + xdlops_gemm.template Run<>( a_thread_vec.template AsType(), b_thread_vec.template AsType(), c_thread_buf_per_scale.GetVectorTypeReference(I0)); @@ -443,7 +443,7 @@ struct BlockwiseGemmXdlops_pipeline_v2_ab_scale::type; - xdlops_gemm.template Run( + xdlops_gemm.template Run<>( a_thread_vec.template AsType(), b_thread_vec.template AsType(), c_thread_buf_per_scale.GetVectorTypeReference(I0)); @@ -518,9 +518,10 @@ struct BlockwiseGemmXdlops_pipeline_v2_ab_scale::type; - xdlops_gemm.template Run(a_thread_vec.template AsType(), - b_thread_vec.template AsType(), - c_thread_buf_per_scale.GetVectorTypeReference(I0)); + xdlops_gemm.template Run<>( + a_thread_vec.template AsType(), + b_thread_vec.template AsType(), + c_thread_buf_per_scale.GetVectorTypeReference(I0)); }); static_for<0, xdlops_gemm.GetRegSizePerXdlops(), 1>{}([&](auto t) { constexpr index_t c_offset = @@ -575,9 +576,10 @@ struct BlockwiseGemmXdlops_pipeline_v2_ab_scale::type; - xdlops_gemm.template Run(a_thread_vec.template AsType(), - b_thread_vec.template AsType(), - c_thread_buf_per_scale.GetVectorTypeReference(I0)); + xdlops_gemm.template Run<>( + a_thread_vec.template AsType(), + b_thread_vec.template AsType(), + c_thread_buf_per_scale.GetVectorTypeReference(I0)); }); static_for<0, xdlops_gemm.GetRegSizePerXdlops(), 1>{}([&](auto t) { constexpr index_t c_offset = diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3_ab_scale.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3_ab_scale.hpp index 66c9a5c339..de542866a6 100644 --- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3_ab_scale.hpp +++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3_ab_scale.hpp @@ -427,7 +427,7 @@ struct BlockwiseGemmXdlops_pipeline_v3_ab_scale::type; - xdlops_gemm.template Run( + xdlops_gemm.template Run<>( a_thread_vec.template AsType(), b_thread_vec.template AsType(), c_thread_buf_per_scale.GetVectorTypeReference(I0)); @@ -504,9 +504,10 @@ struct BlockwiseGemmXdlops_pipeline_v3_ab_scale::type; - xdlops_gemm.template Run(a_thread_vec.template AsType(), - b_thread_vec.template AsType(), - c_thread_buf_per_scale.GetVectorTypeReference(I0)); + xdlops_gemm.template Run<>( + a_thread_vec.template AsType(), + b_thread_vec.template AsType(), + c_thread_buf_per_scale.GetVectorTypeReference(I0)); }); static_for<0, xdlops_gemm.GetRegSizePerXdlops(), 1>{}([&](auto t) { constexpr index_t c_offset =