From 8e42e29d7b64dfa12d15bb85932ce9dd0f334065 Mon Sep 17 00:00:00 2001 From: Bartlomiej Kocot Date: Fri, 13 Jun 2025 15:45:29 +0000 Subject: [PATCH] test gk bias --- ...ofile_grouped_conv_fwd_bias_clamp_impl.hpp | 46 ++++++++- .../CMakeLists.txt | 3 + .../test_grouped_convnd_fwd_bias_clamp.cpp | 3 +- .../test_grouped_convnd_fwd_gk_bias_clamp.cpp | 93 +++++++++++++++++++ 4 files changed, 140 insertions(+), 5 deletions(-) create mode 100644 test/grouped_convnd_fwd_activation/test_grouped_convnd_fwd_gk_bias_clamp.cpp diff --git a/profiler/include/profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp b/profiler/include/profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp index 3ef9f4505d..7035f3e52f 100644 --- a/profiler/include/profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp +++ b/profiler/include/profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp @@ -25,6 +25,23 @@ namespace ck { namespace profiler { +template +auto get_bias_desc(ck::index_t G, ck::index_t K) +{ + if constexpr(NDimSpatial == 1) + { + return HostTensorDescriptor({G, 1, K, 1}, {K, 0, 1, 0}); + } + else if constexpr(NDimSpatial == 2) + { + return HostTensorDescriptor({G, 1, K, 1, 1}, {K, 0, 1, 0, 0}); + } + else + { + return HostTensorDescriptor({G, 1, K, 1, 1, 1}, {K, 0, 1, 0, 0, 0}); + } +} + template + typename IndexType = ck::index_t, + bool BiasGK = false> bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification, int init_method, bool do_log, @@ -61,12 +79,16 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification, const auto out_g_n_k_wos_desc = ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed(conv_param); + const index_t G = conv_param.G_; + const index_t K = conv_param.K_; + std::array a_g_n_c_wis_lengths{}; std::array a_g_n_c_wis_strides{}; std::array b_g_k_c_xs_lengths{}; std::array b_g_k_c_xs_strides{}; std::array e_g_n_k_wos_lengths{}; std::array e_g_n_k_wos_strides{}; + std::array d_g_n_k_wos_strides{}; std::array conv_filter_strides{}; std::array conv_filter_dilations{}; std::array input_left_pads{}; @@ -80,6 +102,7 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification, copy(wei_g_k_c_xs_desc.GetStrides(), b_g_k_c_xs_strides); copy(out_g_n_k_wos_desc.GetLengths(), e_g_n_k_wos_lengths); copy(out_g_n_k_wos_desc.GetStrides(), e_g_n_k_wos_strides); + copy(out_g_n_k_wos_desc.GetStrides(), d_g_n_k_wos_strides); copy(conv_param.conv_filter_strides_, conv_filter_strides); copy(conv_param.conv_filter_dilations_, conv_filter_dilations); copy(conv_param.input_left_pads_, input_left_pads); @@ -89,7 +112,8 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification, Tensor weight(wei_g_k_c_xs_desc); Tensor host_output(out_g_n_k_wos_desc); Tensor device_output(out_g_n_k_wos_desc); - Tensor bias(out_g_n_k_wos_desc); + const auto bias_desc = BiasGK ? get_bias_desc(G, K) : out_g_n_k_wos_desc; + Tensor bias(bias_desc); std::cout << "input: " << input.mDesc << std::endl; std::cout << "weight: " << weight.mDesc << std::endl; @@ -113,7 +137,11 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification, DeviceMem in_device_buf(sizeof(InDataType) * input.mDesc.GetElementSpaceSize()); DeviceMem wei_device_buf(sizeof(WeiDataType) * weight.mDesc.GetElementSpaceSize()); DeviceMem out_device_buf(sizeof(OutDataType) * device_output.mDesc.GetElementSpaceSize()); - DeviceMem bias_device_buf(sizeof(OutDataType) * bias.mDesc.GetElementSpaceSize()); + + const std::size_t bias_dev_buf_size = + BiasGK ? sizeof(OutDataType) * G * K + : sizeof(OutDataType) * device_output.mDesc.GetElementSpaceSize(); + DeviceMem bias_device_buf(bias_dev_buf_size); in_device_buf.ToDevice(input.mData.data()); wei_device_buf.ToDevice(weight.mData.data()); @@ -244,6 +272,16 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification, std::cout << "ckProfiler found " << op_ptrs.size() << " instances" << std::endl; + if constexpr(BiasGK) + { + constexpr ck::index_t spatial_offset = 3; + d_g_n_k_wos_strides[1] = 0; + for(int i = 0; i < NDimSpatial; i++) + { + d_g_n_k_wos_strides[i + spatial_offset] = 0; + } + } + for(auto& op_ptr : op_ptrs) { auto argument_ptr = op_ptr->MakeArgumentPointer(in_device_buf.GetDeviceBuffer(), @@ -255,7 +293,7 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification, b_g_k_c_xs_lengths, b_g_k_c_xs_strides, {e_g_n_k_wos_lengths}, - {e_g_n_k_wos_strides}, + {d_g_n_k_wos_strides}, e_g_n_k_wos_lengths, e_g_n_k_wos_strides, conv_filter_strides, diff --git a/test/grouped_convnd_fwd_activation/CMakeLists.txt b/test/grouped_convnd_fwd_activation/CMakeLists.txt index 2a6528ff64..8bded647b6 100644 --- a/test/grouped_convnd_fwd_activation/CMakeLists.txt +++ b/test/grouped_convnd_fwd_activation/CMakeLists.txt @@ -2,6 +2,9 @@ if(GPU_TARGETS MATCHES "gfx9") add_gtest_executable(test_grouped_convnd_fwd_bias_clamp test_grouped_convnd_fwd_bias_clamp.cpp) target_link_libraries(test_grouped_convnd_fwd_bias_clamp PRIVATE utility device_grouped_conv2d_fwd_bias_clamp_instance device_grouped_conv3d_fwd_bias_clamp_instance) + add_gtest_executable(test_grouped_convnd_fwd_gk_bias_clamp test_grouped_convnd_fwd_gk_bias_clamp.cpp) + target_link_libraries(test_grouped_convnd_fwd_gk_bias_clamp PRIVATE utility device_grouped_conv2d_fwd_bias_clamp_instance device_grouped_conv3d_fwd_bias_clamp_instance) + add_gtest_executable(test_grouped_convnd_fwd_clamp test_grouped_convnd_fwd_clamp.cpp) target_link_libraries(test_grouped_convnd_fwd_clamp PRIVATE utility device_grouped_conv2d_fwd_clamp_instance device_grouped_conv3d_fwd_clamp_instance) endif() diff --git a/test/grouped_convnd_fwd_activation/test_grouped_convnd_fwd_bias_clamp.cpp b/test/grouped_convnd_fwd_activation/test_grouped_convnd_fwd_bias_clamp.cpp index 7d5437d247..f3a569115a 100644 --- a/test/grouped_convnd_fwd_activation/test_grouped_convnd_fwd_bias_clamp.cpp +++ b/test/grouped_convnd_fwd_activation/test_grouped_convnd_fwd_bias_clamp.cpp @@ -41,7 +41,8 @@ class TestGroupedConvndFwd : public ::testing::Test DataType, DataType, DataType, - IndexType>( + IndexType, + false /*BiasGK*/>( true, // do_verification 1, // init_method: integer value false, // do_log diff --git a/test/grouped_convnd_fwd_activation/test_grouped_convnd_fwd_gk_bias_clamp.cpp b/test/grouped_convnd_fwd_activation/test_grouped_convnd_fwd_gk_bias_clamp.cpp new file mode 100644 index 0000000000..0a41eac286 --- /dev/null +++ b/test/grouped_convnd_fwd_activation/test_grouped_convnd_fwd_gk_bias_clamp.cpp @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +#include +#include +#include +#include +#include + +#include "profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp" + +#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" + +using AddClamp = ck::tensor_operation::element_wise::AddClamp; + +template +class TestGroupedConvndFwd : public ::testing::Test +{ + protected: + using DataType = std::tuple_element_t<0, Tuple>; + using InLayout = std::tuple_element_t<1, Tuple>; + using WeiLayout = std::tuple_element_t<2, Tuple>; + using OutLayout = std::tuple_element_t<3, Tuple>; + using IndexType = ck::index_t; + + std::vector conv_params; + + template + void Run() + { + EXPECT_FALSE(conv_params.empty()); + bool pass = true; + for(auto& param : conv_params) + { + pass = pass && ck::profiler::profile_grouped_conv_fwd_bias_clamp_impl( + true, // do_verification + 1, // init_method: integer value + false, // do_log + false, // time_kernel + param); + } + EXPECT_TRUE(pass); + } +}; + +using namespace ck::tensor_layout::convolution; + +using KernelTypes2d = ::testing::Types>; + +using KernelTypes3d = ::testing::Types>; + +template +class TestGroupedConvndFwd2d : public TestGroupedConvndFwd +{ +}; + +template +class TestGroupedConvndFwd3d : public TestGroupedConvndFwd +{ +}; + +TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d); +TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d); + +TYPED_TEST(TestGroupedConvndFwd2d, Test2D) +{ + this->conv_params.clear(); + this->conv_params.push_back( + {2, 2, 32, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}}); + this->conv_params.push_back( + {2, 2, 32, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}}); + this->template Run<2>(); +} + +TYPED_TEST(TestGroupedConvndFwd3d, Test3D) +{ + this->conv_params.clear(); + this->conv_params.push_back( + {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}}); + this->conv_params.push_back( + {3, 2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}); + this->template Run<3>(); +}