Adding remaining conv, dynamic_op, and scaleadd_scaleadd_relu flavors for grouped conv fwd (#3529)

* Adding remaining flavors for grouped conv fwd

As titled. Following variants are added:
- grouped_conv2d_fwd_dynamic_op
- grouped_conv3d_fwd_dynamic_op
- grouped_conv3d_fwd_bilinear
- grouped_conv3d_fwd_convscale
- grouped_conv3d_fwd_convinvscale
- grouped_conv3d_fwd_convscale_add
- grouped_conv3d_fwd_convscale_relu
- grouped_conv3d_fwd_scale
- grouped_conv3d_fwd_combconvscale
- grouped_conv3d_fwd_scaleadd_scaleadd_relu

* Fix incomplete parsing of types from source names in add_instance_library() cmakelists function so we don't build f8 on RDNA3.

* Do not build f8 / bf8 only flavor tests on RDNA3

* Make sure we have proper generic instances for all instance lists related to the post-ces extra flavors, with scalarPerVector = 1. Then disable all but one generic instance per instance list to reduce compile time.

* Post rebase fix: Template parameters for Grouped Conv Fwd Device Impl got tweaked upstream.

* adding int8 and fp16 overloads to the elementwise operations

* fixed copilot nits

* Addressing review comments:

- removed unnecessary examples for dynamic op
- removed unnecessary conv specializations for all the flavors
- removed spurious bilinear and scale source files

* clang-format

* reduced number of tests

---------

Co-authored-by: Wojciech Laskowski <wojciech.laskowski@streamhpc.com>
This commit is contained in:
Kiefer van Teutem
2026-01-30 17:02:14 +01:00
committed by GitHub
parent 6a6177a246
commit 2377a62837
72 changed files with 5178 additions and 34 deletions

View File

@@ -7,6 +7,9 @@ if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12")
# Bilinear flavor: only a 3D instance library exists.
add_gtest_executable(test_grouped_convnd_fwd_bilinear test_grouped_convnd_fwd_bilinear.cpp)
target_link_libraries(test_grouped_convnd_fwd_bilinear PRIVATE utility device_grouped_conv3d_fwd_bilinear_instance)
# Dynamic-op flavor: links both the 2D and the 3D instance libraries.
add_gtest_executable(test_grouped_convnd_fwd_dynamic_op test_grouped_convnd_fwd_dynamic_op.cpp)
target_link_libraries(test_grouped_convnd_fwd_dynamic_op PRIVATE utility device_grouped_conv2d_fwd_dynamic_op_instance device_grouped_conv3d_fwd_dynamic_op_instance)
# Scale-add-AB flavor: only a 3D instance library exists.
add_gtest_executable(test_grouped_convnd_fwd_scaleadd_ab test_grouped_convnd_fwd_scaleadd_ab.cpp)
target_link_libraries(test_grouped_convnd_fwd_scaleadd_ab PRIVATE utility device_grouped_conv3d_fwd_scaleadd_ab_instance)

View File

@@ -0,0 +1,180 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/common_header.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
using I8 = int8_t;
using F8 = ck::f8_t;
using BF8 = ck::bf8_t;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using F32 = float;
/// Typed fixture for grouped conv fwd with a runtime-selected (dynamic) unary
/// elementwise op. Tuple: (In, Wei, Out, ACompute, BCompute, InLayout,
/// WeiLayout, OutLayout).
template <typename Tuple>
class TestGroupedConvndFwdDynamicOp : public ::testing::Test
{
    protected:
    using InDataType   = std::tuple_element_t<0, Tuple>;
    using WeiDataType  = std::tuple_element_t<1, Tuple>;
    using OutDataType  = std::tuple_element_t<2, Tuple>;
    using AComputeType = std::tuple_element_t<3, Tuple>;
    using BComputeType = std::tuple_element_t<4, Tuple>;
    using InLayout     = std::tuple_element_t<5, Tuple>;
    using WeiLayout    = std::tuple_element_t<6, Tuple>;
    using OutLayout    = std::tuple_element_t<7, Tuple>;
    using IndexType    = ck::index_t;

    // Convolution problems to profile; populated by the TYPED_TESTs.
    std::vector<ck::utils::conv::ConvParam> conv_params;

    /// Profiles every problem in conv_params with DynamicUnaryOp(PassThrough)
    /// and expects all of them to verify.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        // The FP8/BF8 capability check depends only on the fixture's data type
        // and the device, not on the individual problem, so do it once up front.
        if(ck::get_device_name() == "gfx908" || ck::get_device_name() == "gfx90a")
        {
            if(std::is_same<InDataType, F8>::value || std::is_same<InDataType, BF8>::value)
            {
                printf("Skipping FP8 / BF8 tests on CDNA1/2.\n");
                return; // pass stays vacuously true, matching the old per-param `continue`
            }
        }

        const auto dynamic_op = ck::tensor_operation::element_wise::DynamicUnaryOp{
            ck::tensor_operation::element_wise::PassThrough{}};

        bool pass = true;
        for(auto& param : conv_params)
        {
            // Run the profiler before folding into `pass`: the previous
            // `pass = pass && profile(...)` short-circuited, so every
            // configuration after the first failure was silently skipped.
            const bool ok = ck::profiler::profile_grouped_conv_fwd_impl<
                NDimSpatial,
                InLayout,
                WeiLayout,
                OutLayout,
                InDataType,
                WeiDataType,
                OutDataType,
                AComputeType,
                BComputeType,
                IndexType,
                ck::tensor_operation::element_wise::DynamicUnaryOp>(true,  // do_verification
                                                                    1,     // init_method: integer value
                                                                    false, // do_log
                                                                    true,  // time_kernel
                                                                    param,
                                                                    dynamic_op);
            pass = ok && pass;
        }
        EXPECT_TRUE(pass);
    }
};
using namespace ck::tensor_layout::convolution;
// 2D configurations: (In, Wei, Out, ACompute, BCompute, InLayout, WeiLayout, OutLayout).
using KernelTypes2d =
::testing::Types<std::tuple<F16, F16, F16, F16, F16, NHWGC, GKYXC, NHWGK>,
std::tuple<BF16, BF16, BF16, BF16, BF16, NHWGC, GKYXC, NHWGK>,
std::tuple<I8, I8, I8, I8, I8, NHWGC, GKYXC, NHWGK>,
std::tuple<F32, F32, F32, F32, F32, NHWGC, GKYXC, NHWGK>>;
// 3D configurations: same data types with the NDHWGC/GKZYXC/NDHWGK layouts.
using KernelTypes3d =
::testing::Types<std::tuple<F16, F16, F16, F16, F16, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<BF16, BF16, BF16, BF16, BF16, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<I8, I8, I8, I8, I8, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<F32, F32, F32, F32, F32, NDHWGC, GKZYXC, NDHWGK>>;
// Thin fixtures so the 2D and 3D suites can be instantiated separately.
template <typename Tuple>
class TestGroupedConvndFwdDynamicOp2d : public TestGroupedConvndFwdDynamicOp<Tuple>
{
};
template <typename Tuple>
class TestGroupedConvndFwdDynamicOp3d : public TestGroupedConvndFwdDynamicOp<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwdDynamicOp2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndFwdDynamicOp3d, KernelTypes3d);
TYPED_TEST(TestGroupedConvndFwdDynamicOp2d, Test2D)
{
    // Problem table; fields appear to follow ck::utils::conv::ConvParam's
    // constructor order {ndim, G, N, K, C, filter, spatial, strides, dilations,
    // left pads, right pads} -- confirm against ConvParam if extending.
    this->conv_params = {
        {2, 3, 5, 96, 200, {1, 1}, {73, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {2, 1, 1, 32, 32, {1, 1}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {2, 1, 1, 32, 32, {2, 2}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {2, 1, 1, 32, 32, {3, 3}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {2, 1, 1, 32, 32, {5, 5}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {2, 1, 1, 32, 32, {9, 9}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {2, 2, 32, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}},
        {2, 2, 32, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 2, 32, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 96, 1, 1, 1, {1, 1}, {120, 160}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 96, 1, 1, 1, {3, 3}, {120, 160}, {1, 1}, {1, 1}, {1, 1}, {1, 1}}};
    this->template Run<2>();
}
TYPED_TEST(TestGroupedConvndFwdDynamicOp3d, Test3D)
{
    // Problem table; fields appear to follow ck::utils::conv::ConvParam's
    // constructor order {ndim, G, N, K, C, filter, spatial, strides, dilations,
    // left pads, right pads} -- confirm against ConvParam if extending.
    this->conv_params = {
        {3, 3, 5, 96, 200, {1, 1, 1}, {37, 37, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {1, 1, 1}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {2, 2, 2}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {5, 5, 5}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {9, 9, 9}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {1, 1, 1}, {16, 16, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {3, 96, 1, 1, 1, {1, 1, 1}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {3, 96, 1, 1, 1, {3, 3, 3}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
    this->template Run<3>();
}

View File

@@ -1,6 +1,26 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
# NOTE: this regex intentionally omits gfx11 (unlike the block below) -- the
# convscale/convinvscale flavors are f8/bf8 only and are not built on RDNA3.
if(GPU_TARGETS MATCHES "gfx9|gfx12")
add_gtest_executable(test_grouped_convnd_fwd_convinvscale test_grouped_convnd_fwd_convinvscale.cpp)
target_link_libraries(test_grouped_convnd_fwd_convinvscale PRIVATE utility device_grouped_conv3d_fwd_convinvscale_instance)
add_gtest_executable(test_grouped_convnd_fwd_convscaleadd test_grouped_convnd_fwd_convscaleadd.cpp)
target_link_libraries(test_grouped_convnd_fwd_convscaleadd PRIVATE utility device_grouped_conv3d_fwd_convscale_add_instance)
add_gtest_executable(test_grouped_convnd_fwd_convscalerelu test_grouped_convnd_fwd_convscalerelu.cpp)
target_link_libraries(test_grouped_convnd_fwd_convscalerelu PRIVATE utility device_grouped_conv3d_fwd_convscale_relu_instance)
add_gtest_executable(test_grouped_convnd_fwd_convscale test_grouped_convnd_fwd_convscale.cpp)
target_link_libraries(test_grouped_convnd_fwd_convscale PRIVATE utility device_grouped_conv3d_fwd_convscale_instance)
# The "comb" variants reuse the plain convscale / convscale_relu instance libraries.
add_gtest_executable(test_grouped_convnd_fwd_combconvscale test_grouped_convnd_fwd_combconvscale.cpp)
target_link_libraries(test_grouped_convnd_fwd_combconvscale PRIVATE utility device_grouped_conv3d_fwd_convscale_instance)
add_gtest_executable(test_grouped_convnd_fwd_combconvscalerelu test_grouped_convnd_fwd_combconvscalerelu.cpp)
target_link_libraries(test_grouped_convnd_fwd_combconvscalerelu PRIVATE utility device_grouped_conv3d_fwd_convscale_relu_instance)
endif()
if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12")
add_gtest_executable(test_grouped_convnd_fwd_bias_clamp test_grouped_convnd_fwd_bias_clamp.cpp)
target_link_libraries(test_grouped_convnd_fwd_bias_clamp PRIVATE utility device_grouped_conv2d_fwd_bias_clamp_instance device_grouped_conv3d_fwd_bias_clamp_instance)
@@ -23,4 +43,7 @@ if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12")
add_gtest_executable(test_grouped_convnd_fwd_gk_bias_bnorm_clamp test_grouped_convnd_fwd_gk_bias_bnorm_clamp.cpp)
target_link_libraries(test_grouped_convnd_fwd_gk_bias_bnorm_clamp PRIVATE utility device_grouped_conv2d_fwd_bias_bnorm_clamp_instance device_grouped_conv3d_fwd_bias_bnorm_clamp_instance)
# scaleadd_scaleadd_relu flavor: only a 3D instance library exists.
add_gtest_executable(test_grouped_convnd_fwd_scaleadd_scaleadd_relu test_grouped_convnd_fwd_scaleadd_scaleadd_relu.cpp)
target_link_libraries(test_grouped_convnd_fwd_scaleadd_scaleadd_relu PRIVATE utility device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance)
endif()

View File

@@ -0,0 +1,94 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/common_header.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "profiler/profile_grouped_conv_fwd_outelementop_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/combined_element_wise_operation.hpp"
using CombConvScale = ck::tensor_operation::element_wise::ScaleScalePass;
/// Typed fixture for grouped conv fwd with the combined ScaleScalePass output
/// elementwise op. Tuple: (In, Wei, Out, InLayout, WeiLayout, OutLayout).
template <typename Tuple>
class TestGroupedConvndFwdCombConvScale : public ::testing::Test
{
    protected:
    using InDataType  = std::tuple_element_t<0, Tuple>;
    using WeiDataType = std::tuple_element_t<1, Tuple>;
    using OutDataType = std::tuple_element_t<2, Tuple>;
    using InLayout    = std::tuple_element_t<3, Tuple>;
    using WeiLayout   = std::tuple_element_t<4, Tuple>;
    using OutLayout   = std::tuple_element_t<5, Tuple>;
    using IndexType   = ck::index_t;

    // Convolution problems to profile; populated by the TYPED_TESTs.
    std::vector<ck::utils::conv::ConvParam> conv_params;

    /// Profiles every problem in conv_params and expects all to verify.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        // FP8/BF8 capability depends only on the fixture's data type and the
        // device, not on the individual problem, so check once up front.
        if(ck::get_device_name() == "gfx908" || ck::get_device_name() == "gfx90a")
        {
            if(std::is_same<InDataType, ck::f8_t>::value ||
               std::is_same<InDataType, ck::bf8_t>::value)
            {
                printf("Skipping FP8 / BF8 tests on CDNA1/2.\n");
                return; // pass stays vacuously true, matching the old per-param `continue`
            }
        }

        bool pass = true;
        for(auto& param : conv_params)
        {
            // Run the profiler before folding into `pass`: the previous
            // `pass = pass && profile(...)` short-circuited, so every
            // configuration after the first failure was silently skipped.
            const bool ok =
                ck::profiler::profile_grouped_conv_fwd_outelementop_impl<NDimSpatial,
                                                                         InLayout,
                                                                         WeiLayout,
                                                                         OutLayout,
                                                                         InDataType,
                                                                         WeiDataType,
                                                                         OutDataType,
                                                                         CombConvScale,
                                                                         InDataType,
                                                                         InDataType>(
                    true,  // do_verification
                    1,     // init_method: integer value
                    false, // do_log
                    true,  // time_kernel
                    param);
            pass = ok && pass;
        }
        EXPECT_TRUE(pass);
    }
};
using namespace ck::tensor_layout::convolution;
// Single f8 configuration with f32 output: (In, Wei, Out, InLayout, WeiLayout, OutLayout).
using CombConvScaleKernelTypes3d =
::testing::Types<std::tuple<ck::f8_t, ck::f8_t, float, NDHWGC, GKZYXC, NDHWGK>>;
// Thin fixture so the 3D suite can be instantiated on its own type list.
template <typename Tuple>
class TestGroupedConvndFwdCombConvScale3d : public TestGroupedConvndFwdCombConvScale<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwdCombConvScale3d, CombConvScaleKernelTypes3d);
TYPED_TEST(TestGroupedConvndFwdCombConvScale3d, Test3D)
{
    // Problem table; fields appear to follow ck::utils::conv::ConvParam's
    // constructor order -- confirm against ConvParam if extending.
    this->conv_params = {
        {3, 3, 5, 96, 200, {1, 1, 1}, {37, 37, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {9, 9, 9}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 96, 1, 1, 1, {1, 1, 1}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
    this->template Run<3>();
}

View File

@@ -0,0 +1,95 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/common_header.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "profiler/profile_grouped_conv_fwd_outelementop_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/combined_element_wise_operation.hpp"
using CombConvScaleRelu = ck::tensor_operation::element_wise::ScaleScaleRelu;
/// Typed fixture for grouped conv fwd with the combined ScaleScaleRelu output
/// elementwise op. Tuple: (In, Wei, Out, InLayout, WeiLayout, OutLayout).
template <typename Tuple>
class TestGroupedConvndFwdCombConvScaleRelu : public ::testing::Test
{
    protected:
    using InDataType  = std::tuple_element_t<0, Tuple>;
    using WeiDataType = std::tuple_element_t<1, Tuple>;
    using OutDataType = std::tuple_element_t<2, Tuple>;
    using InLayout    = std::tuple_element_t<3, Tuple>;
    using WeiLayout   = std::tuple_element_t<4, Tuple>;
    using OutLayout   = std::tuple_element_t<5, Tuple>;
    using IndexType   = ck::index_t;

    // Convolution problems to profile; populated by the TYPED_TESTs.
    std::vector<ck::utils::conv::ConvParam> conv_params;

    /// Profiles every problem in conv_params and expects all to verify.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        // FP8/BF8 capability depends only on the fixture's data type and the
        // device, not on the individual problem, so check once up front.
        if(ck::get_device_name() == "gfx908" || ck::get_device_name() == "gfx90a")
        {
            if(std::is_same<InDataType, ck::f8_t>::value ||
               std::is_same<InDataType, ck::bf8_t>::value)
            {
                printf("Skipping FP8 / BF8 tests on CDNA1/2.\n");
                return; // pass stays vacuously true, matching the old per-param `continue`
            }
        }

        bool pass = true;
        for(auto& param : conv_params)
        {
            // Run the profiler before folding into `pass`: the previous
            // `pass = pass && profile(...)` short-circuited, so every
            // configuration after the first failure was silently skipped.
            const bool ok =
                ck::profiler::profile_grouped_conv_fwd_outelementop_impl<NDimSpatial,
                                                                         InLayout,
                                                                         WeiLayout,
                                                                         OutLayout,
                                                                         InDataType,
                                                                         WeiDataType,
                                                                         OutDataType,
                                                                         CombConvScaleRelu,
                                                                         InDataType,
                                                                         InDataType>(
                    true,  // do_verification
                    1,     // init_method: integer value
                    false, // do_log
                    true,  // time_kernel
                    param);
            pass = ok && pass;
        }
        EXPECT_TRUE(pass);
    }
};
using namespace ck::tensor_layout::convolution;
// Single f8 configuration with f32 output: (In, Wei, Out, InLayout, WeiLayout, OutLayout).
using CombConvScaleReluKernelTypes3d =
::testing::Types<std::tuple<ck::f8_t, ck::f8_t, float, NDHWGC, GKZYXC, NDHWGK>>;
// Thin fixture so the 3D suite can be instantiated on its own type list.
template <typename Tuple>
class TestGroupedConvndFwdCombConvScaleRelu3d : public TestGroupedConvndFwdCombConvScaleRelu<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwdCombConvScaleRelu3d, CombConvScaleReluKernelTypes3d);
TYPED_TEST(TestGroupedConvndFwdCombConvScaleRelu3d, Test3D)
{
    // Problem table; fields appear to follow ck::utils::conv::ConvParam's
    // constructor order -- confirm against ConvParam if extending.
    this->conv_params = {
        {3, 3, 5, 96, 200, {1, 1, 1}, {37, 37, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {9, 9, 9}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 96, 1, 1, 1, {1, 1, 1}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
    this->template Run<3>();
}

View File

@@ -0,0 +1,89 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/common_header.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "profiler/profile_grouped_conv_fwd_outelementop_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using ConvInvscale = ck::tensor_operation::element_wise::ConvInvscale;
/// Typed fixture for grouped conv fwd with the ConvInvscale output elementwise
/// op; one data type is used for In/Wei/Out. Tuple: (Data, InLayout,
/// WeiLayout, OutLayout).
template <typename Tuple>
class TestGroupedConvndFwdConvInvscale : public ::testing::Test
{
    protected:
    using DataType  = std::tuple_element_t<0, Tuple>;
    using InLayout  = std::tuple_element_t<1, Tuple>;
    using WeiLayout = std::tuple_element_t<2, Tuple>;
    using OutLayout = std::tuple_element_t<3, Tuple>;
    using IndexType = ck::index_t;

    // Convolution problems to profile; populated by the TYPED_TESTs.
    std::vector<ck::utils::conv::ConvParam> conv_params;

    /// Profiles every problem in conv_params and expects all to verify.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        // FP8/BF8 capability depends only on the fixture's data type and the
        // device, not on the individual problem, so check once up front.
        if(ck::get_device_name() == "gfx908" || ck::get_device_name() == "gfx90a")
        {
            if(std::is_same<DataType, ck::f8_t>::value ||
               std::is_same<DataType, ck::bf8_t>::value)
            {
                printf("Skipping FP8 / BF8 tests on CDNA1/2.\n");
                return; // pass stays vacuously true, matching the old per-param `continue`
            }
        }

        bool pass = true;
        for(auto& param : conv_params)
        {
            // Run the profiler before folding into `pass`: the previous
            // `pass = pass && profile(...)` short-circuited, so every
            // configuration after the first failure was silently skipped.
            const bool ok =
                ck::profiler::profile_grouped_conv_fwd_outelementop_impl<NDimSpatial,
                                                                         InLayout,
                                                                         WeiLayout,
                                                                         OutLayout,
                                                                         DataType,
                                                                         DataType,
                                                                         DataType,
                                                                         ConvInvscale>(
                    true,  // do_verification
                    1,     // init_method: integer value
                    false, // do_log
                    true,  // time_kernel
                    param);
            pass = ok && pass;
        }
        EXPECT_TRUE(pass);
    }
};
using namespace ck::tensor_layout::convolution;
// Single f8 configuration: (Data, InLayout, WeiLayout, OutLayout).
using KernelTypes3d = ::testing::Types<std::tuple<ck::f8_t, NDHWGC, GKZYXC, NDHWGK>>;
// Thin fixture so the 3D suite can be instantiated on its own type list.
template <typename Tuple>
class TestGroupedConvndFwdConvInvscale3d : public TestGroupedConvndFwdConvInvscale<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwdConvInvscale3d, KernelTypes3d);
TYPED_TEST(TestGroupedConvndFwdConvInvscale3d, Test3D)
{
    // Problem table; fields appear to follow ck::utils::conv::ConvParam's
    // constructor order -- confirm against ConvParam if extending.
    this->conv_params = {
        {3, 3, 5, 96, 200, {1, 1, 1}, {37, 37, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {9, 9, 9}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 96, 1, 1, 1, {1, 1, 1}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
    this->template Run<3>();
}

View File

@@ -0,0 +1,97 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/common_header.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "profiler/profile_grouped_conv_fwd_outelementop_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using ConvScale = ck::tensor_operation::element_wise::ConvScale;
/// Typed fixture for grouped conv fwd with the ConvScale output elementwise op
/// and independent A/B compute types. Tuple: (In, Wei, Out, ACompute,
/// BCompute, InLayout, WeiLayout, OutLayout).
template <typename Tuple>
class TestGroupedConvndFwdConvScale : public ::testing::Test
{
    protected:
    using InDataType   = std::tuple_element_t<0, Tuple>;
    using WeiDataType  = std::tuple_element_t<1, Tuple>;
    using OutDataType  = std::tuple_element_t<2, Tuple>;
    using AComputeType = std::tuple_element_t<3, Tuple>;
    using BComputeType = std::tuple_element_t<4, Tuple>;
    using InLayout     = std::tuple_element_t<5, Tuple>;
    using WeiLayout    = std::tuple_element_t<6, Tuple>;
    using OutLayout    = std::tuple_element_t<7, Tuple>;
    using IndexType    = ck::index_t;

    // Convolution problems to profile; populated by the TYPED_TESTs.
    std::vector<ck::utils::conv::ConvParam> conv_params;

    /// Profiles every problem in conv_params and expects all to verify.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        // FP8/BF8 capability depends only on the fixture's data type and the
        // device, not on the individual problem, so check once up front.
        if(ck::get_device_name() == "gfx908" || ck::get_device_name() == "gfx90a")
        {
            if(std::is_same<InDataType, ck::f8_t>::value ||
               std::is_same<InDataType, ck::bf8_t>::value)
            {
                printf("Skipping FP8 / BF8 tests on CDNA1/2.\n");
                return; // pass stays vacuously true, matching the old per-param `continue`
            }
        }

        bool pass = true;
        for(auto& param : conv_params)
        {
            // Run the profiler before folding into `pass`: the previous
            // `pass = pass && profile(...)` short-circuited, so every
            // configuration after the first failure was silently skipped.
            const bool ok =
                ck::profiler::profile_grouped_conv_fwd_outelementop_impl<NDimSpatial,
                                                                         InLayout,
                                                                         WeiLayout,
                                                                         OutLayout,
                                                                         InDataType,
                                                                         WeiDataType,
                                                                         OutDataType,
                                                                         ConvScale,
                                                                         AComputeType,
                                                                         BComputeType>(
                    true,  // do_verification
                    1,     // init_method: integer value
                    false, // do_log
                    true,  // time_kernel
                    param);
            pass = ok && pass;
        }
        EXPECT_TRUE(pass);
    }
};
using namespace ck::tensor_layout::convolution;
// f8/bf8 mixes: (In, Wei, Out, ACompute, BCompute, InLayout, WeiLayout, OutLayout).
using KernelTypes3d = ::testing::Types<
std::tuple<ck::f8_t, ck::f8_t, ck::f8_t, ck::f8_t, ck::f8_t, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<ck::bf8_t, ck::bf8_t, ck::f8_t, ck::bf8_t, ck::bf8_t, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<ck::f8_t, ck::bf8_t, ck::f8_t, ck::f8_t, ck::bf8_t, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<ck::bf8_t, ck::f8_t, ck::f8_t, ck::bf8_t, ck::f8_t, NDHWGC, GKZYXC, NDHWGK>>;
// Thin fixture so the 3D suite can be instantiated on its own type list.
template <typename Tuple>
class TestGroupedConvndFwdConvScale3d : public TestGroupedConvndFwdConvScale<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwdConvScale3d, KernelTypes3d);
TYPED_TEST(TestGroupedConvndFwdConvScale3d, Test3D)
{
    // Problem table; fields appear to follow ck::utils::conv::ConvParam's
    // constructor order -- confirm against ConvParam if extending.
    this->conv_params = {
        {3, 3, 5, 96, 200, {1, 1, 1}, {37, 37, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {9, 9, 9}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 96, 1, 1, 1, {1, 1, 1}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
    this->template Run<3>();
}

View File

@@ -0,0 +1,91 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/common_header.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "profiler/profile_grouped_conv_fwd_convscale_add_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using ConvScaleAdd = ck::tensor_operation::element_wise::ConvScaleAdd;
/// Typed fixture for grouped conv fwd with the ConvScaleAdd output elementwise
/// op (adds a float bias tensor). Tuple: (Data, InLayout, WeiLayout,
/// BiasLayout, OutLayout).
template <typename Tuple>
class TestGroupedConvndFwdConvScaleAdd : public ::testing::Test
{
    protected:
    using DataType   = std::tuple_element_t<0, Tuple>;
    using InLayout   = std::tuple_element_t<1, Tuple>;
    using WeiLayout  = std::tuple_element_t<2, Tuple>;
    using BiasLayout = std::tuple_element_t<3, Tuple>;
    using OutLayout  = std::tuple_element_t<4, Tuple>;
    using IndexType  = ck::index_t;

    // Convolution problems to profile; populated by the TYPED_TESTs.
    std::vector<ck::utils::conv::ConvParam> conv_params;

    /// Profiles every problem in conv_params and expects all to verify.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        // FP8/BF8 capability depends only on the fixture's data type and the
        // device, not on the individual problem, so check once up front.
        if(ck::get_device_name() == "gfx908" || ck::get_device_name() == "gfx90a")
        {
            if(std::is_same<DataType, ck::f8_t>::value ||
               std::is_same<DataType, ck::bf8_t>::value)
            {
                printf("Skipping FP8 / BF8 tests on CDNA1/2.\n");
                return; // pass stays vacuously true, matching the old per-param `continue`
            }
        }

        bool pass = true;
        for(auto& param : conv_params)
        {
            // Run the profiler before folding into `pass`: the previous
            // `pass = pass && profile(...)` short-circuited, so every
            // configuration after the first failure was silently skipped.
            const bool ok =
                ck::profiler::profile_grouped_conv_fwd_convscale_add_impl<NDimSpatial,
                                                                          InLayout,
                                                                          WeiLayout,
                                                                          BiasLayout,
                                                                          OutLayout,
                                                                          DataType,
                                                                          DataType,
                                                                          float,
                                                                          DataType>(
                    true,  // do_verification
                    1,     // init_method: integer value
                    false, // do_log
                    true,  // time_kernel
                    param);
            pass = ok && pass;
        }
        EXPECT_TRUE(pass);
    }
};
using namespace ck::tensor_layout::convolution;
// Single f8 configuration: (Data, InLayout, WeiLayout, BiasLayout, OutLayout).
using KernelTypes3d = ::testing::Types<std::tuple<ck::f8_t, NDHWGC, GKZYXC, NDHWGK, NDHWGK>>;
// Thin fixture so the 3D suite can be instantiated on its own type list.
template <typename Tuple>
class TestGroupedConvndFwdConvScaleAdd3d : public TestGroupedConvndFwdConvScaleAdd<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwdConvScaleAdd3d, KernelTypes3d);
TYPED_TEST(TestGroupedConvndFwdConvScaleAdd3d, Test3D)
{
    // Problem table; fields appear to follow ck::utils::conv::ConvParam's
    // constructor order -- confirm against ConvParam if extending.
    this->conv_params = {
        {3, 3, 5, 96, 200, {1, 1, 1}, {37, 37, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {9, 9, 9}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 96, 1, 1, 1, {1, 1, 1}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
    this->template Run<3>();
}

View File

@@ -0,0 +1,89 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/common_header.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "profiler/profile_grouped_conv_fwd_outelementop_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using ConvScaleRelu = ck::tensor_operation::element_wise::ConvScaleRelu;
/// Typed fixture for grouped conv fwd with the ConvScaleRelu output
/// elementwise op; one data type is used for In/Wei/Out. Tuple: (Data,
/// InLayout, WeiLayout, OutLayout).
template <typename Tuple>
class TestGroupedConvndFwdConvScaleRelu : public ::testing::Test
{
    protected:
    using DataType  = std::tuple_element_t<0, Tuple>;
    using InLayout  = std::tuple_element_t<1, Tuple>;
    using WeiLayout = std::tuple_element_t<2, Tuple>;
    using OutLayout = std::tuple_element_t<3, Tuple>;
    using IndexType = ck::index_t;

    // Convolution problems to profile; populated by the TYPED_TESTs.
    std::vector<ck::utils::conv::ConvParam> conv_params;

    /// Profiles every problem in conv_params and expects all to verify.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        // FP8/BF8 capability depends only on the fixture's data type and the
        // device, not on the individual problem, so check once up front.
        if(ck::get_device_name() == "gfx908" || ck::get_device_name() == "gfx90a")
        {
            if(std::is_same<DataType, ck::f8_t>::value ||
               std::is_same<DataType, ck::bf8_t>::value)
            {
                printf("Skipping FP8 / BF8 tests on CDNA1/2.\n");
                return; // pass stays vacuously true, matching the old per-param `continue`
            }
        }

        bool pass = true;
        for(auto& param : conv_params)
        {
            // Run the profiler before folding into `pass`: the previous
            // `pass = pass && profile(...)` short-circuited, so every
            // configuration after the first failure was silently skipped.
            const bool ok =
                ck::profiler::profile_grouped_conv_fwd_outelementop_impl<NDimSpatial,
                                                                         InLayout,
                                                                         WeiLayout,
                                                                         OutLayout,
                                                                         DataType,
                                                                         DataType,
                                                                         DataType,
                                                                         ConvScaleRelu>(
                    true,  // do_verification
                    1,     // init_method: integer value
                    false, // do_log
                    true,  // time_kernel
                    param);
            pass = ok && pass;
        }
        EXPECT_TRUE(pass);
    }
};
using namespace ck::tensor_layout::convolution;
// Single f8 configuration: (Data, InLayout, WeiLayout, OutLayout).
using KernelTypes3d = ::testing::Types<std::tuple<ck::f8_t, NDHWGC, GKZYXC, NDHWGK>>;
// Thin fixture so the 3D suite can be instantiated on its own type list.
template <typename Tuple>
class TestGroupedConvndFwdConvScaleRelu3d : public TestGroupedConvndFwdConvScaleRelu<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwdConvScaleRelu3d, KernelTypes3d);
TYPED_TEST(TestGroupedConvndFwdConvScaleRelu3d, Test3D)
{
    // Problem table; fields appear to follow ck::utils::conv::ConvParam's
    // constructor order -- confirm against ConvParam if extending.
    this->conv_params = {
        {3, 3, 5, 96, 200, {1, 1, 1}, {37, 37, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {9, 9, 9}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 96, 1, 1, 1, {1, 1, 1}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
    this->template Run<3>();
}

View File

@@ -0,0 +1,99 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/common_header.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "profiler/profile_grouped_conv_fwd_scaleadd_scaleadd_relu_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using I8 = int8_t;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using F32 = float;
/// Typed fixture for grouped conv fwd with the ScaleAddScaleAddRelu output
/// elementwise op. Tuple: (In, Wei, Out, InLayout, WeiLayout, OutLayout).
template <typename Tuple>
class TestGroupedConvndFwdScaleAddScaleAddRelu : public ::testing::Test
{
    protected:
    using InDataType  = std::tuple_element_t<0, Tuple>;
    using WeiDataType = std::tuple_element_t<1, Tuple>;
    using OutDataType = std::tuple_element_t<2, Tuple>;
    using InLayout    = std::tuple_element_t<3, Tuple>;
    using WeiLayout   = std::tuple_element_t<4, Tuple>;
    using OutLayout   = std::tuple_element_t<5, Tuple>;

    // Convolution problems to profile; populated by the TYPED_TESTs.
    std::vector<ck::utils::conv::ConvParam> conv_params;

    /// Profiles every problem in conv_params and expects all to verify.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        // FP8/BF8 capability depends only on the fixture's data type and the
        // device, not on the individual problem, so check once up front.
        if(ck::get_device_name() == "gfx908" || ck::get_device_name() == "gfx90a")
        {
            if(std::is_same<InDataType, ck::f8_t>::value ||
               std::is_same<InDataType, ck::bf8_t>::value)
            {
                printf("Skipping FP8 / BF8 tests on CDNA1/2.\n");
                return; // pass stays vacuously true, matching the old per-param `continue`
            }
        }

        bool pass = true;
        for(auto& param : conv_params)
        {
            // Run the profiler before folding into `pass`: the previous
            // `pass = pass && profile(...)` short-circuited, so every
            // configuration after the first failure was silently skipped.
            const bool ok = ck::profiler::profile_grouped_conv_fwd_scaleadd_scaleadd_relu_impl<
                NDimSpatial,
                InLayout,
                WeiLayout,
                OutLayout,
                InDataType,
                WeiDataType,
                OutDataType,
                ck::tensor_operation::element_wise::ScaleAddScaleAddRelu,
                InDataType,
                InDataType>(true,  // do_verification
                            1,     // init_method: integer value
                            false, // do_log
                            true,  // time_kernel
                            param);
            pass = ok && pass;
        }
        EXPECT_TRUE(pass);
    }
};
using namespace ck::tensor_layout::convolution;
// Tested configurations: (In, Wei, Out, InLayout, WeiLayout, OutLayout).
using CombConvScaleAddScaleAddReluKernelTypes3d =
::testing::Types<std::tuple<F16, F16, F16, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<BF16, BF16, BF16, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<I8, I8, I8, NDHWGC, GKZYXC, NDHWGK>>;
// Thin fixture so the 3D suite can be instantiated on its own type list.
template <typename Tuple>
class TestGroupedConvndFwdScaleAddScaleAddRelu3d
: public TestGroupedConvndFwdScaleAddScaleAddRelu<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwdScaleAddScaleAddRelu3d,
CombConvScaleAddScaleAddReluKernelTypes3d);
TYPED_TEST(TestGroupedConvndFwdScaleAddScaleAddRelu3d, Test3D)
{
    // Problem table; fields appear to follow ck::utils::conv::ConvParam's
    // constructor order -- confirm against ConvParam if extending.
    this->conv_params = {
        {3, 3, 5, 96, 200, {1, 1, 1}, {37, 37, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 32, 32, {5, 5, 5}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
    this->template Run<3>();
}