Merge commit 'bb8445dca8a43fe37b9dd35c04bda98d33115399' into develop

This commit is contained in:
assistant-librarian[bot]
2025-12-18 07:15:19 +00:00
parent 334ae1c494
commit ba29aebebd
31 changed files with 3351 additions and 953 deletions

View File

@@ -311,4 +311,5 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx12")
endif()
add_subdirectory(position_embedding)
add_subdirectory(scatter_gather)
# NOTE(review): presumably holds the test_gpu_reference_conv_* targets — confirm against that
# directory's CMakeLists.txt.
add_subdirectory(gpu_reference)
add_subdirectory(util)

View File

@@ -5,3 +5,8 @@ add_gtest_executable(test_convnd_fwd convnd_fwd_xdl.cpp)
if(result EQUAL 0)
target_link_libraries(test_convnd_fwd PRIVATE utility device_conv2d_fwd_instance)
endif()
# Registers the test built from convnd_fwd_naive.cpp. Libraries are linked only when `result` is 0;
# presumably add_gtest_executable sets `result` to 0 when it actually created the target — confirm.
add_gtest_executable(test_convnd_fwd_naive convnd_fwd_naive.cpp)
if(result EQUAL 0)
target_link_libraries(test_convnd_fwd_naive PRIVATE utility)
endif()

View File

@@ -0,0 +1,220 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <vector>
#include <gtest/gtest.h>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_conv_fwd.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
// Element types used by every test in this file: all tensors and the accumulator are float.
using InDataType = float;
using WeiDataType = float;
using OutDataType = float;
using AccDataType = float;
// No element-wise transformation is applied to input, weight or output.
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
// Device operation under test, instantiated with the types above.
using DeviceConvNaive = ck::tensor_operation::device::
DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_K<InDataType,
WeiDataType,
OutDataType,
AccDataType,
InElementOp,
WeiElementOp,
OutElementOp>;
/// @brief Runs the naive 3-D forward convolution device op for one problem and compares its
///        output with the CPU reference convolution.
///
/// @tparam NDimSpatial Number of spatial dimensions; must be 3 because both the device op and
///                     the layout tags used below are 3-D only.
/// @param conv_param   Convolution problem (batch, channels, spatial sizes, strides, dilations,
///                     pads).
/// @return true when the device op accepts the argument and the device result matches the CPU
///         reference within rtol = atol = 1e-3; false otherwise.
template <ck::index_t NDimSpatial>
bool run_conv3d_naive_test(const ck::utils::conv::ConvParam& conv_param)
{
    static_assert(NDimSpatial == 3,
                  "run_conv3d_naive_test only supports 3 spatial dimensions: DeviceConvNaive and "
                  "the layouts used here are 3-D");

    using namespace ck;
    using namespace ck::tensor_operation::host;

    // NOTE(review): the device op's name says N_Di_Hi_Wi_C / K_Z_Y_X_C / N_Do_Ho_Wo_K, while the
    // host tensors below are described with GNCDHW / GKCZYX / GNKDHW. Confirm that the kernel
    // really consumes this memory order.
    using InLayout  = ck::tensor_layout::convolution::GNCDHW;
    using WeiLayout = ck::tensor_layout::convolution::GKCZYX;
    using OutLayout = ck::tensor_layout::convolution::GNKDHW;

    const auto in_g_n_c_wis_desc =
        ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(conv_param);
    const auto wei_g_k_c_xs_desc =
        ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(conv_param);
    const auto out_g_n_k_wos_desc =
        ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(conv_param);

    Tensor<InDataType> in(in_g_n_c_wis_desc);
    Tensor<WeiDataType> wei(wei_g_k_c_xs_desc);
    Tensor<OutDataType> out_host(out_g_n_k_wos_desc);
    Tensor<OutDataType> out_device(out_g_n_k_wos_desc);

    // Fill the inputs on the host, then upload them.
    in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
    wei.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});

    DeviceMem in_device_buf(sizeof(InDataType) * in.mDesc.GetElementSpaceSize());
    DeviceMem wei_device_buf(sizeof(WeiDataType) * wei.mDesc.GetElementSpaceSize());
    DeviceMem out_device_buf(sizeof(OutDataType) * out_device.mDesc.GetElementSpaceSize());

    in_device_buf.ToDevice(in.mData.data());
    wei_device_buf.ToDevice(wei.mData.data());

    // The device op takes index_t vectors, whereas ConvParam stores long_index_t ones; convert
    // element by element.
    const auto to_index_vector = [](const auto& values) {
        return std::vector<ck::index_t>(values.begin(), values.end());
    };

    std::vector<ck::index_t> input_spatial_lengths =
        to_index_vector(conv_param.input_spatial_lengths_);
    std::vector<ck::index_t> filter_spatial_lengths =
        to_index_vector(conv_param.filter_spatial_lengths_);
    std::vector<ck::index_t> output_spatial_lengths =
        to_index_vector(conv_param.GetOutputSpatialLengths());
    std::vector<ck::index_t> conv_filter_strides = to_index_vector(conv_param.conv_filter_strides_);
    std::vector<ck::index_t> conv_filter_dilations =
        to_index_vector(conv_param.conv_filter_dilations_);
    std::vector<ck::index_t> input_left_pads  = to_index_vector(conv_param.input_left_pads_);
    std::vector<ck::index_t> input_right_pads = to_index_vector(conv_param.input_right_pads_);

    // Run the device kernel.
    auto conv    = DeviceConvNaive{};
    auto invoker = conv.MakeInvoker();
    auto argument =
        conv.MakeArgument(static_cast<const InDataType*>(in_device_buf.GetDeviceBuffer()),
                          static_cast<const WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
                          static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
                          static_cast<ck::index_t>(conv_param.N_),
                          static_cast<ck::index_t>(conv_param.K_),
                          static_cast<ck::index_t>(conv_param.C_),
                          input_spatial_lengths,
                          filter_spatial_lengths,
                          output_spatial_lengths,
                          conv_filter_strides,
                          conv_filter_dilations,
                          input_left_pads,
                          input_right_pads,
                          InElementOp{},
                          WeiElementOp{},
                          OutElementOp{});

    if(!conv.IsSupportedArgument(argument))
    {
        std::cout << "Unsupported argument for naive conv3d kernel" << std::endl;
        return false;
    }

    invoker.Run(argument, StreamConfig{nullptr, false});

    // Run the CPU reference on the same host tensors.
    auto ref_conv = ReferenceConvFwd<NDimSpatial,
                                     InDataType,
                                     WeiDataType,
                                     OutDataType,
                                     InElementOp,
                                     WeiElementOp,
                                     OutElementOp,
                                     0,
                                     0,
                                     0,
                                     AccDataType>();

    auto ref_invoker  = ref_conv.MakeInvoker();
    auto ref_argument = ref_conv.MakeArgument(in,
                                              wei,
                                              out_host,
                                              conv_param.conv_filter_strides_,
                                              conv_param.conv_filter_dilations_,
                                              conv_param.input_left_pads_,
                                              conv_param.input_right_pads_,
                                              InElementOp{},
                                              WeiElementOp{},
                                              OutElementOp{});
    ref_invoker.Run(ref_argument);

    // Download the device result and compare.
    out_device_buf.FromDevice(out_device.mData.data());
    return ck::utils::check_err(out_device, out_host, "Error: incorrect results!", 1e-3, 1e-3);
}
// Small problem: 3x3x3 filter over a 7x7x7 input, stride 2, padding 1 on every side.
TEST(TestConv3dNaive, Conv3dNaive_Small)
{
    const ck::utils::conv::ConvParam conv_shape{/*spatial_dim=*/3,
                                                /*G=*/1,
                                                /*N=*/2,
                                                /*K=*/16,
                                                /*C=*/16,
                                                /*filter=*/{3, 3, 3},
                                                /*input spatial=*/{7, 7, 7},
                                                /*strides=*/{2, 2, 2},
                                                /*dilations=*/{1, 1, 1},
                                                /*left pads=*/{1, 1, 1},
                                                /*right pads=*/{1, 1, 1}};

    const bool pass = run_conv3d_naive_test<3>(conv_shape);
    EXPECT_TRUE(pass);
}
// Medium problem: 3x3x3 filter over a 14x14x14 input, unit stride, padding 1 on every side.
TEST(TestConv3dNaive, Conv3dNaive_Medium)
{
    const ck::utils::conv::ConvParam conv_shape{/*spatial_dim=*/3,
                                                /*G=*/1,
                                                /*N=*/4,
                                                /*K=*/32,
                                                /*C=*/32,
                                                /*filter=*/{3, 3, 3},
                                                /*input spatial=*/{14, 14, 14},
                                                /*strides=*/{1, 1, 1},
                                                /*dilations=*/{1, 1, 1},
                                                /*left pads=*/{1, 1, 1},
                                                /*right pads=*/{1, 1, 1}};

    const bool pass = run_conv3d_naive_test<3>(conv_shape);
    EXPECT_TRUE(pass);
}
// 1x1x1 filter over an 8x8x8 input, unit stride, no padding.
TEST(TestConv3dNaive, Conv3dNaive_UnitFilter)
{
    const ck::utils::conv::ConvParam conv_shape{/*spatial_dim=*/3,
                                                /*G=*/1,
                                                /*N=*/2,
                                                /*K=*/24,
                                                /*C=*/24,
                                                /*filter=*/{1, 1, 1},
                                                /*input spatial=*/{8, 8, 8},
                                                /*strides=*/{1, 1, 1},
                                                /*dilations=*/{1, 1, 1},
                                                /*left pads=*/{0, 0, 0},
                                                /*right pads=*/{0, 0, 0}};

    const bool pass = run_conv3d_naive_test<3>(conv_shape);
    EXPECT_TRUE(pass);
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@@ -0,0 +1,11 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
# GPU reference convolution tests (forward, backward-data, backward-weight).
# Each target is linked only when `result` is 0, matching the guard used for the other gtest
# targets in this project; presumably add_gtest_executable leaves `result` non-zero when it
# skips the target, in which case an unguarded target_link_libraries would fail to configure.
add_gtest_executable(test_gpu_reference_conv_fwd test_gpu_reference_conv_fwd.cpp)
if(result EQUAL 0)
    target_link_libraries(test_gpu_reference_conv_fwd PRIVATE utility)
endif()

add_gtest_executable(test_gpu_reference_conv_bwd_data test_gpu_reference_conv_bwd_data.cpp)
if(result EQUAL 0)
    target_link_libraries(test_gpu_reference_conv_bwd_data PRIVATE utility)
endif()

add_gtest_executable(test_gpu_reference_conv_bwd_weight test_gpu_reference_conv_bwd_weight.cpp)
if(result EQUAL 0)
    target_link_libraries(test_gpu_reference_conv_bwd_weight PRIVATE utility)
endif()

View File

@@ -0,0 +1,137 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include "ck/ck.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include <vector>
namespace ck {
namespace test {
// Common test shapes for all convolution tests (fwd, bwd_data, bwd_weight)
namespace conv_test_shapes {
// Small 2-D shape: 3x3 filter over a 7x7 input, unit stride and dilation, no padding.
// The element type is not part of the shape; each test picks it via template arguments.
inline ck::utils::conv::ConvParam get_2d_small()
{
    using ck::utils::conv::ConvParam;
    return ConvParam(/*num_dim_spatial=*/2,
                     /*G=*/1,
                     /*N=*/2,
                     /*K=*/8,
                     /*C=*/8,
                     /*filter_spatial=*/{3, 3},
                     /*input_spatial=*/{7, 7},
                     /*strides=*/{1, 1},
                     /*dilations=*/{1, 1},
                     /*left_pads=*/{0, 0},
                     /*right_pads=*/{0, 0});
}
// Medium 2-D shape: 3x3 filter over a 14x14 input, unit stride and dilation, no padding.
// The element type is not part of the shape; each test picks it via template arguments.
inline ck::utils::conv::ConvParam get_2d_medium()
{
    using ck::utils::conv::ConvParam;
    return ConvParam(/*num_dim_spatial=*/2,
                     /*G=*/1,
                     /*N=*/4,
                     /*K=*/16,
                     /*C=*/16,
                     /*filter_spatial=*/{3, 3},
                     /*input_spatial=*/{14, 14},
                     /*strides=*/{1, 1},
                     /*dilations=*/{1, 1},
                     /*left_pads=*/{0, 0},
                     /*right_pads=*/{0, 0});
}
// 1-D shape: filter of length 3 over an input of length 16, unit stride and dilation, no padding.
// The element type is not part of the shape; each test picks it via template arguments.
inline ck::utils::conv::ConvParam get_1d()
{
    using ck::utils::conv::ConvParam;
    return ConvParam(/*num_dim_spatial=*/1,
                     /*G=*/1,
                     /*N=*/2,
                     /*K=*/8,
                     /*C=*/8,
                     /*filter_spatial=*/{3},
                     /*input_spatial=*/{16},
                     /*strides=*/{1},
                     /*dilations=*/{1},
                     /*left_pads=*/{0},
                     /*right_pads=*/{0});
}
// Small 3-D shape: 3x3x3 filter over a 5x5x5 input, unit stride and dilation, no padding.
// The element type is not part of the shape; each test picks it via template arguments.
inline ck::utils::conv::ConvParam get_3d_small()
{
    using ck::utils::conv::ConvParam;
    return ConvParam(/*num_dim_spatial=*/3,
                     /*G=*/1,
                     /*N=*/1,
                     /*K=*/8,
                     /*C=*/8,
                     /*filter_spatial=*/{3, 3, 3},
                     /*input_spatial=*/{5, 5, 5},
                     /*strides=*/{1, 1, 1},
                     /*dilations=*/{1, 1, 1},
                     /*left_pads=*/{0, 0, 0},
                     /*right_pads=*/{0, 0, 0});
}
// Strided 2-D shape: 3x3 filter over an 8x8 input with stride 2, unit dilation, no padding.
inline ck::utils::conv::ConvParam get_2d_stride2()
{
    using ck::utils::conv::ConvParam;
    return ConvParam(/*num_dim_spatial=*/2,
                     /*G=*/1,
                     /*N=*/2,
                     /*K=*/8,
                     /*C=*/8,
                     /*filter_spatial=*/{3, 3},
                     /*input_spatial=*/{8, 8},
                     /*strides=*/{2, 2},
                     /*dilations=*/{1, 1},
                     /*left_pads=*/{0, 0},
                     /*right_pads=*/{0, 0});
}
// Grouped 2-D shape with G=2: 3x3 filter over a 7x7 input, unit stride and dilation, no padding.
// NOTE(review): whether K/C below are totals or per-group counts depends on ConvParam's
// convention, which is not visible here — confirm before relying on either reading.
inline ck::utils::conv::ConvParam get_2d_grouped_g2()
{
    using ck::utils::conv::ConvParam;
    return ConvParam(/*num_dim_spatial=*/2,
                     /*G=*/2,
                     /*N=*/2,
                     /*K=*/8,
                     /*C=*/16,
                     /*filter_spatial=*/{3, 3},
                     /*input_spatial=*/{7, 7},
                     /*strides=*/{1, 1},
                     /*dilations=*/{1, 1},
                     /*left_pads=*/{0, 0},
                     /*right_pads=*/{0, 0});
}
// Grouped 2-D shape with G=4: 3x3 filter over an 8x8 input, unit stride and dilation, no padding.
// NOTE(review): whether K/C below are totals or per-group counts depends on ConvParam's
// convention, which is not visible here — confirm before relying on either reading.
inline ck::utils::conv::ConvParam get_2d_grouped_g4()
{
    using ck::utils::conv::ConvParam;
    return ConvParam(/*num_dim_spatial=*/2,
                     /*G=*/4,
                     /*N=*/1,
                     /*K=*/16,
                     /*C=*/16,
                     /*filter_spatial=*/{3, 3},
                     /*input_spatial=*/{8, 8},
                     /*strides=*/{1, 1},
                     /*dilations=*/{1, 1},
                     /*left_pads=*/{0, 0},
                     /*right_pads=*/{0, 0});
}
} // namespace conv_test_shapes
} // namespace test
} // namespace ck

View File

@@ -0,0 +1,385 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include "ck/ck.hpp"
#include "ck/host_utility/hip_check_error.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
// CPU references
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp"
// GPU references
#include "ck/library/reference_tensor_operation/gpu/naive_conv_fwd_gpu.hpp"
#include "ck/library/reference_tensor_operation/gpu/naive_conv_bwd_data_gpu.hpp"
#include "ck/library/reference_tensor_operation/gpu/naive_conv_bwd_weight_gpu.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "common_test_params.hpp"
namespace ck {
namespace test {
// Selects which convolution pass test_conv_gpu_ref() exercises.
enum class ConvKernelType
{
Forward,       // checks the output computed from input and weight
BackwardData,  // checks the input gradient computed from weight and output
BackwardWeight // checks the weight gradient computed from input and output
};
// Fills `host_tensor` using GeneratorTensor_2 constructed with {-5, 5}, then uploads the host
// data into `device_mem`.
template <typename DataType>
void initialize_and_copy_tensor(Tensor<DataType>& host_tensor, DeviceMem& device_mem)
{
    GeneratorTensor_2<DataType> value_generator{-5, 5};
    host_tensor.GenerateTensorValue(value_generator);

    const auto* host_data = host_tensor.mData.data();
    device_mem.ToDevice(host_data);
}
// Maps the number of spatial dimensions to the default input / weight / output layout tags:
//   1-D: GNCW   / GKCX   / GNKW
//   2-D: GNCHW  / GKCYX  / GNKHW
//   3-D: GNCDHW / GKCZYX / GNKDHW
// Any other dimension count is rejected at compile time instead of silently selecting the 1-D
// layouts.
template <index_t NDimSpatial>
struct DefaultConvLayouts
{
    static_assert(NDimSpatial >= 1 && NDimSpatial <= 3,
                  "DefaultConvLayouts supports only 1, 2 or 3 spatial dimensions");

    using InLayout = std::conditional_t<NDimSpatial == 3,
                                        tensor_layout::convolution::GNCDHW,
                                        std::conditional_t<NDimSpatial == 2,
                                                           tensor_layout::convolution::GNCHW,
                                                           tensor_layout::convolution::GNCW>>;
    using WeiLayout = std::conditional_t<NDimSpatial == 3,
                                         tensor_layout::convolution::GKCZYX,
                                         std::conditional_t<NDimSpatial == 2,
                                                            tensor_layout::convolution::GKCYX,
                                                            tensor_layout::convolution::GKCX>>;
    using OutLayout = std::conditional_t<NDimSpatial == 3,
                                         tensor_layout::convolution::GNKDHW,
                                         std::conditional_t<NDimSpatial == 2,
                                                            tensor_layout::convolution::GNKHW,
                                                            tensor_layout::convolution::GNKW>>;
};
// Forward convolution check.
//
// Runs the GPU reference forward convolution on the device buffers, recomputes the same problem
// with the CPU reference on host copies of the inputs, and compares the two outputs.
//
//   params      convolution problem description
//   input_cpu   host input tensor; the caller uploads the same data to input_dev
//   weight_cpu  host weight tensor; the caller uploads the same data to weight_dev
//   input_dev   device buffer holding the input
//   weight_dev  device buffer holding the weight
//   output_dev  device buffer that receives the GPU result
//
// Returns the result of ck::utils::check_err on (GPU output, CPU output).
template <index_t NDimSpatial,
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename InLayout,
          typename WeiLayout,
          typename OutLayout>
bool test_conv_fwd_impl(const ck::utils::conv::ConvParam& params,
                        const Tensor<InDataType>& input_cpu,
                        const Tensor<WeiDataType>& weight_cpu,
                        DeviceMem& input_dev,
                        DeviceMem& weight_dev,
                        DeviceMem& output_dev)
{
    using InElementOp  = tensor_operation::element_wise::PassThrough;
    using WeiElementOp = tensor_operation::element_wise::PassThrough;
    using OutElementOp = tensor_operation::element_wise::PassThrough;

    // Call GPU reference with ConvParam directly
    ref::naive_conv_fwd<InLayout,
                        WeiLayout,
                        OutLayout,
                        InDataType,
                        WeiDataType,
                        OutDataType,
                        InElementOp,
                        WeiElementOp,
                        OutElementOp>(
        static_cast<const InDataType*>(input_dev.GetDeviceBuffer()),
        static_cast<const WeiDataType*>(weight_dev.GetDeviceBuffer()),
        static_cast<OutDataType*>(output_dev.GetDeviceBuffer()),
        params);
    HIP_CHECK_ERROR(hipDeviceSynchronize());

    // Run CPU reference
    std::vector<long_index_t> strides_long(params.conv_filter_strides_.begin(),
                                           params.conv_filter_strides_.end());
    std::vector<long_index_t> dilations_long(params.conv_filter_dilations_.begin(),
                                             params.conv_filter_dilations_.end());
    std::vector<long_index_t> left_pads_long(params.input_left_pads_.begin(),
                                             params.input_left_pads_.end());
    // Left and right pads are forwarded separately so that asymmetric padding is checked against
    // the right reference (reusing the left pads for both would ignore input_right_pads_).
    std::vector<long_index_t> right_pads_long(params.input_right_pads_.begin(),
                                              params.input_right_pads_.end());

    Tensor<InDataType> input_ref   = input_cpu;
    Tensor<WeiDataType> weight_ref = weight_cpu;
    Tensor<OutDataType> output_ref(
        ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(params));

    auto ref_conv    = tensor_operation::host::ReferenceConvFwd<NDimSpatial,
                                                             InDataType,
                                                             WeiDataType,
                                                             OutDataType,
                                                             InElementOp,
                                                             WeiElementOp,
                                                             OutElementOp>();
    auto ref_invoker = ref_conv.MakeInvoker();
    auto ref_arg     = ref_conv.MakeArgument(input_ref,
                                         weight_ref,
                                         output_ref,
                                         strides_long,
                                         dilations_long,
                                         left_pads_long,
                                         right_pads_long,
                                         InElementOp{},
                                         WeiElementOp{},
                                         OutElementOp{});
    ref_invoker.Run(ref_arg);

    // Copy result from device and compare
    Tensor<OutDataType> output_gpu(output_ref.mDesc);
    output_dev.FromDevice(output_gpu.mData.data());
    HIP_CHECK_ERROR(hipDeviceSynchronize());

    return ck::utils::check_err(output_gpu, output_ref);
}
// Backward-data convolution check.
//
// Runs the GPU reference backward-data convolution on the device buffers, recomputes the input
// gradient with the CPU reference on host copies of weight and output, and compares the two.
//
//   params      convolution problem description
//   weight_cpu  host weight tensor; the caller uploads the same data to weight_dev
//   output_cpu  host output tensor; the caller uploads the same data to output_dev
//   weight_dev  device buffer holding the weight
//   output_dev  device buffer holding the output
//   input_dev   device buffer that receives the GPU result
//
// Returns the result of ck::utils::check_err on (GPU input, CPU input).
template <index_t NDimSpatial,
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename InLayout,
          typename WeiLayout,
          typename OutLayout>
bool test_conv_bwd_data_impl(const ck::utils::conv::ConvParam& params,
                             const Tensor<WeiDataType>& weight_cpu,
                             const Tensor<OutDataType>& output_cpu,
                             DeviceMem& weight_dev,
                             DeviceMem& output_dev,
                             DeviceMem& input_dev)
{
    using InElementOp  = tensor_operation::element_wise::PassThrough;
    using WeiElementOp = tensor_operation::element_wise::PassThrough;
    using OutElementOp = tensor_operation::element_wise::PassThrough;

    // Call GPU reference with ConvParam directly
    ref::naive_conv_bwd_data<InLayout,
                             WeiLayout,
                             OutLayout,
                             InDataType,
                             WeiDataType,
                             OutDataType,
                             InElementOp,
                             WeiElementOp,
                             OutElementOp>(
        static_cast<InDataType*>(input_dev.GetDeviceBuffer()),
        static_cast<const WeiDataType*>(weight_dev.GetDeviceBuffer()),
        static_cast<const OutDataType*>(output_dev.GetDeviceBuffer()),
        params);
    HIP_CHECK_ERROR(hipDeviceSynchronize());

    // Run CPU reference
    std::vector<long_index_t> strides_long(params.conv_filter_strides_.begin(),
                                           params.conv_filter_strides_.end());
    std::vector<long_index_t> dilations_long(params.conv_filter_dilations_.begin(),
                                             params.conv_filter_dilations_.end());
    std::vector<long_index_t> left_pads_long(params.input_left_pads_.begin(),
                                             params.input_left_pads_.end());
    // Left and right pads are forwarded separately so that asymmetric padding is checked against
    // the right reference (reusing the left pads for both would ignore input_right_pads_).
    std::vector<long_index_t> right_pads_long(params.input_right_pads_.begin(),
                                              params.input_right_pads_.end());

    Tensor<InDataType> input_ref(
        ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(params));
    Tensor<WeiDataType> weight_ref = weight_cpu;
    Tensor<OutDataType> output_ref = output_cpu;

    auto ref_conv    = tensor_operation::host::ReferenceConvBwdData<NDimSpatial,
                                                                 InDataType,
                                                                 WeiDataType,
                                                                 OutDataType,
                                                                 InElementOp,
                                                                 WeiElementOp,
                                                                 OutElementOp>();
    auto ref_invoker = ref_conv.MakeInvoker();
    auto ref_arg     = ref_conv.MakeArgument(input_ref,
                                         weight_ref,
                                         output_ref,
                                         strides_long,
                                         dilations_long,
                                         left_pads_long,
                                         right_pads_long,
                                         InElementOp{},
                                         WeiElementOp{},
                                         OutElementOp{});
    ref_invoker.Run(ref_arg);

    // Copy result from device and compare
    Tensor<InDataType> input_gpu(input_ref.mDesc);
    input_dev.FromDevice(input_gpu.mData.data());
    HIP_CHECK_ERROR(hipDeviceSynchronize());

    return ck::utils::check_err(input_gpu, input_ref);
}
// Backward-weight convolution check.
//
// Runs the GPU reference backward-weight convolution on the device buffers, recomputes the weight
// gradient with the CPU reference on host copies of input and output, and compares the two.
//
//   params      convolution problem description
//   input_cpu   host input tensor; the caller uploads the same data to input_dev
//   output_cpu  host output tensor; the caller uploads the same data to output_dev
//   input_dev   device buffer holding the input
//   output_dev  device buffer holding the output
//   weight_dev  device buffer that receives the GPU result
//
// Returns the result of ck::utils::check_err on (GPU weight, CPU weight).
template <index_t NDimSpatial,
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename InLayout,
          typename WeiLayout,
          typename OutLayout>
bool test_conv_bwd_weight_impl(const ck::utils::conv::ConvParam& params,
                               const Tensor<InDataType>& input_cpu,
                               const Tensor<OutDataType>& output_cpu,
                               DeviceMem& input_dev,
                               DeviceMem& output_dev,
                               DeviceMem& weight_dev)
{
    using InElementOp  = tensor_operation::element_wise::PassThrough;
    using WeiElementOp = tensor_operation::element_wise::PassThrough;
    using OutElementOp = tensor_operation::element_wise::PassThrough;

    // Call GPU reference with ConvParam directly
    ref::naive_conv_bwd_weight<InLayout,
                               WeiLayout,
                               OutLayout,
                               InDataType,
                               WeiDataType,
                               OutDataType,
                               InElementOp,
                               WeiElementOp,
                               OutElementOp>(
        static_cast<const InDataType*>(input_dev.GetDeviceBuffer()),
        static_cast<WeiDataType*>(weight_dev.GetDeviceBuffer()),
        static_cast<const OutDataType*>(output_dev.GetDeviceBuffer()),
        params);
    HIP_CHECK_ERROR(hipDeviceSynchronize());

    // Run CPU reference
    std::vector<long_index_t> strides_long(params.conv_filter_strides_.begin(),
                                           params.conv_filter_strides_.end());
    std::vector<long_index_t> dilations_long(params.conv_filter_dilations_.begin(),
                                             params.conv_filter_dilations_.end());
    std::vector<long_index_t> left_pads_long(params.input_left_pads_.begin(),
                                             params.input_left_pads_.end());
    // Left and right pads are forwarded separately so that asymmetric padding is checked against
    // the right reference (reusing the left pads for both would ignore input_right_pads_).
    std::vector<long_index_t> right_pads_long(params.input_right_pads_.begin(),
                                              params.input_right_pads_.end());

    Tensor<InDataType> input_ref = input_cpu;
    Tensor<WeiDataType> weight_ref(
        ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(params));
    Tensor<OutDataType> output_ref = output_cpu;

    auto ref_conv    = tensor_operation::host::ReferenceConvBwdWeight<NDimSpatial,
                                                                   InDataType,
                                                                   WeiDataType,
                                                                   OutDataType,
                                                                   InElementOp,
                                                                   WeiElementOp,
                                                                   OutElementOp>();
    auto ref_invoker = ref_conv.MakeInvoker();
    auto ref_arg     = ref_conv.MakeArgument(input_ref,
                                         weight_ref,
                                         output_ref,
                                         strides_long,
                                         dilations_long,
                                         left_pads_long,
                                         right_pads_long,
                                         InElementOp{},
                                         WeiElementOp{},
                                         OutElementOp{});
    ref_invoker.Run(ref_arg);

    // Copy result from device and compare
    Tensor<WeiDataType> weight_gpu(weight_ref.mDesc);
    weight_dev.FromDevice(weight_gpu.mData.data());
    HIP_CHECK_ERROR(hipDeviceSynchronize());

    return ck::utils::check_err(weight_gpu, weight_ref);
}
// Test driver: builds host tensors and device buffers for `params`, fills and uploads the two
// operands that the selected pass reads, then forwards to the matching *_impl check.
// Layouts default to DefaultConvLayouts<NDimSpatial> and can be overridden per call.
// Returns whatever the selected *_impl function returns.
template <index_t NDimSpatial,
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename InLayout  = typename DefaultConvLayouts<NDimSpatial>::InLayout,
          typename WeiLayout = typename DefaultConvLayouts<NDimSpatial>::WeiLayout,
          typename OutLayout = typename DefaultConvLayouts<NDimSpatial>::OutLayout>
bool test_conv_gpu_ref(const ck::utils::conv::ConvParam& params, ConvKernelType kernel_type)
{
    namespace conv_utils = ck::utils::conv;

    // Host tensors, shaped by the packed descriptors for the requested layouts.
    Tensor<InDataType> input(
        conv_utils::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(params));
    Tensor<WeiDataType> weight(
        conv_utils::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(params));
    Tensor<OutDataType> output(
        conv_utils::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(params));

    // One device buffer per tensor, sized from the host storage.
    DeviceMem input_dev(input.mData.size() * sizeof(InDataType));
    DeviceMem weight_dev(weight.mData.size() * sizeof(WeiDataType));
    DeviceMem output_dev(output.mData.size() * sizeof(OutDataType));

    switch(kernel_type)
    {
    case ConvKernelType::Forward:
        initialize_and_copy_tensor(input, input_dev);
        initialize_and_copy_tensor(weight, weight_dev);
        return test_conv_fwd_impl<NDimSpatial,
                                  InDataType,
                                  WeiDataType,
                                  OutDataType,
                                  InLayout,
                                  WeiLayout,
                                  OutLayout>(
            params, input, weight, input_dev, weight_dev, output_dev);

    case ConvKernelType::BackwardData:
        initialize_and_copy_tensor(weight, weight_dev);
        initialize_and_copy_tensor(output, output_dev);
        return test_conv_bwd_data_impl<NDimSpatial,
                                       InDataType,
                                       WeiDataType,
                                       OutDataType,
                                       InLayout,
                                       WeiLayout,
                                       OutLayout>(
            params, weight, output, weight_dev, output_dev, input_dev);

    default: // BackwardWeight (and, as before, any value that is not one of the two above)
        initialize_and_copy_tensor(input, input_dev);
        initialize_and_copy_tensor(output, output_dev);
        return test_conv_bwd_weight_impl<NDimSpatial,
                                         InDataType,
                                         WeiDataType,
                                         OutDataType,
                                         InLayout,
                                         WeiLayout,
                                         OutLayout>(
            params, input, output, input_dev, output_dev, weight_dev);
    }
}
} // namespace test
} // namespace ck

View File

@@ -0,0 +1,224 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <gtest/gtest.h>
#include "gpu_reference_utils.hpp"
using namespace ck;
using ck::test::ConvKernelType;
// Backward-data check: small 2-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP16Small)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_small();
    const bool result     = test::test_conv_gpu_ref<2, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: medium 2-D shape, float tensors, default layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP32Medium)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_medium();
    const bool result     = test::test_conv_gpu_ref<2, float, float, float>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: 1-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdData, Conv1DFP16)
{
    const auto conv_shape = test::conv_test_shapes::get_1d();
    const bool result     = test::test_conv_gpu_ref<1, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: small 3-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdData, Conv3DFP16Small)
{
    const auto conv_shape = test::conv_test_shapes::get_3d_small();
    const bool result     = test::test_conv_gpu_ref<3, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: stride-2 2-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP16Stride2)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_stride2();
    const bool result     = test::test_conv_gpu_ref<2, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: grouped (G=2) 2-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP16GroupedG2)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result     = test::test_conv_gpu_ref<2, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: grouped (G=4) 2-D shape, float tensors, default layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP32GroupedG4)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g4();
    const bool result     = test::test_conv_gpu_ref<2, float, float, float>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: grouped (G=2) 2-D shape, float tensors, NHWGC / GKYXC / NHWGK layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP32GroupedNHWGC_GKYXC_NHWGK)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result =
        test::test_conv_gpu_ref<2, float, float, float, layout::NHWGC, layout::GKYXC, layout::NHWGK>(
            conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: grouped (G=2) 2-D shape, half_t tensors, NHWGC / GKYXC / NHWGK layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP16GroupedNHWGC_GKYXC_NHWGK)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result     = test::test_conv_gpu_ref<2,
                                                half_t,
                                                half_t,
                                                half_t,
                                                layout::NHWGC,
                                                layout::GKYXC,
                                                layout::NHWGK>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: grouped (G=2) 2-D shape, float tensors, NGCHW / GKYXC / NGKHW layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP32GroupedNGCHW_GKYXC_NGKHW)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result =
        test::test_conv_gpu_ref<2, float, float, float, layout::NGCHW, layout::GKYXC, layout::NGKHW>(
            conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: grouped (G=2) 2-D shape, half_t tensors, NGCHW / GKYXC / NGKHW layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP16GroupedNGCHW_GKYXC_NGKHW)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result     = test::test_conv_gpu_ref<2,
                                                half_t,
                                                half_t,
                                                half_t,
                                                layout::NGCHW,
                                                layout::GKYXC,
                                                layout::NGKHW>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: grouped (G=2) 2-D shape, float tensors, NGCHW / GKCYX / NGKHW layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP32GroupedNGCHW_GKCYX_NGKHW)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result =
        test::test_conv_gpu_ref<2, float, float, float, layout::NGCHW, layout::GKCYX, layout::NGKHW>(
            conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: grouped (G=2) 2-D shape, half_t tensors, NGCHW / GKCYX / NGKHW layouts.
TEST(GpuReferenceConvBwdData, Conv2DFP16GroupedNGCHW_GKCYX_NGKHW)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result     = test::test_conv_gpu_ref<2,
                                                half_t,
                                                half_t,
                                                half_t,
                                                layout::NGCHW,
                                                layout::GKCYX,
                                                layout::NGKHW>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: 3-D shape regrouped to G=2, float tensors, NDHWGC / GKZYXC / NDHWGK.
TEST(GpuReferenceConvBwdData, Conv3DFP32GroupedNDHWGC_GKZYXC_NDHWGK)
{
    namespace layout = tensor_layout::convolution;

    // Start from the small 3-D shape and override the group and channel counts.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    conv_shape.C_   = 16;
    conv_shape.K_   = 16;

    const bool result = test::test_conv_gpu_ref<3,
                                                float,
                                                float,
                                                float,
                                                layout::NDHWGC,
                                                layout::GKZYXC,
                                                layout::NDHWGK>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: 3-D shape regrouped to G=2, half_t tensors, NDHWGC / GKZYXC / NDHWGK.
TEST(GpuReferenceConvBwdData, Conv3DFP16GroupedNDHWGC_GKZYXC_NDHWGK)
{
    namespace layout = tensor_layout::convolution;

    // Start from the small 3-D shape and override the group and channel counts.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    conv_shape.C_   = 16;
    conv_shape.K_   = 16;

    const bool result = test::test_conv_gpu_ref<3,
                                                half_t,
                                                half_t,
                                                half_t,
                                                layout::NDHWGC,
                                                layout::GKZYXC,
                                                layout::NDHWGK>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: 3-D shape regrouped to G=2, float tensors, NGCDHW / GKCZYX / NGKDHW.
TEST(GpuReferenceConvBwdData, Conv3DFP32GroupedNGCDHW_GKCZYX_NGKDHW)
{
    namespace layout = tensor_layout::convolution;

    // Start from the small 3-D shape and override the group and channel counts.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    conv_shape.C_   = 16;
    conv_shape.K_   = 16;

    const bool result = test::test_conv_gpu_ref<3,
                                                float,
                                                float,
                                                float,
                                                layout::NGCDHW,
                                                layout::GKCZYX,
                                                layout::NGKDHW>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}
// Backward-data check: 3-D shape regrouped to G=2, half_t tensors, NGCDHW / GKCZYX / NGKDHW.
TEST(GpuReferenceConvBwdData, Conv3DFP16GroupedNGCDHW_GKCZYX_NGKDHW)
{
    namespace layout = tensor_layout::convolution;

    // Start from the small 3-D shape and override the group and channel counts.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    conv_shape.C_   = 16;
    conv_shape.K_   = 16;

    const bool result = test::test_conv_gpu_ref<3,
                                                half_t,
                                                half_t,
                                                half_t,
                                                layout::NGCDHW,
                                                layout::GKCZYX,
                                                layout::NGKDHW>(
        conv_shape, ConvKernelType::BackwardData);
    EXPECT_TRUE(result);
}

View File

@@ -0,0 +1,224 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <gtest/gtest.h>
#include "gpu_reference_utils.hpp"
using namespace ck;
using ck::test::ConvKernelType;
// Backward-weight check: small 2-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP16Small)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_small();
    const bool result     = test::test_conv_gpu_ref<2, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: medium 2-D shape, float tensors, default layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP32Medium)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_medium();
    const bool result     = test::test_conv_gpu_ref<2, float, float, float>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: 1-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdWeight, Conv1DFP16)
{
    const auto conv_shape = test::conv_test_shapes::get_1d();
    const bool result     = test::test_conv_gpu_ref<1, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: small 3-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdWeight, Conv3DFP16Small)
{
    const auto conv_shape = test::conv_test_shapes::get_3d_small();
    const bool result     = test::test_conv_gpu_ref<3, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: stride-2 2-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP16Stride2)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_stride2();
    const bool result     = test::test_conv_gpu_ref<2, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: grouped (G=2) 2-D shape, half_t tensors, default layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP16GroupedG2)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result     = test::test_conv_gpu_ref<2, half_t, half_t, half_t>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: grouped (G=4) 2-D shape, float tensors, default layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP32GroupedG4)
{
    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g4();
    const bool result     = test::test_conv_gpu_ref<2, float, float, float>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: grouped (G=2) 2-D shape, float tensors, NHWGC / GKYXC / NHWGK layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP32GroupedNHWGC_GKYXC_NHWGK)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result =
        test::test_conv_gpu_ref<2, float, float, float, layout::NHWGC, layout::GKYXC, layout::NHWGK>(
            conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: grouped (G=2) 2-D shape, half_t tensors, NHWGC / GKYXC / NHWGK layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP16GroupedNHWGC_GKYXC_NHWGK)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result     = test::test_conv_gpu_ref<2,
                                                half_t,
                                                half_t,
                                                half_t,
                                                layout::NHWGC,
                                                layout::GKYXC,
                                                layout::NHWGK>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: grouped (G=2) 2-D shape, float tensors, NGCHW / GKYXC / NGKHW layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP32GroupedNGCHW_GKYXC_NGKHW)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result =
        test::test_conv_gpu_ref<2, float, float, float, layout::NGCHW, layout::GKYXC, layout::NGKHW>(
            conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: grouped (G=2) 2-D shape, half_t tensors, NGCHW / GKYXC / NGKHW layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP16GroupedNGCHW_GKYXC_NGKHW)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result     = test::test_conv_gpu_ref<2,
                                                half_t,
                                                half_t,
                                                half_t,
                                                layout::NGCHW,
                                                layout::GKYXC,
                                                layout::NGKHW>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: grouped (G=2) 2-D shape, float tensors, NGCHW / GKCYX / NGKHW layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP32GroupedNGCHW_GKCYX_NGKHW)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result =
        test::test_conv_gpu_ref<2, float, float, float, layout::NGCHW, layout::GKCYX, layout::NGKHW>(
            conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: grouped (G=2) 2-D shape, half_t tensors, NGCHW / GKCYX / NGKHW layouts.
TEST(GpuReferenceConvBwdWeight, Conv2DFP16GroupedNGCHW_GKCYX_NGKHW)
{
    namespace layout = tensor_layout::convolution;

    const auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();
    const bool result     = test::test_conv_gpu_ref<2,
                                                half_t,
                                                half_t,
                                                half_t,
                                                layout::NGCHW,
                                                layout::GKCYX,
                                                layout::NGKHW>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
// Backward-weight check: 3-D shape regrouped to G=2, float tensors, NDHWGC / GKZYXC / NDHWGK.
TEST(GpuReferenceConvBwdWeight, Conv3DFP32GroupedNDHWGC_GKZYXC_NDHWGK)
{
    namespace layout = tensor_layout::convolution;

    // Start from the small 3-D shape and override the group and channel counts.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    conv_shape.C_   = 16;
    conv_shape.K_   = 16;

    const bool result = test::test_conv_gpu_ref<3,
                                                float,
                                                float,
                                                float,
                                                layout::NDHWGC,
                                                layout::GKZYXC,
                                                layout::NDHWGK>(
        conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(result);
}
TEST(GpuReferenceConvBwdWeight, Conv3DFP16GroupedNDHWGC_GKZYXC_NDHWGK)
{
    // Backward-weight run, 3 spatial dims, half_t for all three data types,
    // with NDHWGC / GKZYXC / NDHWGK layouts.
    using DataT     = half_t;
    using InLayout  = tensor_layout::convolution::NDHWGC;
    using WeiLayout = tensor_layout::convolution::GKZYXC;
    using OutLayout = tensor_layout::convolution::NDHWGK;

    // Start from the small 3D shape and turn it into a two-group problem.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    // Original note said "8 per group"; TODO confirm whether C_/K_ hold
    // totals or per-group counts.
    conv_shape.C_ = 16;
    conv_shape.K_ = 16;

    const bool passed =
        test::test_conv_gpu_ref<3, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvBwdWeight, Conv3DFP32GroupedNGCDHW_GKCZYX_NGKDHW)
{
    // Backward-weight run, 3 spatial dims, float for all three data types,
    // with NGCDHW / GKCZYX / NGKDHW layouts.
    using DataT     = float;
    using InLayout  = tensor_layout::convolution::NGCDHW;
    using WeiLayout = tensor_layout::convolution::GKCZYX;
    using OutLayout = tensor_layout::convolution::NGKDHW;

    // Start from the small 3D shape and turn it into a two-group problem.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    // Original note said "8 per group"; TODO confirm whether C_/K_ hold
    // totals or per-group counts.
    conv_shape.C_ = 16;
    conv_shape.K_ = 16;

    const bool passed =
        test::test_conv_gpu_ref<3, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvBwdWeight, Conv3DFP16GroupedNGCDHW_GKCZYX_NGKDHW)
{
    // Backward-weight run, 3 spatial dims, half_t for all three data types,
    // with NGCDHW / GKCZYX / NGKDHW layouts.
    using DataT     = half_t;
    using InLayout  = tensor_layout::convolution::NGCDHW;
    using WeiLayout = tensor_layout::convolution::GKCZYX;
    using OutLayout = tensor_layout::convolution::NGKDHW;

    // Start from the small 3D shape and turn it into a two-group problem.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    // Original note said "8 per group"; TODO confirm whether C_/K_ hold
    // totals or per-group counts.
    conv_shape.C_ = 16;
    conv_shape.K_ = 16;

    const bool passed =
        test::test_conv_gpu_ref<3, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::BackwardWeight);
    EXPECT_TRUE(passed);
}

View File

@@ -0,0 +1,222 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <gtest/gtest.h>
#include "gpu_reference_utils.hpp"
using namespace ck;
using ck::test::ConvKernelType;
TEST(GpuReferenceConvFwd, Conv2DFP16Small)
{
    // Forward run, 2 spatial dims, half_t for all three data types, on the
    // shape from get_2d_small(). Layout template arguments are not given, so
    // whatever defaults test_conv_gpu_ref declares are used.
    using DataT = half_t;

    auto conv_shape = test::conv_test_shapes::get_2d_small();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT>(conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP32Medium)
{
    // Forward run, 2 spatial dims, float for all three data types, on the
    // shape from get_2d_medium(). Layout template arguments are not given, so
    // whatever defaults test_conv_gpu_ref declares are used.
    using DataT = float;

    auto conv_shape = test::conv_test_shapes::get_2d_medium();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT>(conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv1DFP16)
{
    // Forward run, 1 spatial dim, half_t for all three data types, on the
    // shape from get_1d(). Layout template arguments are not given, so
    // whatever defaults test_conv_gpu_ref declares are used.
    using DataT = half_t;

    auto conv_shape = test::conv_test_shapes::get_1d();

    const bool passed =
        test::test_conv_gpu_ref<1, DataT, DataT, DataT>(conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv3DFP16Small)
{
    // Forward run, 3 spatial dims, half_t for all three data types, on the
    // shape from get_3d_small(). Layout template arguments are not given, so
    // whatever defaults test_conv_gpu_ref declares are used.
    using DataT = half_t;

    auto conv_shape = test::conv_test_shapes::get_3d_small();

    const bool passed =
        test::test_conv_gpu_ref<3, DataT, DataT, DataT>(conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP16Stride2)
{
    // Forward run, 2 spatial dims, half_t for all three data types, on the
    // shape from get_2d_stride2(). Layout template arguments are not given, so
    // whatever defaults test_conv_gpu_ref declares are used.
    using DataT = half_t;

    auto conv_shape = test::conv_test_shapes::get_2d_stride2();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT>(conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP16GroupedG2)
{
    // Forward run, 2 spatial dims, half_t for all three data types, on the
    // shape from get_2d_grouped_g2(). Layout template arguments are not given,
    // so whatever defaults test_conv_gpu_ref declares are used.
    using DataT = half_t;

    auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT>(conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP32GroupedG4)
{
    // Forward run, 2 spatial dims, float for all three data types, on the
    // shape from get_2d_grouped_g4(). Layout template arguments are not given,
    // so whatever defaults test_conv_gpu_ref declares are used.
    using DataT = float;

    auto conv_shape = test::conv_test_shapes::get_2d_grouped_g4();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT>(conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP32GroupedNHWGC_GKYXC_NHWGK)
{
    // Forward run, 2 spatial dims, float for all three data types,
    // on the shape from get_2d_grouped_g2() with NHWGC / GKYXC / NHWGK layouts.
    using DataT     = float;
    using InLayout  = tensor_layout::convolution::NHWGC;
    using WeiLayout = tensor_layout::convolution::GKYXC;
    using OutLayout = tensor_layout::convolution::NHWGK;

    auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP16GroupedNHWGC_GKYXC_NHWGK)
{
    // Forward run, 2 spatial dims, half_t for all three data types,
    // on the shape from get_2d_grouped_g2() with NHWGC / GKYXC / NHWGK layouts.
    using DataT     = half_t;
    using InLayout  = tensor_layout::convolution::NHWGC;
    using WeiLayout = tensor_layout::convolution::GKYXC;
    using OutLayout = tensor_layout::convolution::NHWGK;

    auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP32GroupedNGCHW_GKYXC_NGKHW)
{
    // Forward run, 2 spatial dims, float for all three data types,
    // on the shape from get_2d_grouped_g2() with NGCHW / GKYXC / NGKHW layouts.
    using DataT     = float;
    using InLayout  = tensor_layout::convolution::NGCHW;
    using WeiLayout = tensor_layout::convolution::GKYXC;
    using OutLayout = tensor_layout::convolution::NGKHW;

    auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP16GroupedNGCHW_GKYXC_NGKHW)
{
    // Forward run, 2 spatial dims, half_t for all three data types,
    // on the shape from get_2d_grouped_g2() with NGCHW / GKYXC / NGKHW layouts.
    using DataT     = half_t;
    using InLayout  = tensor_layout::convolution::NGCHW;
    using WeiLayout = tensor_layout::convolution::GKYXC;
    using OutLayout = tensor_layout::convolution::NGKHW;

    auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP32GroupedNGCHW_GKCYX_NGKHW)
{
    // Forward run, 2 spatial dims, float for all three data types,
    // on the shape from get_2d_grouped_g2() with NGCHW / GKCYX / NGKHW layouts.
    using DataT     = float;
    using InLayout  = tensor_layout::convolution::NGCHW;
    using WeiLayout = tensor_layout::convolution::GKCYX;
    using OutLayout = tensor_layout::convolution::NGKHW;

    auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv2DFP16GroupedNGCHW_GKCYX_NGKHW)
{
    // Forward run, 2 spatial dims, half_t for all three data types,
    // on the shape from get_2d_grouped_g2() with NGCHW / GKCYX / NGKHW layouts.
    using DataT     = half_t;
    using InLayout  = tensor_layout::convolution::NGCHW;
    using WeiLayout = tensor_layout::convolution::GKCYX;
    using OutLayout = tensor_layout::convolution::NGKHW;

    auto conv_shape = test::conv_test_shapes::get_2d_grouped_g2();

    const bool passed =
        test::test_conv_gpu_ref<2, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv3DFP32GroupedNDHWGC_GKZYXC_NDHWGK)
{
    // Forward run, 3 spatial dims, float for all three data types,
    // with NDHWGC / GKZYXC / NDHWGK layouts.
    using DataT     = float;
    using InLayout  = tensor_layout::convolution::NDHWGC;
    using WeiLayout = tensor_layout::convolution::GKZYXC;
    using OutLayout = tensor_layout::convolution::NDHWGK;

    // Start from the small 3D shape and turn it into a two-group problem.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    // Original note said "8 per group"; TODO confirm whether C_/K_ hold
    // totals or per-group counts.
    conv_shape.C_ = 16;
    conv_shape.K_ = 16;

    const bool passed =
        test::test_conv_gpu_ref<3, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv3DFP16GroupedNDHWGC_GKZYXC_NDHWGK)
{
    // Forward run, 3 spatial dims, half_t for all three data types,
    // with NDHWGC / GKZYXC / NDHWGK layouts.
    using DataT     = half_t;
    using InLayout  = tensor_layout::convolution::NDHWGC;
    using WeiLayout = tensor_layout::convolution::GKZYXC;
    using OutLayout = tensor_layout::convolution::NDHWGK;

    // Start from the small 3D shape and turn it into a two-group problem.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    // Original note said "8 per group"; TODO confirm whether C_/K_ hold
    // totals or per-group counts.
    conv_shape.C_ = 16;
    conv_shape.K_ = 16;

    const bool passed =
        test::test_conv_gpu_ref<3, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv3DFP32GroupedNGCDHW_GKCZYX_NGKDHW)
{
    // Forward run, 3 spatial dims, float for all three data types,
    // with NGCDHW / GKCZYX / NGKDHW layouts.
    using DataT     = float;
    using InLayout  = tensor_layout::convolution::NGCDHW;
    using WeiLayout = tensor_layout::convolution::GKCZYX;
    using OutLayout = tensor_layout::convolution::NGKDHW;

    // Start from the small 3D shape and turn it into a two-group problem.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    // Original note said "8 per group"; TODO confirm whether C_/K_ hold
    // totals or per-group counts.
    conv_shape.C_ = 16;
    conv_shape.K_ = 16;

    const bool passed =
        test::test_conv_gpu_ref<3, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}
TEST(GpuReferenceConvFwd, Conv3DFP16GroupedNGCDHW_GKCZYX_NGKDHW)
{
    // Forward run, 3 spatial dims, half_t for all three data types,
    // with NGCDHW / GKCZYX / NGKDHW layouts.
    using DataT     = half_t;
    using InLayout  = tensor_layout::convolution::NGCDHW;
    using WeiLayout = tensor_layout::convolution::GKCZYX;
    using OutLayout = tensor_layout::convolution::NGKDHW;

    // Start from the small 3D shape and turn it into a two-group problem.
    auto conv_shape = test::conv_test_shapes::get_3d_small();
    conv_shape.G_   = 2;
    // Original note said "8 per group"; TODO confirm whether C_/K_ hold
    // totals or per-group counts.
    conv_shape.C_ = 16;
    conv_shape.K_ = 16;

    const bool passed =
        test::test_conv_gpu_ref<3, DataT, DataT, DataT, InLayout, WeiLayout, OutLayout>(
            conv_shape, ConvKernelType::Forward);
    EXPECT_TRUE(passed);
}

View File

@@ -73,7 +73,7 @@ bool RunConvBwdDataTest(const ck::utils::conv::ConvParam& param, ck::index_t spl
InLayout,
DataType,
DataType,
DataType>(true, // do_verification
DataType>(2, // do_verification
1, // init_method
false, // do_log
false, // time_kernel

View File

@@ -47,7 +47,7 @@ class TestGroupedConvndBwdDataXdl : public ::testing::Test
DataType,
DataType,
DataType>(
true, // do_verification
2, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel

View File

@@ -73,7 +73,7 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
InDataType,
WeiDataType,
OutDataType>(
true, // do_verification
2, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel

View File

@@ -80,7 +80,7 @@ bool RunConvBwdWeightTest(const ck::utils::conv::ConvParam& param, ck::index_t s
InDataType,
WeiDataType,
OutDataType>(
true, // do_verification
2, // do_verification
1, // init_method
false, // do_log
false, // time_kernel

View File

@@ -46,7 +46,7 @@ class TestGroupedConvndFwd : public ::testing::Test
DataType,
DataType,
IndexType>(
true, // do_verification
2, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel

View File

@@ -77,7 +77,7 @@ bool RunConvTest(const ck::utils::conv::ConvParam& param)
DataType,
DataType,
DataType,
IndexType>(true, // do_verification
IndexType>(2, // do_verification
1, // init_method
false, // do_log
false, // time_kernel