[CK_BUILDER] convolution testing (#3267)

* Add README.md for testing

* Add tensor_memory_manager.

* ck-builder: tensor memory manager rebase fixes

This fixes some issues caused by the API being changed recently.
Also, this streamlines the ckt namespace to always be ck_tile::builder::test,
as this is already being used by other tests

Really, this commit should be squashed into the previous,
but I'm keeping it separate for brevity.

* ck-builder: test arguments initial prototype

* ck-builder: test system initial prototype

* ck-builder: fix non-standardized copyright comments

* ck-builder: new prototype

* ck-builder: group testing inputs/outputs into a separate structure

This is basically the return of the tensor memory manager after all,
except that the design is more closely tied to the actual operation.
Using a struct allows us to add additional input/output tensors
without breaking code (by defaulting those new parameters). Note
that the tensors are split into separate inputs/outputs structures because we
usually want to allocate the output _twice_: once for the real
computation and once for the reference computation.

* ck-builder: simplify prototype naming; start docs

* ck-builder: update testing readme

* ck-builder: testing documentation

* ck-builder: HipStatusMatcher

This matcher can be used to check HIP status codes and provide
nice and readable error messages.

* ck-builder: tensor_buffer.hpp tests

* ck-builder: conv_fwd.hpp tests

* ck-builder: add example end-to-end test in conv fwd 2d fp16

* ck-builder: simplify extent usage

* ck-builder: update testing doc

* ck-builder: skip end to end test on non-gfx9

* fix check_copyright_year interpreter

/bin/bash is not guaranteed to exist on Linux. Signed,
a NixOS user

* ck-builder: fix copyrights

* ck-builder: reduce conv fwd testing size

This test allocated 24GB of memory, too much for 16GB cards.

---------

Co-authored-by: John Shumway <jshumway@amd.com>
This commit is contained in:
Robin Voetter
2025-12-13 15:33:41 +01:00
committed by GitHub
parent 9707ddb444
commit 6219b12730
17 changed files with 1660 additions and 57 deletions

View File

@@ -78,24 +78,27 @@ add_ck_builder_test(test_ckb_conv_builder
test_fwd_instance_traits.cpp
test_bwd_data_instance_traits.cpp
test_instance_traits_util.cpp
unit_device_buffer.cpp
unit_tensor_descriptor.cpp
unit_conv_elementwise_op.cpp
unit_conv_tensor_layout.cpp
unit_conv_tensor_type.cpp
unit_conv_thread_block.cpp
unit_conv_tuning_params.cpp)
# Tests the inline diff utility used for comparing strings in test assertions
add_ck_builder_test(test_ckb_inline_diff test_inline_diff.cpp)
unit_conv_tuning_params.cpp
unit_conv_fwd_testing.cpp)
target_link_libraries(test_ckb_conv_builder PRIVATE utility)
# Tests the inline diff utility used for comparing strings in test assertions
add_ck_builder_test(test_ckb_inline_diff test_inline_diff.cpp)
# Tests convolution trait selection and configuration
add_ck_builder_test(test_ckb_conv_traits
conv/ck/test_conv_traits.cpp)
# Tests convolution problem description and parameter handling
add_ck_builder_test(test_ckb_conv_description
test_conv_description.cpp)
# Tests convolution trait selection and configuration
add_ck_builder_test(test_ckb_conv_traits
conv/ck/test_conv_traits.cpp)
# Tests convolution problem description and parameter handling
add_ck_builder_test(test_ckb_conv_description
test_conv_description.cpp)
################################################################################
# REGRESSION TESTS - Integration Tests (With Kernel Compilation)
################################################################################
@@ -134,8 +137,8 @@ add_ck_builder_test(test_ckb_build_fwd_instances
conv/ck/test_ckb_conv_fwd_3d_fp32.cpp
conv/ck_tile/test_ckb_conv_fwd_2d_fp16_v3.cpp
conv/ck_tile/test_ckb_conv_bwd_weight_2d_fp16_v3.cpp
conv/ck_tile/test_ckb_conv_bwd_data_2d_fp16_v3.cpp
)
conv/ck_tile/test_ckb_conv_bwd_data_2d_fp16_v3.cpp)
target_link_libraries(test_ckb_build_fwd_instances PRIVATE utility)
################################################################################

View File

@@ -4,46 +4,83 @@
#include "utils/ckb_conv_test_configs.hpp"
#include "utils/ckb_conv_test_utils.hpp"
#include "utils/conv_algorithm_type_utils.hpp"
#include "ck_tile/builder/testing/conv_fwd_ck.hpp"
#include "ck_tile/host/device_prop.hpp"
namespace {
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
namespace cku = ck_tile::builder::test_utils;
using namespace ck_tile::builder::test_utils;
// Convolution signature shared by the tests in this file: a 2D forward
// convolution with FP16 data and FP32 accumulation, using the GNHWC input,
// GKYXC weight and GNHWK output layouts.
constexpr auto SIGNATURE =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::GNHWC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::GNHWK}}};
TEST(FwdConvInstances,
Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_FP16_GNHWC)
// Algorithm description shared by the tests in this file. It starts from a
// default-constructed DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3
// algorithm object and configures it through the chained with_* calls below.
constexpr auto ALGORITHM = cku::ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{}
.with_thread_block(cku::FwdThreadBlock_256_256x256x32)
.with_gemm_config(cku::FwdGemmParams_Xdl_4x4_per_wave)
.with_transfer(cku::FwdTransfer_4x64x1)
.with_specializations(ckb::ConvFwdSpecialization::DEFAULT,
ckb::GemmSpecialization::MNKPadding)
.with_block_gemm(cku::BlockGemmDesc_v3_intrawave);
// Builder combines SIGNATURE and ALGORITHM; Instance is the type it exposes
// and is what the tests instantiate.
using Builder = ckb::ConvBuilder<SIGNATURE, ALGORITHM>;
using Instance = Builder::Instance;
TEST(Fwd2DFp16_CShufV3_GNHWC, Create)
{
using enum ck_tile::builder::ConvDirection;
using enum ck_tile::builder::DataType;
using enum ck_tile::builder::TensorLayout;
constexpr ConvSignature FwdConvSignature{.spatial_dim = 2,
.direction = FORWARD,
.data_type = FP16,
.accumulation_data_type = FP32,
.input = {.config = {.layout = GNHWC}},
.weight = {.config = {.layout = GKYXC}},
.output = {.config = {.layout = GNHWK}}};
constexpr auto FwdConvAlgorithm =
ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{}
.with_thread_block(FwdThreadBlock_256_256x256x32)
.with_gemm_config(FwdGemmParams_Xdl_4x4_per_wave)
.with_transfer(FwdTransfer_4x64x1)
.with_specializations(ConvFwdSpecialization::FILTER_1X1_PAD0,
GemmSpecialization::MNKPadding)
.with_block_gemm(BlockGemmDesc_v3_intrawave);
using Builder = ConvBuilder<FwdConvSignature, FwdConvAlgorithm>;
const auto expected_transfer_parameters = to_string(FwdConvAlgorithm);
run_test<Builder>({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3",
expected_transfer_parameters,
"Filter1x1Pad0",
"Intrawave",
"v3",
"GNHWC,GKYXC,EmptyTuple,GNHWK",
"PassThrough,PassThrough,PassThrough",
"MNKPadding"});
const auto expected_transfer_parameters = to_string(ALGORITHM);
cku::run_test<Builder>({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3",
expected_transfer_parameters,
"Default",
"Intrawave",
"v3",
"GNHWC,GKYXC,EmptyTuple,GNHWK",
"PassThrough,PassThrough,PassThrough",
"MNKPadding"});
}
} // namespace
// End-to-end run of the built instance: allocates inputs and outputs for a
// small problem and invokes the convolution. The output values are not
// compared against anything here; the test only exercises the run itself.
TEST(Fwd2DFp16_CShufV3_GNHWC, EndToEnd)
{
    // Only gfx9 devices are exercised; everything else is skipped.
    const bool is_gfx9 = ck_tile::get_device_name().starts_with("gfx9");
    if(!is_gfx9)
    {
        GTEST_SKIP() << "unsupported architecture";
    }

    // 16 images of 64x56 with 32 input channels, one group, a 5x3 filter
    // producing 48 output channels; unit strides/dilations and no padding.
    ckt::Args<SIGNATURE> args = {
        .lengths =
            {
                .batch_size      = 16,
                .groups          = 1,
                .input_channels  = 32,
                .output_channels = 48,
                .image           = {.width = 56, .height = 64},
                .filter          = {.width = 3, .height = 5},
            },
        .filter_strides     = {.width = 1, .height = 1},
        .filter_dilation    = {.width = 1, .height = 1},
        .input_left_pad     = {.width = 0, .height = 0},
        .input_right_pad    = {.width = 0, .height = 0},
        .a_elementwise_op   = {},
        .b_elementwise_op   = {},
        .cde_elementwise_op = {},
    };

    auto input_tensors  = alloc_inputs(args);
    auto output_tensors = alloc_outputs(args);

    Instance conv{};
    ckt::run(conv, args, input_tensors.get(), output_tensors.get());
}

View File

@@ -5,6 +5,8 @@
#include "testing_utils.hpp"
using ck_tile::test::HipError;
using ck_tile::test::HipSuccess;
using ck_tile::test::InstanceMatcher;
using ck_tile::test::InstanceSet;
using ck_tile::test::StringEqWithDiff;
@@ -96,3 +98,12 @@ TEST(InstanceMatcher, ExplainMatchResult)
"Unexpected: 1\n"
"- python\n"));
}
// Checks that HipSuccess()/HipError() match the expected status code and
// that their negations reject it.
TEST(HipStatusMatcher, Basic)
{
    const hipError_t ok            = hipSuccess;
    const hipError_t invalid_value = hipErrorInvalidValue;
    const hipError_t out_of_memory = hipErrorOutOfMemory;

    // Direct matches.
    EXPECT_THAT(ok, HipSuccess());
    EXPECT_THAT(invalid_value, HipError(hipErrorInvalidValue));

    // Negated matches.
    EXPECT_THAT(invalid_value, Not(HipSuccess()));
    EXPECT_THAT(ok, Not(HipError(hipErrorInvalidValue)));
    EXPECT_THAT(out_of_memory, Not(HipError(hipErrorInvalidValue)));
}

View File

@@ -11,6 +11,11 @@
#include <vector>
#include <algorithm>
// Streams the HIP-provided description of `status` so that Google Test can
// print hipError_t values in failure messages.
std::ostream& operator<<(std::ostream& os, hipError_t status)
{
    const char* const message = hipGetErrorString(status);
    os << message;
    return os;
}
namespace ck_tile::test {
// Wagner-Fischer Algorithm for Computing Edit Distance and Inline Diff
@@ -297,4 +302,41 @@ void InstanceMatcher::DescribeNegationTo(std::ostream* os) const
*os << "is not equal to " << expected_;
}
bool HipStatusMatcher::MatchAndExplain(hipError_t actual,
::testing::MatchResultListener* listener) const
{
(void)listener;
if(actual == expected_)
{
return true;
}
return false;
}
void HipStatusMatcher::DescribeTo(std::ostream* os) const { *os << hipGetErrorString(expected_); }
// Describes the negated matcher: "any error" when success was expected,
// otherwise "isn't equal to <expected error string>".
void HipStatusMatcher::DescribeNegationTo(std::ostream* os) const
{
    if(expected_ != hipSuccess)
    {
        *os << "isn't equal to " << hipGetErrorString(expected_);
        return;
    }

    *os << "any error";
}
// Builds a matcher that accepts only hipSuccess.
::testing::Matcher<hipError_t> HipSuccess()
{
    // MakeMatcher receives the newly allocated implementation object.
    auto* const impl = new HipStatusMatcher(hipSuccess);
    return ::testing::MakeMatcher(impl);
}
// Builds a matcher that accepts only the given HIP status code.
::testing::Matcher<hipError_t> HipError(hipError_t error)
{
    // MakeMatcher receives the newly allocated implementation object.
    auto* const impl = new HipStatusMatcher(error);
    return ::testing::MakeMatcher(impl);
}
} // namespace ck_tile::test

View File

@@ -11,6 +11,16 @@
#include <vector>
#include <array>
/// @brief ostream overload for hipError_t
///
/// Google Test likes to print errors to ostream, and this provides integration
/// with that. Since we only expect to use this with CK-Builder's own tests,
/// providing this implementation does not seem problematic, but if it starts
/// to clash with another implementation then we will need to provide this
/// implementation another way. Unfortunately, Google Test does not have a
/// dedicated function to override to provide printing support.
std::ostream& operator<<(std::ostream& os, hipError_t status);
namespace ck_tile::test {
/// @brief Reports whether stdout or stderr is attached to a terminal.
///
/// stderr is only queried when stdout is not a terminal.
static bool isTerminalOutput()
{
    if(isatty(fileno(stdout)))
    {
        return true;
    }
    return isatty(fileno(stderr)) != 0;
}
@@ -109,4 +119,35 @@ struct InstanceMatcher : public ::testing::MatcherInterface<InstanceSet>
::testing::Matcher<InstanceSet> InstancesMatch(const InstanceSet& expected);
/// @brief Google Test matcher for hipError_t values.
///
/// A custom Google Test matcher implementation that compares a HIP status
/// code against a single expected code. Obtain an instance through
/// `HipSuccess()` or `HipError()`.
///
/// @see HipSuccess
/// @see HipError
/// @see ::testing::MatcherInterface
struct HipStatusMatcher : public ::testing::MatcherInterface<hipError_t>
{
    /// @param expected The status code this matcher accepts.
    HipStatusMatcher(hipError_t expected) : expected_{expected} {}

    /// @brief Returns whether `actual` is the expected status code.
    bool MatchAndExplain(hipError_t actual,
                         ::testing::MatchResultListener* listener) const override;

    /// @brief Writes a description of the expected status to `os`.
    void DescribeTo(std::ostream* os) const override;

    /// @brief Writes a description of the negated matcher to `os`.
    void DescribeNegationTo(std::ostream* os) const override;

    /// The status code that `MatchAndExplain` compares against.
    hipError_t expected_;
};
/// @brief Construct a Google Test matcher that checks that a HIP operation
/// was successful.
::testing::Matcher<hipError_t> HipSuccess();
/// @brief Construct a Google Test matcher that checks that a HIP operation
/// returned a particular error code.
///
/// @param error The error to expect.
::testing::Matcher<hipError_t> HipError(hipError_t error);
} // namespace ck_tile::test

View File

@@ -0,0 +1,83 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include "impl/conv_signature_types.hpp"
#include "testing_utils.hpp"
#include "ck_tile/builder/testing/conv_fwd.hpp"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <vector>
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
using ::testing::ElementsAreArray;
using ::testing::NotNull;
// Signature used by every test in this file: a 2D forward convolution with
// BF16 data and FP32 accumulation, using the NHWGC input, GKYXC weight and
// NHWGK output layouts.
constexpr auto SIGNATURE =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// Problem description shared by the tests below. The sizes are deliberately
// all different so that a swapped dimension shows up in the expected
// descriptor lengths. With unit strides/dilations and no padding, the 125x99
// image and 4x9 filter give the 122x91 output checked in MakeDescriptors.
constexpr ckt::Args<SIGNATURE> ARGS = {
.lengths =
{
.batch_size = 17,
.groups = 5,
.input_channels = 13,
.output_channels = 44,
.image =
{
.width = 99,
.height = 125,
},
.filter =
{
.width = 9,
.height = 4,
},
},
.filter_strides = {.width = 1, .height = 1},
.filter_dilation = {.width = 1, .height = 1},
.input_left_pad = {.width = 0, .height = 0},
.input_right_pad = {.width = 0, .height = 0},
// Elementwise operations are left default-initialized.
.a_elementwise_op = {},
.b_elementwise_op = {},
.cde_elementwise_op = {},
};
// Aliases for the SIGNATURE-specific input/output types. Merely naming them
// forces the templates to be instantiated for this signature.
using Inputs = ckt::Inputs<SIGNATURE>;
using Outputs = ckt::Outputs<SIGNATURE>;
using UniqueInputs = ckt::UniqueInputs<SIGNATURE>;
using UniqueOutputs = ckt::UniqueOutputs<SIGNATURE>;
// Compile-time checks that the ValidUniqueInputs/ValidUniqueOutputs
// predicates hold for SIGNATURE.
static_assert(ckt::ValidUniqueInputs<SIGNATURE>);
static_assert(ckt::ValidUniqueOutputs<SIGNATURE>);
// Verifies the lengths of the descriptors that ARGS produces for the input,
// weight and output tensors.
TEST(ConvFwdTesting, MakeDescriptors)
{
    // Google Test cannot print std::span, so copy the lengths into a vector
    // to get legible failure messages.
    const auto lengths_of = [](const auto& desc) {
        const auto extents = desc.get_lengths();
        return std::vector(extents.begin(), extents.end());
    };

    const auto input_lengths = lengths_of(ARGS.make_input_descriptor());
    EXPECT_THAT(input_lengths, ElementsAreArray({5, 17, 13, 125, 99}));

    const auto weight_lengths = lengths_of(ARGS.make_weight_descriptor());
    EXPECT_THAT(weight_lengths, ElementsAreArray({5, 44, 13, 4, 9}));

    // Output height/width: 125 - 4 + 1 = 122 and 99 - 9 + 1 = 91.
    const auto output_lengths = lengths_of(ARGS.make_output_descriptor());
    EXPECT_THAT(output_lengths, ElementsAreArray({5, 17, 44, 122, 91}));
}
// Verifies that allocating the inputs and outputs for ARGS yields non-null
// pointers for every tensor.
TEST(ConvFwdTesting, Alloc)
{
    auto owned_inputs  = alloc_inputs(ARGS);
    auto owned_outputs = alloc_outputs(ARGS);

    auto&& in = owned_inputs.get();
    EXPECT_THAT(in.input, NotNull());
    EXPECT_THAT(in.weight, NotNull());

    auto&& out = owned_outputs.get();
    EXPECT_THAT(out.output, NotNull());
}

View File

@@ -0,0 +1,81 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include "ck_tile/builder/testing/tensor_buffer.hpp"
#include "testing_utils.hpp"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <vector>
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
using ck_tile::test::HipError;
using ck_tile::test::HipSuccess;
using ::testing::Eq;
using ::testing::IsNull;
using ::testing::NotNull;
using ::testing::Throws;
// A default-constructed DeviceBuffer must hold a null pointer.
TEST(DeviceBuffer, DefaultToNull)
{
    ckt::DeviceBuffer empty_buffer;
    const auto* const raw = empty_buffer.get();
    EXPECT_THAT(raw, IsNull());
}
// Verifies that alloc_buffer returns a non-null, writable device pointer.
TEST(DeviceBuffer, AllocBuffer)
{
    const auto size = 12345;
    auto buffer     = ckt::alloc_buffer(size);

    // Pointer should be non-null
    EXPECT_THAT(buffer.get(), NotNull());

    // Actually, the pointer should be a device pointer. The attributes are
    // value-initialized and the query is a fatal assertion so that the field
    // checks below never read indeterminate memory if the query fails.
    hipPointerAttribute_t attr{};
    ASSERT_THAT(hipPointerGetAttributes(&attr, buffer.get()), HipSuccess());
    EXPECT_THAT(attr.devicePointer, NotNull());
    EXPECT_THAT(attr.type, Eq(hipMemoryTypeDevice));

    // Memory should be writable without error
    EXPECT_THAT(hipMemset(buffer.get(), 0xFF, size), HipSuccess());
}
// Verifies that the memory is released when the buffer goes out of scope.
TEST(DeviceBuffer, AutoFree)
{
    const auto size        = 12345;
    std::byte* stale_ptr = nullptr;

    {
        // Keep only the raw pointer; the owning buffer ends with this scope.
        auto scoped_buffer = ckt::alloc_buffer(size);
        stale_ptr          = scoped_buffer.get();
    }

    // Trying to use a pointer after freeing should return an error in HIP.
    const hipError_t status = hipMemset(stale_ptr, 0xFF, size);
    EXPECT_THAT(status, HipError(hipErrorInvalidValue));
}
// Verifies that an allocation that cannot be satisfied is reported by
// throwing OutOfDeviceMemoryError.
TEST(DeviceBuffer, ThrowsOnOom)
{
    // 2^60 bytes (1 EiB).
    const auto size = size_t{1} << 60;

    auto allocate_too_much = [] { auto buffer = ckt::alloc_buffer(size); };

    EXPECT_THAT(allocate_too_much, Throws<ckt::OutOfDeviceMemoryError>());
}
// Verifies that alloc_tensor_buffer returns a non-null buffer that can be
// written across the descriptor's full element space.
TEST(DeviceBuffer, AllocTensorBuffer)
{
    // Packed 128x128x128 FP32 tensor.
    std::vector<size_t> lengths = {128, 128, 128};
    std::vector<size_t> strides = {128 * 128, 128, 1};
    ckt::TensorDescriptor<ckb::DataType::FP32> descriptor(lengths, strides);

    auto tensor_buffer = ckt::alloc_tensor_buffer(descriptor);

    // Pointer should be non-null
    EXPECT_THAT(tensor_buffer.get(), NotNull());

    // Memory should be writable without error
    const auto size_in_bytes = descriptor.get_element_space_size_in_bytes();
    EXPECT_THAT(hipMemset(tensor_buffer.get(), 0xFF, size_in_bytes), HipSuccess());
}

View File

@@ -0,0 +1,47 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include "ck_tile/builder/testing/tensor_buffer.hpp"
#include "testing_utils.hpp"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <vector>
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
using ::testing::ElementsAreArray;
using ::testing::Ge;
// Verifies that a descriptor reports the lengths and strides it was
// constructed with.
TEST(TensorDescriptor, Basic)
{
    constexpr auto dt = ckb::DataType::FP16;

    std::vector<size_t> lengths = {123, 456, 789};
    std::vector<size_t> strides = {456 * 789, 789, 1};
    ckt::TensorDescriptor<dt> descriptor(lengths, strides);

    const auto reported_lengths = descriptor.get_lengths();
    EXPECT_THAT(reported_lengths, ElementsAreArray(lengths));

    const auto reported_strides = descriptor.get_strides();
    EXPECT_THAT(reported_strides, ElementsAreArray(strides));
}
// Verifies that the reported element space is large enough to hold the last
// element of a tensor with non-packed, permuted strides.
TEST(TensorDescriptor, ComputeSize)
{
    constexpr auto dt = ckb::DataType::FP32;

    std::vector<size_t> lengths = {305, 130, 924};
    std::vector<size_t> strides = {1000 * 1000, 1, 1000};
    ckt::TensorDescriptor<dt> descriptor(lengths, strides);

    // Offset of the last item in memory; one more element than that is the
    // minimum size the descriptor has to report.
    size_t last_offset = 0;
    for(size_t dim = 0; dim < lengths.size(); ++dim)
    {
        last_offset += (lengths[dim] - 1) * strides[dim];
    }
    const size_t expected_size = last_offset + 1;

    EXPECT_THAT(descriptor.get_element_space_size(), Ge(expected_size));
    EXPECT_THAT(descriptor.get_element_space_size_in_bytes(),
                Ge(expected_size * ckt::data_type_sizeof(dt)));
}