ck-builder: tensor input/output reflection (#3536)

This adds some utilities to automatically generate UniqueInputs,
UniqueOutputs, alloc_inputs, alloc_outputs, and validate, based
on a Inputs::reflect() and Outputs::reflect().
This commit is contained in:
Robin Voetter
2026-01-12 09:45:53 +01:00
committed by GitHub
parent 32408c8bc0
commit b352a68606
8 changed files with 299 additions and 102 deletions

View File

@@ -7,6 +7,7 @@
#include "ck_tile/builder/factory/helpers/ck/conv_tensor_layout.hpp"
#include "ck_tile/builder/factory/helpers/ck/conv_elementwise_op.hpp"
#include "ck_tile/builder/testing/testing.hpp"
#include "ck_tile/builder/testing/testing_reflect.hpp"
#include "ck_tile/builder/testing/filter_extent.hpp"
#include "ck_tile/builder/testing/tensor_buffer.hpp"
#include "ck_tile/builder/testing/tensor_initialization.hpp"
@@ -182,6 +183,12 @@ struct Inputs<SIGNATURE>
{
void* input;
void* weight;
static void reflect(const Args<SIGNATURE>& args, const auto& inspect)
{
inspect("input", args.make_input_descriptor(), &Inputs<SIGNATURE>::input);
inspect("weight", args.make_weight_descriptor(), &Inputs<SIGNATURE>::weight);
}
};
/// @brief `Outputs` specialization for forward convolution.
@@ -194,68 +201,13 @@ template <auto SIGNATURE>
struct Outputs<SIGNATURE>
{
void* output;
};
/// @brief `UniqueInputs` specialization for forward convolution.
///
/// @tparam SIGNATURE Forward convolution signature.
///
/// @see UniqueInputs
/// @see ValidUniqueInputs
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsForward<SIGNATURE>
struct UniqueInputs<SIGNATURE>
{
DeviceBuffer input_buf;
DeviceBuffer weight_buf;
/// @see ValidUniqueInputs
Inputs<SIGNATURE> get()
static void reflect(const Args<SIGNATURE>& args, const auto& inspect)
{
return {
.input = input_buf.get(),
.weight = weight_buf.get(),
};
inspect("output", args.make_output_descriptor(), &Outputs<SIGNATURE>::output);
}
};
/// @brief `UniqueOutputs` specialization for forward convolution.
///
/// @tparam SIGNATURE Forward convolution signature.
///
/// @see UniqueOutputs
/// @see ValidUniqueOutputs
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsForward<SIGNATURE>
struct UniqueOutputs<SIGNATURE>
{
DeviceBuffer output_buf;
/// @see ValidUniqueOutputs
Outputs<SIGNATURE> get()
{
return {
.output = output_buf.get(),
};
}
};
/// @brief `alloc_inputs()` specialization for forward convolution.
///
/// @tparam SIGNATURE Forward convolution signature.
///
/// @see alloc_inputs()
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsForward<SIGNATURE> &&
ValidUniqueInputs<SIGNATURE>
UniqueInputs<SIGNATURE> alloc_inputs(const Args<SIGNATURE>& args)
{
return {
.input_buf = alloc_tensor_buffer(args.make_input_descriptor()),
.weight_buf = alloc_tensor_buffer(args.make_weight_descriptor()),
};
}
/// @brief `init_inputs()` specialization for forward convolution.
///
/// @tparam SIGNATURE Forward convolution signature.
@@ -269,34 +221,4 @@ void init_inputs(const Args<SIGNATURE>& args, Inputs<SIGNATURE> inputs)
init_tensor_buffer_uniform_fp(inputs.weight, args.make_weight_descriptor(), -2.0f, 2.0f);
}
/// @brief `alloc_outputs()` specialization for forward convolution.
///
/// @tparam SIGNATURE Forward convolution signature.
///
/// @see alloc_outputs()
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsForward<SIGNATURE> &&
ValidUniqueOutputs<SIGNATURE>
UniqueOutputs<SIGNATURE> alloc_outputs(const Args<SIGNATURE>& args)
{
return {
.output_buf = alloc_tensor_buffer(args.make_output_descriptor()),
};
}
/// @brief `validate()` specialization for forward convolution.
///
/// @tparam SIGNATURE Forward convolution signature.
///
/// @see validate()
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsForward<SIGNATURE>
ValidationReport
validate(const Args<SIGNATURE>& args, Outputs<SIGNATURE> actual, Outputs<SIGNATURE> expected)
{
ValidationReport report;
report.check("output", args.make_output_descriptor(), actual.output, expected.output);
return report;
}
} // namespace ck_tile::builder::test

View File

@@ -81,4 +81,15 @@ inline DeviceBuffer alloc_buffer(size_t size)
return DeviceBuffer(d_buf);
}
/// @brief "Align" an offset to a multiple of a particular alignment.
///
/// Returns `addr` aligned to the next multiple of `alignment`.
///
/// @param addr The address to align.
/// @param alignment The alignment.
inline size_t align_fwd(size_t addr, size_t alignment)
{
return addr % alignment == 0 ? addr : addr - addr % alignment + alignment;
}
} // namespace ck_tile::builder::test

View File

@@ -5,6 +5,8 @@
#include <concepts>
#include "ck_tile/builder/testing/tensor_descriptor.hpp"
#include "ck_tile/builder/testing/tensor_buffer.hpp"
#include "ck_tile/builder/testing/validation.hpp"
/// This file is the main header for the CK-Builder testing system. A high-level
@@ -132,8 +134,8 @@ struct Outputs;
/// be created using `alloc_inputs()` and that an instance of the corresponding
/// `Inputs` structure can be obtained using `.get()`.
///
/// @note The easiest way to implement this type is to use the `DeviceBuffer`
/// type to allocate individual device buffers for each input tensor.
/// @note A default implementation is provided for this type if `Inputs`
/// supports `TensorReflectable`.
///
/// @tparam SIGNATURE The signature to specialize the structure for.
///
@@ -151,8 +153,8 @@ struct UniqueInputs;
/// be created using `alloc_outputs()` and that an instance of the corresponding
/// `Outputs` structure can be obtained using `.get()`.
///
/// @note The easiest way to implement this type is to use the `DeviceBuffer`
/// type to allocate individual device buffers for each output tensor.
/// @note A default implementation is provided for this type if `Outputs`
/// supports `TensorReflectable`.
///
/// @tparam SIGNATURE The signature to specialize the structure for.
///
@@ -197,6 +199,12 @@ concept ValidUniqueOutputs = requires(UniqueOutputs<SIGNATURE>& inputs) {
/// amount of memory required and then allocate it on the device, for example
/// using `alloc_buffer` or `alloc_tensor_buffer`.
///
/// @note This function is explicitly deleted to generate compile errors
/// for missing implementations.
///
/// @note A default implementation is provided for this function if `Inputs`
/// supports `TensorReflectable`.
///
/// @tparam SIGNATURE The signature to specialize the structure for.
///
/// @param args The run-time arguments of the operation.
@@ -207,22 +215,22 @@ concept ValidUniqueOutputs = requires(UniqueOutputs<SIGNATURE>& inputs) {
/// @see alloc_tensor_buffer()
template <auto SIGNATURE>
requires ValidUniqueInputs<SIGNATURE>
UniqueInputs<SIGNATURE> alloc_inputs(const Args<SIGNATURE>& args);
UniqueInputs<SIGNATURE> alloc_inputs(const Args<SIGNATURE>& args) = delete;
/// @brief Allocate inputs corresponding to a signature.
/// @brief Initialize inputs corresponding to a signature.
///
/// The `init_inputs()` function is used to initialize pseudo-random data
/// to the tensors specified in the Inputs structure. Implementors should
/// fill each of the tensors in `inputs` with appropriate random data.
///
/// @note This function is explicitly deleted to generate compile errors
/// for missing implementations.
///
/// @tparam SIGNATURE the signature to specialize the structure for.
///
/// @param args The run-time arguments of the operation.
/// @param inputs The operation inputs to initialize with random data.
///
/// @note This function is explicitly deleted to generate compile errors
/// for missing implementations.
///
/// @see Inputs
/// @see tensor_initialization
template <auto SIGNATURE>
@@ -235,13 +243,16 @@ void init_inputs(const Args<SIGNATURE>& args, Inputs<SIGNATURE> inputs) = delete
/// amount of memory required and then allocate it on the device, for example
/// using `alloc_buffer` or `alloc_tensor_buffer`.
///
/// @note This function is explicitly deleted to generate compile errors
/// for missing implementations.
///
/// @note A default implementation is provided for this function if `Outputs`
/// supports `TensorReflectable`.
///
/// @tparam SIGNATURE The signature to specialize the structure for.
///
/// @param args The run-time arguments of the operation.
///
/// @note This function is explicitly deleted to generate compile errors
/// for missing implementations.
///
/// @see Outputs
/// @see UniqueOutputs
/// @see alloc_buffer()
@@ -262,15 +273,15 @@ UniqueInputs<SIGNATURE> alloc_outputs(const Args<SIGNATURE>& args) = delete;
/// were incorrect, and where (a subset of) those elements are located within
/// the tensor. See `ValidationReport` for more information about the report.
///
/// @note This function is explicitly deleted to generate compile errors
/// for missing implementations.
///
/// @tparam SIGNATURE The signature to specialize the structure for.
///
/// @param args The run-time arguments of the operation.
/// @param actual The actual results, the results of the operation to-be-tested.
/// @param expected The expected results, the results of the reference implementation.
///
/// @note This function is explicitly deleted to generate compile errors
/// for missing implementations.
///
/// @see ValidationReport
template <auto SIGNATURE>
ValidationReport validate(const Args<SIGNATURE>& args,

View File

@@ -0,0 +1,199 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include <string_view>
/// testing.hpp requires developers of a type of SIGNATURE to implement
/// quite a lot of functionality for each SIGNATURE. For example, next
/// to `Args`, `Inputs`, `Outputs`, `run`, they also have to define
/// `UniqueInputs`, `UniqueOutputs`, `alloc_inputs`, `alloc_outputs`,
/// and `validate`. The implementation of these latter few functions
/// is usually quite straight forward and adds a bunch of copy-paste
/// overhead. The functionality in this file offers an alternative
/// route: By implementing some reflection functionality in `Inputs`
/// and `Outputs`, we can automatically derive most of the functionality.
namespace ck_tile::builder::test {
/// @brief Check whether an `Input` or `Output` struct can be reflected.
///
/// In order to avoid having to manually redefine a bunch of types related to
/// each `Inputs`/`Outputs` structure, those structures can also provide some
/// "reflection" functionality. To this end, they should implement
/// `static void reflect(const Args<SIGNATURE> args&, auto inspect)`, where `inspect`
/// is called with information about each field in the struct. In more detail,
/// the signature of the `inspect` function is as follows:
///
/// void inspect(
/// // A human-readable name for the tensor
/// std::string_view name,
/// // Descriptor for the tensor in memory, usually obtained via `args`.
/// const TensorDescriptor<DT, RANK>& desc,
/// // Member pointer to a field of `T`, which is a GPU-memory pointer
/// // to the relevant tensor memory.
/// void* T::* ptr);
///
/// Here, `T` is `Inputs<SIGNATURE>` or `Outputs<SIGNATURE>`.
///
/// @see Inputs
/// @see Outputs
template <typename T, auto SIGNATURE>
concept TensorReflectable = requires(const Args<SIGNATURE>& args) {
{
T::reflect(args,
[]([[maybe_unused]] std::string_view name,
// Note: This will be a TensorDescriptor<DT, RANK>, but the actual
// DT and RANK may differ depending on member.
[[maybe_unused]] const auto& desc,
[[maybe_unused]] void* T::*ptr) {})
};
};
namespace detail {
/// The default alignment between tensors allocated separately
/// by `UniqueTensors`. This should be large enough to accomodate
/// any type. hipMalloc returns an alignment of 256 by default.
constexpr size_t TENSOR_ALIGNMENT = 256;
/// @brief Common type for automatically managing memory of sets of tensors.
///
/// This type implements the automatic memory management logic for `Inputs` and
/// `Outputs` that support reflection.
///
/// @tparam SIGNATURE The signature to specialize the structure for.
/// @tparam Tensors The `Inputs` or `Outputs` type corresponding to `SIGNATURE`.
template <auto SIGNATURE, typename Tensors>
requires TensorReflectable<Tensors, SIGNATURE>
struct UniqueTensors
{
/// @brief Allocate tensors.
///
/// This function computes the total size of memory to allocate according to
/// the tensors in `args`, and then allocates it as a continuous buffer.
///
/// @param args The run-time arguments of the operation.
explicit UniqueTensors(const Args<SIGNATURE>& args)
{
// First compute the total size of all tensors combined
size_t total_size = 0;
Tensors::reflect(args,
[&, this]([[maybe_unused]] std::string_view name,
const auto& desc,
[[maybe_unused]] void* Tensors::*ptr) {
total_size = align_fwd(total_size, TENSOR_ALIGNMENT);
total_size += desc.get_element_space_size_in_bytes();
});
data_ = alloc_buffer(total_size);
// Now assign the pointers based on the same offsets that
// we computed in the first loop.
size_t offset = 0;
Tensors::reflect(args,
[&, this]([[maybe_unused]] std::string_view name,
const auto& desc,
[[maybe_unused]] void* Tensors::*ptr) {
offset = align_fwd(offset, TENSOR_ALIGNMENT);
tensors_.*ptr = data_.get() + offset;
offset += desc.get_element_space_size_in_bytes();
});
}
/// @brief Return raw `Inputs` or `Outputs` type.
///
/// @see ValidUniqueInputs
/// @see ValidUniqueOutputs
Tensors get() const { return tensors_; }
private:
/// Owning pointer of input memory
DeviceBuffer data_;
/// Struct with pointers to each tensor. Stored here so that we
/// don't need to keep recomputing it.
Tensors tensors_;
};
} // namespace detail
/// @brief Implementation of `UniqueInputs` for `Inputs` that support reflection.
///
/// @tparam SIGNATURE The signature to specialize for.
///
/// @see UniqueInputs
template <auto SIGNATURE>
requires TensorReflectable<Inputs<SIGNATURE>, SIGNATURE>
struct UniqueInputs<SIGNATURE> : detail::UniqueTensors<SIGNATURE, Inputs<SIGNATURE>>
{
using detail::UniqueTensors<SIGNATURE, Inputs<SIGNATURE>>::UniqueTensors;
};
/// @brief Implementation of `UniqueOutputs` for `Outputs` that support reflection.
///
/// @tparam SIGNATURE The signature to specialize for.
///
/// @see UniqueOutputs
template <auto SIGNATURE>
requires TensorReflectable<Outputs<SIGNATURE>, SIGNATURE>
struct UniqueOutputs<SIGNATURE> : detail::UniqueTensors<SIGNATURE, Outputs<SIGNATURE>>
{
using detail::UniqueTensors<SIGNATURE, Outputs<SIGNATURE>>::UniqueTensors;
};
/// @brief Implementation of `alloc_inputs` for `Inputs` that support reflection.
///
/// @tparam SIGNATURE The signature to specialize for.
///
/// @param args The run-time arguments of the operation.
///
/// @see alloc_inputs
template <auto SIGNATURE>
requires TensorReflectable<Inputs<SIGNATURE>, SIGNATURE>
UniqueInputs<SIGNATURE> alloc_inputs(const Args<SIGNATURE>& args)
{
static_assert(ValidUniqueInputs<SIGNATURE>, "sanity check");
return UniqueInputs<SIGNATURE>(args);
}
/// @brief Implementation of `alloc_outputs` for `Outputs` that support reflection.
///
/// @tparam SIGNATURE The signature to specialize for.
///
/// @param args The run-time arguments of the operation.
///
/// @see alloc_outputs
template <auto SIGNATURE>
requires TensorReflectable<Outputs<SIGNATURE>, SIGNATURE>
UniqueOutputs<SIGNATURE> alloc_outputs(const Args<SIGNATURE>& args)
{
static_assert(ValidUniqueOutputs<SIGNATURE>, "sanity check");
return UniqueOutputs<SIGNATURE>(args);
}
/// @brief Implementation of `validate` for `Outputs` that support reflection.
///
/// @tparam SIGNATURE The signature to specialize for.
///
/// @param args The run-time arguments of the operation.
/// @param actual The actual results, the results of the operation to-be-tested.
/// @param expected The expected results, the results of the reference implementation.
///
/// @see alloc_outputs
template <auto SIGNATURE>
requires TensorReflectable<Outputs<SIGNATURE>, SIGNATURE>
ValidationReport
validate(const Args<SIGNATURE>& args, Outputs<SIGNATURE> actual, Outputs<SIGNATURE> expected)
{
ValidationReport report;
Outputs<SIGNATURE>::reflect(
args, [&](std::string_view name, const auto& desc, void* Outputs<SIGNATURE>::*ptr) {
report.check(name, desc, actual.*ptr, expected.*ptr);
});
return report;
}
} // namespace ck_tile::builder::test

View File

@@ -84,11 +84,13 @@ add_ck_builder_test(test_ckb_conv_builder
unit_error.cpp
unit_validation.cpp
unit_debug.cpp
unit_conv_fwd_testing.cpp
unit_conv_elementwise_op.cpp
unit_conv_tensor_layout.cpp
unit_conv_tensor_type.cpp
unit_conv_thread_block.cpp
unit_conv_tuning_params.cpp)
target_link_libraries(test_ckb_conv_builder PRIVATE utility)
# Tests the inline diff utility used for comparing strings in tests assertions
add_ck_builder_test(test_ckb_inline_diff test_inline_diff.cpp)

View File

@@ -4,6 +4,7 @@
#include "impl/conv_signature_types.hpp"
#include "testing_utils.hpp"
#include "ck_tile/builder/testing/conv_fwd.hpp"
#include "ck_tile/builder/testing/tensor_foreach.hpp"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <vector>
@@ -12,6 +13,7 @@ namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::NotNull;
constexpr auto SIGNATURE =
@@ -57,6 +59,8 @@ using UniqueOutputs = ckt::UniqueOutputs<SIGNATURE>;
static_assert(ckt::ValidUniqueInputs<SIGNATURE>);
static_assert(ckt::ValidUniqueOutputs<SIGNATURE>);
static_assert(ckt::TensorReflectable<Inputs, SIGNATURE>);
static_assert(ckt::TensorReflectable<Outputs, SIGNATURE>);
TEST(ConvFwdTesting, MakeDescriptors)
{
@@ -81,3 +85,41 @@ TEST(ConvFwdTesting, Alloc)
EXPECT_THAT(inputs.get().weight, NotNull());
EXPECT_THAT(outputs.get().output, NotNull());
}
TEST(ConvFwdTesting, Validate)
{
auto a = alloc_outputs(ARGS);
auto b = alloc_outputs(ARGS);
// Positive test
{
ckt::Outputs<SIGNATURE>::reflect(
ARGS,
[&]([[maybe_unused]] std::string_view name,
const auto& desc,
void* ckt::Outputs<SIGNATURE>::*ptr) {
ckt::clear_tensor_buffer(desc, a.get().*ptr, ck::bhalf_t{123});
ckt::clear_tensor_buffer(desc, b.get().*ptr, ck::bhalf_t{123});
});
const auto report = ckt::validate(ARGS, a.get(), b.get());
EXPECT_THAT(report.get_errors().size(), Eq(0));
}
// Negative test
{
size_t field_count = 0;
ckt::Outputs<SIGNATURE>::reflect(
ARGS,
[&]([[maybe_unused]] std::string_view name,
const auto& desc,
void* ckt::Outputs<SIGNATURE>::*ptr) {
++field_count;
ckt::clear_tensor_buffer(desc, a.get().*ptr, ck::bhalf_t{2});
ckt::clear_tensor_buffer(desc, b.get().*ptr, ck::bhalf_t{1});
});
const auto report = ckt::validate(ARGS, a.get(), b.get());
EXPECT_THAT(report.get_errors().size(), Eq(field_count));
}
}

View File

@@ -88,3 +88,11 @@ TEST(DeviceBuffer, AllocTensorBuffer)
EXPECT_THAT(hipMemset(buffer.get(), 0xFF, descriptor.get_element_space_size_in_bytes()),
HipSuccess());
}
TEST(DeviceBuffer, AlignForward)
{
EXPECT_THAT(ckt::align_fwd(24, 8), Eq(24));
EXPECT_THAT(ckt::align_fwd(25, 8), Eq(32));
EXPECT_THAT(ckt::align_fwd(0xd7c563, 0x1000), Eq(0xd7d000));
EXPECT_THAT(ckt::align_fwd(19561, 23), Eq(19573));
}

View File

@@ -204,6 +204,7 @@ struct DummySignature
constexpr DummySignature DUMMY_SIGNATURE = {};
namespace ck_tile::builder::test {
template <>
struct Args<DUMMY_SIGNATURE>
{
@@ -225,6 +226,7 @@ struct Outputs<DUMMY_SIGNATURE>
void* b;
};
// Explicitly implement validate for this type to test that that works.
template <>
ValidationReport validate<DUMMY_SIGNATURE>(const Args<DUMMY_SIGNATURE>& args,
Outputs<DUMMY_SIGNATURE> actual,