Files
composable_kernel/experimental/builder/include/ck_tile/builder/testing/validation.hpp
Robin Voetter e3884bbf05 [CK_BUILDER] Debug utilities (#3528)
* ck-builder: make toString to_string

We are using snake case for CK-Builder

* ck-builder: add debug.hpp with tensor descriptor printing function

This adds some initial functionality to debug.hpp, a header which will
be used to house some debug utilities.

* ck-builder: abstract nd-iteration

Abstracting this makes it easier to test, clearer, and allows us to
use it elsewhere (such as in debug.hpp soon)

* ck-builder: tensor printing

* ck-builder: rename INT32 to I32

This makes it more in line with the other data type definitions.
2026-01-08 10:14:13 +01:00

205 lines
8.3 KiB
C++

// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include "ck_tile/builder/testing/error.hpp"
#include "ck_tile/builder/testing/tensor_buffer.hpp"
#include "ck_tile/builder/testing/tensor_foreach.hpp"
#include "ck_tile/builder/factory/helpers/ck/conv_tensor_type.hpp"
#include "ck/utility/type_convert.hpp"
#include <string_view>
#include <vector>
#include <algorithm>
#include <functional>
#include <bit>
/// This file implements functionality related to "validation", i.e., functionality
/// to compare tensors. The functionality in this file should be testing-framework
/// agnostic, and it should NOT generate any error messages by itself. Instead,
/// all relevant information should be stored in the `ValidationReport` structure.
/// This structure should then be used to generate error messages, explanations,
/// etc., by the actual testing framework that the user has chosen.
namespace ck_tile::builder::test {
/// @brief Information about how a set of comparisons failed or succeeded.
///
/// This structure represents a "report" generated by comparing sets of tensors.
/// Its intended to be used as the result of `ckt::validate()`, where `check()`
/// is invoked for each of the output tensors of a particular device operation.
/// The test should be considered successful if _all_ of those checks passes,
/// which can inspected by asserting that `get_errors().size()` is 0.
/// @brief Information about how a set of comparisons failed or succeeded.
///
/// A report accumulated by comparing pairs of tensors. It is intended to be
/// used as the result of `ckt::validate()`, where `check()` is invoked once
/// for each output tensor of a particular device operation. The test should
/// be considered successful only if every one of those checks passed, which
/// can be verified by asserting that `get_errors().size()` is 0.
struct ValidationReport
{
    /// @brief Information related to a single tensor comparison.
    ///
    /// Holds the outcome of comparing one particular pair of tensors.
    struct Case
    {
        /// Name of the tensor that was compared, stored here so that
        /// reporting any errors later is convenient.
        std::string tensor_name;
        /// How many elements differed between the two compared tensors.
        uint64_t wrong_elements;
        /// Total number of elements in each tensor.
        uint64_t total_elements;
        /// How many elements were bitwise 0.
        uint64_t zero_elements;

        /// @brief Whether both the output and the reference tensor consisted
        /// entirely of zeros.
        ///
        /// Two all-zero tensors typically indicate an incorrect testing setup
        /// or an issue within the testing framework itself; for that reason
        /// this situation is also considered a failure.
        bool is_all_zero() const { return zero_elements == total_elements; }

        /// @brief Whether the check associated with this case succeeded.
        ///
        /// Success is derived directly from the recorded counters: the
        /// tensors must not be entirely zero AND no element may have
        /// mismatched.
        bool is_ok() const { return !is_all_zero() && wrong_elements == 0; }
    };

    /// @brief Collect the comparison cases that were incorrect.
    ///
    /// Returns every recorded case for which `Case::is_ok` is false. To check
    /// whether validation passed, it is sufficient to assert that this
    /// function returns an empty vector.
    std::vector<Case> get_errors() const
    {
        std::vector<Case> failed;
        for(const auto& report : reports_)
        {
            if(!report.is_ok())
            {
                failed.push_back(report);
            }
        }
        return failed;
    }

    /// @brief Compare two tensors and record the results in the report.
    ///
    /// This is the main entry point for comparing two tensors. The results of
    /// the comparison, including any supplemental information, are recorded
    /// into this report.
    ///
    /// @returns `false` if the comparison failed. If so, the details can be
    ///     found via `get_errors()`.
    ///
    /// @tparam DT The data type of the tensors to check.
    /// @tparam RANK The rank (number of spatial dimensions) of the tensor to check.
    ///
    /// @param tensor_name The name of the tensors to check. This should be a value
    ///     by which whoever is debugging the associated test later can easily find
    ///     out which of the outputs of a device operation was incorrect.
    /// @param descriptor The descriptor (memory layout) of the tensor.
    /// @param actual The device buffer with the values of the tensor to-be-tested,
    ///     i.e., the results of the device operation.
    /// @param expected The device buffer with the values of the reference tensor.
    ///     These are treated as a "golden standard", and should usually be generated
    ///     by a reference implementation.
    /// @param rtol The relative acceptable tolerance between two values.
    /// @param atol The absolute acceptable tolerance between two values.
    template <DataType DT, size_t RANK>
    bool check(std::string_view tensor_name,
               const TensorDescriptor<DT, RANK>& descriptor,
               const void* actual,
               const void* expected,
               double rtol = 1e-3,
               double atol = 1e-3);

private:
    std::vector<Case> reports_;
};
/// Out-of-line definition of `ValidationReport::check`. Counts mismatching
/// elements and bitwise-zero element pairs for one tensor comparison, then
/// appends a `Case` to the report.
template <DataType DT, size_t RANK>
bool ValidationReport::check(std::string_view tensor_name,
                             const TensorDescriptor<DT, RANK>& descriptor,
                             const void* actual_data,
                             const void* expected_data,
                             double rtol,
                             double atol)
{
    const auto strides = descriptor.get_strides();
    // During development and CI, only the kernels that were changed would fail, and so we can
    // assume that the average case does not have errors. Therefore, split out testing into a
    // quick test which just counts the incorrect elements, and a more in-depth test that also
    // returns the indices of the incorrect items.
    // Initial pass: count errors.
    // Allocate room for two uint64_t counters and zero them:
    //   slot 0 - number of mismatching elements,
    //   slot 1 - number of element pairs that are bitwise zero in BOTH tensors.
    auto d_counters = alloc_buffer(sizeof(uint64_t) * 2);
    check_hip(hipMemset(d_counters.get(), 0, sizeof(uint64_t) * 2));
    auto d_error_count = &reinterpret_cast<uint64_t*>(d_counters.get())[0];
    auto d_zero_count = &reinterpret_cast<uint64_t*>(d_counters.get())[1];
    // NOTE(review): the callback uses atomicAdd, so tensor_foreach presumably runs it
    // on the device — confirm. Captures are by value ([=]), so the raw data pointers,
    // strides and tolerances are copied into the callable.
    tensor_foreach(descriptor.get_lengths(), [=](auto index) {
        // Map the abstract DataType enum value to the concrete CK element type.
        using CKType = typename factory::internal::DataTypeToCK<DT>::type;
        const auto* actual = static_cast<const CKType*>(actual_data);
        const auto* expected = static_cast<const CKType*>(expected_data);
        // Values are compared by widening to float first (see type_convert below),
        // which would lose precision for double-typed tensors.
        static_assert(!std::is_same_v<CKType, double>,
                      "TODO implement compare_kernel() for double");
        // Translate the multi-dimensional index into a linear offset; both tensors
        // are assumed to share the same layout (`strides`).
        const auto offset = calculate_offset(index, strides);
        const auto a = actual[offset];
        const auto b = expected[offset];
        // Widen via float to double for the tolerance comparison.
        const auto o = static_cast<double>(type_convert<float>(a));
        const auto r = static_cast<double>(type_convert<float>(b));
        const auto err = std::abs(o - r);
        // Mixed absolute/relative tolerance check (relative to the reference value),
        // same shape as numpy.isclose: |o - r| <= atol + rtol * |r|. Any non-finite
        // value (NaN/inf) on either side is also counted as an error.
        if(err > atol + rtol * std::abs(r) || !std::isfinite(o) || !std::isfinite(r))
        {
            // We expect the number of errors to be very low, so just use an atomic
            // for now.
            atomicAdd(d_error_count, 1);
        }
        // Now compare the numbers bitwise too: reinterpret each value as raw bytes
        // and check that every byte of BOTH values is zero. A bitwise check is used
        // (rather than comparing against 0) so that e.g. negative zero or denormals
        // do not count as "zero".
        // Update the counter if they're both zero.
        using Bytes = std::array<std::byte, sizeof(CKType)>;
        bool all_zero = true;
        for(auto x : std::bit_cast<Bytes>(a))
        {
            if(x != std::byte{0})
                all_zero = false;
        }
        for(auto x : std::bit_cast<Bytes>(b))
        {
            if(x != std::byte{0})
                all_zero = false;
        }
        if(all_zero)
        {
            atomicAdd(d_zero_count, 1);
        }
    });
    // Copy the two counters back to the host.
    uint64_t error_count = 0;
    check_hip(hipMemcpy(&error_count, d_error_count, sizeof(uint64_t), hipMemcpyDeviceToHost));
    uint64_t zero_count = 0;
    check_hip(hipMemcpy(&zero_count, d_zero_count, sizeof(uint64_t), hipMemcpyDeviceToHost));
    // TODO: Gather detailed coordinates.
    // Record this comparison; the field order here must match the declaration
    // order of `Case` for the designated initializers to be valid.
    reports_.push_back(Case{
        .tensor_name = std::string(tensor_name),
        .wrong_elements = error_count,
        .total_elements = descriptor.get_element_size(),
        .zero_elements = zero_count,
    });
    return reports_.back().is_ok();
}
} // namespace ck_tile::builder::test