mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 19:40:04 +00:00
[CK_TILE] Support for elementwise kernel (#2246)
* Elementwise kernel implementation
Co-authored-by: Sami Aario <samaario@amd.com>
Co-authored-by: Mohsen Saffari <mohsen.saffari@amd.com>
Co-authored-by: yashagar <yashagar@amd.com>
* Elementwise with generalized nDims
* Adding the n-ary input tensor feature
* Generalize dimensions on top of inputs
* Add TFLOPS + remove std usage for tuples
* 1D basecase optimization
* Cleanup code + refactoring to a common interface
* Generalize to unary and add an example
* Cleanup, refactoring and commenting
* Suggestions for LWPCK-3170: elementwise kernel improvements
* Clang-format: remod.py
* Replace InputTensorType with XDataType as the type of input_tensors
* Add Tuple::apply and use it in ElementWiseKernel::operator to call operation with the exact number of arguments in xs
* Move examples to folder 19_elementwise
* Add missing copyright headers and fix some existing ones
* Replace an assert with throw std::runtime_error in elementwise example
* Avoid reading the output by using make_static_distributed_tensor for y_tile
* Removed two unused includes
* No need to move windows to the next block when each workgroup processes a single tile
* Only copy input tensors to the device
* Use get_warp_size to obtain warp size, and use ceiling division for grid size also for the unary example
* Adding output strides to the kernel, transposition example and update the other examples
* Changes made by remod.py
* Use default template parameter values for memory operation and coherence in a call to make_naive_tensor_view
* Move binary operations to include/ck_tile/ops/elementwise/binary_elementwise_operation.hpp
* Reuse generic reference binary/unary operation in examples + refactoring the transpose reference
* Fix comments in elementwise_example.cpp
- Refer to AMD terminology except when suggesting NVIDIA alternatives in parentheses
- ElementWiseTraits was renamed to ElementWiseShape
- Adopt suggestions made by Copilot when prompted to check for factual or typographical errors
* Simplify CMakeLists.txt and remove the unused variables this uncovers
* Rename a file and fix some copyright statements
* Changes made by script/clang-format-overwrite.sh
* Add basic unit test for ElementWiseKernel
* Remove left-over uninformative comment in apply unit test
* Changes made by clang-format-overwrite.sh
* fixup! Use default template parameter values for memory operation and coherence in a call to make_naive_tensor_view
* Clean up test_tuple_apply.cpp and test_elementwise_1d.cpp
* Use make_uniform_array_with_factory to define h_xs and d_xs_mems_owner as type std::array
* Use a DeviceMem constructor that calls get_element_space_size_in_bytes internally
* Move examples to folder 20_elementwise
* Reduced register pressure on the CK tile elementwise kernel + add 4d input example to be able to benchmark against old CK
* Fix Clang formatting
* Bump up the elementwise example folder number
* Elementwise: add padding + minor cleanup
* Add Vector Size inference + fix issue with wrong vectorization due to missing GuaranteedLastDimensionVectorStride setting in make_naive_tensor_view
* Add isSupportedArg to Elementwise kernel + adapt example and unit tests
* Fix clang-format on the unit test file
---------
Co-authored-by: Damien Lejeune <damien.lejeune@amd.com>
Co-authored-by: Sami Aario <samaario@amd.com>
Co-authored-by: Mohsen Saffari <mohsen.saffari@amd.com>
Co-authored-by: Aviral Goel <aviral.goel@amd.com>
[ROCm/composable_kernel commit: 606b0cc947]
This commit is contained in:
committed by
GitHub
parent
bdb86fee78
commit
094e5bad50
@@ -5,6 +5,8 @@ add_subdirectory(batched_gemm)
|
||||
add_subdirectory(grouped_gemm)
|
||||
add_subdirectory(gemm_multi_d)
|
||||
add_subdirectory(data_type)
|
||||
add_subdirectory(container)
|
||||
add_subdirectory(elementwise)
|
||||
# Not including these tests as there is a bug on gfx90a and gfx942
|
||||
# resulting in "GPU core dump"
|
||||
#add_subdirectory(moe_smoothquant)
|
||||
|
||||
6
test/ck_tile/container/CMakeLists.txt
Normal file
6
test/ck_tile/container/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
# The tuple-apply unit test is only built when a gfx9-family target is requested.
if(NOT GPU_TARGETS MATCHES "gfx9")
    return()
endif()

add_gtest_executable(test_ck_tile_tuple_apply test_tuple_apply.cpp)

# NOTE(review): `result` is presumably set by add_gtest_executable to signal
# whether the target was actually created -- confirm against its definition.
if(result EQUAL 0)
    target_link_libraries(test_ck_tile_tuple_apply PRIVATE utility)
endif()
|
||||
223
test/ck_tile/container/test_tuple_apply.cpp
Normal file
223
test/ck_tile/container/test_tuple_apply.cpp
Normal file
@@ -0,0 +1,223 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "ck_tile/core.hpp"
|
||||
|
||||
using namespace ck_tile;
|
||||
|
||||
class TestCkTileTupleApply : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
// Test functors for different scenarios
|
||||
struct AddFunction
|
||||
{
|
||||
template <typename... Args>
|
||||
CK_TILE_HOST_DEVICE constexpr auto operator()(Args... args) const
|
||||
{
|
||||
return (args + ...);
|
||||
}
|
||||
};
|
||||
|
||||
struct MultiplyFunction
|
||||
{
|
||||
template <typename... Args>
|
||||
CK_TILE_HOST_DEVICE constexpr auto operator()(Args... args) const
|
||||
{
|
||||
return (args * ...);
|
||||
}
|
||||
};
|
||||
|
||||
struct MaxFunction
|
||||
{
|
||||
template <typename T>
|
||||
CK_TILE_HOST_DEVICE constexpr T operator()(T a) const
|
||||
{
|
||||
return a;
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
CK_TILE_HOST_DEVICE constexpr T operator()(T a, Args... args) const
|
||||
{
|
||||
auto rest_max = operator()(args...);
|
||||
return a > rest_max ? a : rest_max;
|
||||
}
|
||||
};
|
||||
|
||||
struct ReturnTupleFunction
|
||||
{
|
||||
template <typename... Args>
|
||||
CK_TILE_HOST_DEVICE constexpr auto operator()(Args... args) const
|
||||
{
|
||||
return make_tuple(args..., sizeof...(args));
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
// apply() must expand an all-int tuple into the functor's argument list.
TEST_F(TestCkTileTupleApply, BasicArithmetic)
{
    // 1 + 2 + 3
    auto addends = make_tuple(1, 2, 3);
    EXPECT_EQ(apply(AddFunction{}, addends), 6);

    // 2 * 3 * 4 * 5
    auto factors = make_tuple(2, 3, 4, 5);
    EXPECT_EQ(apply(MultiplyFunction{}, factors), 120);
}
|
||||
|
||||
// A one-element tuple is the smallest non-empty input: both folds must return
// the element unchanged.
TEST_F(TestCkTileTupleApply, SingleElement)
{
    auto single = make_tuple(42);

    auto sum     = apply(AddFunction{}, single);
    auto product = apply(MultiplyFunction{}, single);

    EXPECT_EQ(sum, 42);
    EXPECT_EQ(product, 42);
}
|
||||
|
||||
// An empty tuple must result in a call with no arguments.
TEST_F(TestCkTileTupleApply, EmptyTuple)
{
    tuple<> empty{};
    EXPECT_EQ(apply([]() { return 100; }, empty), 100);
}
|
||||
|
||||
// Tuple elements of different arithmetic types must reach the functor with
// their own types so the usual arithmetic conversions apply.
TEST_F(TestCkTileTupleApply, DifferentTypes)
{
    // int + float + double
    auto mixed_addends = make_tuple(1, 2.5f, 3.0);
    auto mixed_sum     = apply(AddFunction{}, mixed_addends);
    EXPECT_FLOAT_EQ(mixed_sum, 6.5f);

    // int * float
    auto mixed_factors = make_tuple(10, 0.5f);
    auto mixed_product = apply(MultiplyFunction{}, mixed_factors);
    EXPECT_FLOAT_EQ(mixed_product, 5.0f);
}
|
||||
|
||||
// The value returned by the functor (here another tuple) must be passed
// through apply() intact.
TEST_F(TestCkTileTupleApply, ReturnTuple)
{
    auto inputs = make_tuple(1, 2, 3);
    auto echoed = apply(ReturnTupleFunction{}, inputs);

    // The first three slots echo the inputs ...
    EXPECT_EQ(echoed.get<0>(), 1);
    EXPECT_EQ(echoed.get<1>(), 2);
    EXPECT_EQ(echoed.get<2>(), 3);
    // ... and the last slot holds the number of arguments received.
    EXPECT_EQ(echoed.get<3>(), 3);
}
|
||||
|
||||
// apply() must accept closures, both stateless and capturing.
TEST_F(TestCkTileTupleApply, LambdaFunction)
{
    // Stateless generic lambda applied to an lvalue tuple.
    auto triple = make_tuple(5, 10, 15);
    auto total  = apply([](auto x, auto y, auto z) { return x + y + z; }, triple);
    EXPECT_EQ(total, 30);

    // Capturing lambda applied to a temporary tuple.
    int multiplier = 2;
    auto scaled =
        apply([multiplier](auto x, auto y) { return (x + y) * multiplier; }, make_tuple(3, 7));
    EXPECT_EQ(scaled, 20);
}
|
||||
|
||||
// apply() must be usable in constant expressions; the static_assert is the
// compile-time proof and the EXPECT_EQ repeats the check at run time.
TEST_F(TestCkTileTupleApply, ConstexprContext)
{
    constexpr auto factors = make_tuple(2, 3, 4);
    constexpr auto product = apply(MultiplyFunction{}, factors);

    static_assert(product == 24, "Constexpr apply should work");
    EXPECT_EQ(product, 24);
}
|
||||
|
||||
// A tuple of references built with tie() must hand the original objects to
// the callable, so writes inside the callable are visible afterwards.
TEST_F(TestCkTileTupleApply, ReferenceTypes)
{
    int a = 1, b = 2, c = 3;
    auto refs = tie(a, b, c);

    apply(
        [](auto& first, auto& second, auto& third) {
            first += 10;
            second += 20;
            third += 30;
        },
        refs);

    EXPECT_EQ(a, 11);
    EXPECT_EQ(b, 22);
    EXPECT_EQ(c, 33);
}
|
||||
|
||||
// apply() must also accept the tuple as an rvalue.
TEST_F(TestCkTileTupleApply, MoveSemantics)
{
    auto addends = make_tuple(1, 2, 3);
    auto sum     = apply(AddFunction{}, std::move(addends));
    EXPECT_EQ(sum, 6);
}
|
||||
|
||||
// Elements may be ck_tile::number<> compile-time constants rather than plain
// integers.
TEST_F(TestCkTileTupleApply, NumberTypes)
{
    auto constants = make_tuple(number<1>{}, number<2>{}, number<3>{});
    auto sum       = apply([](auto x, auto y, auto z) { return x + y + z; }, constants);
    EXPECT_EQ(sum, 6);
}
|
||||
|
||||
// Nested apply() calls can zip two tuples: the outer call unpacks the first
// tuple, the inner call unpacks the second, and the two packs are added
// position by position.
TEST_F(TestCkTileTupleApply, ElementwiseOperation)
{
    auto lhs = make_tuple(1.0f, 2.0f, 3.0f);
    auto rhs = make_tuple(4.0f, 5.0f, 6.0f);

    auto zip_add = [](const auto& first, const auto& second) {
        return apply(
            [&second](auto... first_values) {
                return apply(
                    [first_values...](auto... second_values) {
                        return make_tuple((first_values + second_values)...);
                    },
                    second);
            },
            first);
    };

    auto sums = zip_add(lhs, rhs);

    EXPECT_FLOAT_EQ(sums.get<0>(), 5.0f);
    EXPECT_FLOAT_EQ(sums.get<1>(), 7.0f);
    EXPECT_FLOAT_EQ(sums.get<2>(), 9.0f);
}
|
||||
|
||||
template <typename T>
|
||||
class TestCkTileTupleApplySize : public TestCkTileTupleApply
|
||||
{
|
||||
protected:
|
||||
static constexpr int Size = T::value;
|
||||
};
|
||||
|
||||
using TupleSizes = ::testing::Types<std::integral_constant<int, 1>,
|
||||
std::integral_constant<int, 2>,
|
||||
std::integral_constant<int, 3>,
|
||||
std::integral_constant<int, 4>,
|
||||
std::integral_constant<int, 8>,
|
||||
std::integral_constant<int, 16>>;
|
||||
|
||||
TYPED_TEST_SUITE(TestCkTileTupleApplySize, TupleSizes);
|
||||
|
||||
// For every tuple length N in the suite: build (1, 2, ..., N) at compile time,
// sum it through apply(), and compare with the closed form. The check is a
// static_assert, so a wrong result fails the build rather than the test run.
TYPED_TEST(TestCkTileTupleApplySize, GeneratedTupleSum)
{
    constexpr int N = TypeParam::value;

    // Closed form of 1 + 2 + ... + N.
    constexpr int expected_sum = N * (N + 1) / 2;

    // The generator receives the index as a compile-time constant; shift it to be 1-based.
    constexpr auto one_to_n =
        generate_tuple([](auto index) { return index.value + 1; }, number<N>{});

    constexpr auto actual_sum = apply(TestCkTileTupleApply::AddFunction{}, one_to_n);

    static_assert(actual_sum == expected_sum);
}
|
||||
6
test/ck_tile/elementwise/CMakeLists.txt
Normal file
6
test/ck_tile/elementwise/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
# The 1D elementwise unit test is only built when a gfx9-family target is requested.
if(NOT GPU_TARGETS MATCHES "gfx9")
    return()
endif()

add_gtest_executable(test_ck_tile_elementwise_1d test_elementwise_1d.cpp)

# NOTE(review): `result` is presumably set by add_gtest_executable to signal
# whether the target was actually created -- confirm against its definition.
if(result EQUAL 0)
    target_link_libraries(test_ck_tile_elementwise_1d PRIVATE utility)
endif()
|
||||
216
test/ck_tile/elementwise/test_elementwise_1d.cpp
Normal file
216
test/ck_tile/elementwise/test_elementwise_1d.cpp
Normal file
@@ -0,0 +1,216 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <gtest/gtest.h>
#include <array>       // For std::array
#include <cmath>       // For std::abs
#include <stdexcept>   // For std::runtime_error
#include <tuple>
#include <type_traits> // For std::is_same_v, std::is_floating_point_v
#include <utility>     // For std::index_sequence, std::forward
#include <vector>

#include "ck_tile/core.hpp"
#include "ck_tile/host.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/ops/elementwise/kernel/elementwise_kernel.hpp"
#include "ck_tile/ops/elementwise/pipeline/elementwise_pipeline_problem.hpp"
#include "ck_tile/ops/elementwise/pipeline/elementwise_pipeline_default_policy.hpp"
#include "ck_tile/ops/elementwise/pipeline/elementwise_shape.hpp"
#include "ck_tile/ops/elementwise/binary_elementwise_operation.hpp"
#include "ck_tile/ops/elementwise/unary_element_wise_operation.hpp"
|
||||
|
||||
// Traits to get number of inputs for an elementwise operation
|
||||
template <typename Op>
|
||||
struct elementwise_op_traits;
|
||||
|
||||
template <>
|
||||
struct elementwise_op_traits<ck_tile::element_wise::Add>
|
||||
{
|
||||
static constexpr int num_inputs = 2;
|
||||
};
|
||||
template <>
|
||||
struct elementwise_op_traits<ck_tile::element_wise::Relu>
|
||||
{
|
||||
static constexpr int num_inputs = 1;
|
||||
};
|
||||
|
||||
// Builds a std::array of D elements where element i is `factory(i)`.
//
// The element type is whatever invoking F with a std::size_t yields. The calls
// appear in a braced initializer list, so they are evaluated in index order.
template <std::size_t D, typename F>
auto make_uniform_array_with_factory(F&& factory)
{
    using Element = std::invoke_result_t<F, std::size_t>;
    using Result  = std::array<Element, D>;

    // Expand the index pack 0..D-1 into one factory call per slot.
    auto build = [&]<std::size_t... Indices>(std::index_sequence<Indices...>)
    {
        return Result{factory(Indices)...};
    };

    return build(std::make_index_sequence<D>{});
}
|
||||
|
||||
template <typename Tuple>
|
||||
class TestCkTileElementwise : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
using XDataType = std::tuple_element_t<0, Tuple>;
|
||||
using YDataType = std::tuple_element_t<1, Tuple>;
|
||||
using ComputeDataType = std::tuple_element_t<2, Tuple>;
|
||||
using ElementwiseOpType = std::tuple_element_t<3, Tuple>;
|
||||
using BlockWarps_ = std::tuple_element_t<4, Tuple>;
|
||||
using BlockTile_ = std::tuple_element_t<5, Tuple>;
|
||||
using WarpTile_ = std::tuple_element_t<6, Tuple>;
|
||||
using TestElementWiseShape =
|
||||
ck_tile::ElementWiseShape<BlockWarps_, BlockTile_, WarpTile_, ComputeDataType>;
|
||||
static constexpr int NumInputs = elementwise_op_traits<ElementwiseOpType>::num_inputs;
|
||||
|
||||
void RunTest(ck_tile::index_t total_m_elements)
|
||||
{
|
||||
// Dims and Strides (1D example)
|
||||
auto lens = ck_tile::make_tuple(total_m_elements);
|
||||
auto strides = ck_tile::make_tuple(
|
||||
static_cast<ck_tile::index_t>(1)); // Strides for the single dimension
|
||||
|
||||
// Host Tensors
|
||||
auto h_xs = make_uniform_array_with_factory<NumInputs>([&](std::size_t) {
|
||||
auto ret = ck_tile::HostTensor<XDataType>({total_m_elements});
|
||||
ck_tile::FillUniformDistribution<XDataType>{0.f, 5.f}(ret);
|
||||
return ret;
|
||||
});
|
||||
ck_tile::HostTensor<YDataType> h_y({total_m_elements});
|
||||
h_y.SetZero();
|
||||
ck_tile::HostTensor<YDataType> h_y_ref({total_m_elements});
|
||||
h_y_ref.SetZero();
|
||||
|
||||
// Device Buffers
|
||||
auto d_xs_mems_owner = make_uniform_array_with_factory<NumInputs>(
|
||||
[&](std::size_t i) { return ck_tile::DeviceMem(h_xs[i]); });
|
||||
for(int i = 0; i < NumInputs; ++i)
|
||||
{
|
||||
d_xs_mems_owner[i].ToDevice(h_xs[i].data());
|
||||
}
|
||||
|
||||
ck_tile::DeviceMem d_y_mem(h_y);
|
||||
d_y_mem.SetZero();
|
||||
|
||||
auto d_x_ptrs_tuple = [&]<std::size_t... Is>(std::index_sequence<Is...>)
|
||||
{
|
||||
return ck_tile::make_tuple(
|
||||
static_cast<const XDataType*>(d_xs_mems_owner[Is].GetDeviceBuffer())...);
|
||||
}
|
||||
(std::make_index_sequence<NumInputs>{});
|
||||
|
||||
YDataType* p_y_device = static_cast<YDataType*>(d_y_mem.GetDeviceBuffer());
|
||||
|
||||
// Problem and Policy
|
||||
using Problem = ck_tile::ElementWisePipelineProblem<XDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
TestElementWiseShape,
|
||||
ElementwiseOpType>;
|
||||
using Policy = ck_tile::ElementWiseDefaultPolicy;
|
||||
|
||||
ck_tile::ElementWiseKernel<Problem, Policy> ew_kernel;
|
||||
|
||||
// Launch configuration
|
||||
ck_tile::index_t grid_size =
|
||||
(total_m_elements + TestElementWiseShape::kBlockM - 1) / TestElementWiseShape::kBlockM;
|
||||
dim3 grid(grid_size, 1, 1);
|
||||
dim3 block(TestElementWiseShape::kBlockSize, 1, 1);
|
||||
constexpr ck_tile::index_t kBlockPerCu = 1;
|
||||
|
||||
ck_tile::stream_config s{nullptr, false, 0}; // Default stream, no timing, no log
|
||||
|
||||
// Check if the kernel configuration is supported
|
||||
if(!ew_kernel.IsSupportedArgument(lens))
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"The kernel configuration is not supported for the given input size.");
|
||||
}
|
||||
|
||||
ck_tile::launch_kernel(
|
||||
s,
|
||||
ck_tile::make_kernel<TestElementWiseShape::kBlockSize, // MaxThreadPerBlock
|
||||
kBlockPerCu> // MinBlockPerCu
|
||||
(ew_kernel,
|
||||
grid,
|
||||
block,
|
||||
0, // actual shared memory
|
||||
lens,
|
||||
strides, // input strides
|
||||
strides, // output strides
|
||||
d_x_ptrs_tuple,
|
||||
p_y_device));
|
||||
|
||||
d_y_mem.FromDevice(h_y.data());
|
||||
|
||||
// Reference computation on host
|
||||
ElementwiseOpType op_host;
|
||||
for(ck_tile::index_t i = 0; i < total_m_elements; ++i)
|
||||
{
|
||||
auto get_host_op_args = [&]<std::size_t... Is>(std::index_sequence<Is...>)
|
||||
{
|
||||
return ck_tile::make_tuple(static_cast<ComputeDataType>(h_xs[Is](i))...);
|
||||
}
|
||||
(std::make_index_sequence<NumInputs>{});
|
||||
|
||||
YDataType temp_y_val;
|
||||
ck_tile::apply(
|
||||
[&](auto&&... host_input_args) {
|
||||
op_host(temp_y_val,
|
||||
std::forward<decltype(host_input_args)>(host_input_args)...);
|
||||
},
|
||||
get_host_op_args);
|
||||
h_y_ref(i) = temp_y_val;
|
||||
}
|
||||
|
||||
// Check results
|
||||
check_err(h_y, h_y_ref, "Error: Incorrect results!", 1e-5, 1e-5);
|
||||
}
|
||||
};
|
||||
|
||||
// Shape parameters (can be shared or varied per test type)
|
||||
using Shape1_BlockWarps = ck_tile::sequence<1>; // 1D warp arrangement in M
|
||||
using Shape1_BlockTile = ck_tile::sequence<256>; // M-dimension of block tile
|
||||
using Shape1_WarpTile = ck_tile::sequence<64>; // M-dimension of warp tile
|
||||
|
||||
// Test configurations
|
||||
using TestConfig_F32_Add = std::tuple<float,
|
||||
float,
|
||||
float,
|
||||
ck_tile::element_wise::Add,
|
||||
Shape1_BlockWarps,
|
||||
Shape1_BlockTile,
|
||||
Shape1_WarpTile>;
|
||||
|
||||
using TestConfig_F32_Relu = std::tuple<float,
|
||||
float,
|
||||
float,
|
||||
ck_tile::element_wise::Relu,
|
||||
Shape1_BlockWarps,
|
||||
Shape1_BlockTile,
|
||||
Shape1_WarpTile>;
|
||||
|
||||
using TestConfig_F16_Add = std::tuple<ck_tile::half_t,
|
||||
ck_tile::half_t,
|
||||
float, // Compute in float for half
|
||||
ck_tile::element_wise::Add,
|
||||
Shape1_BlockWarps,
|
||||
Shape1_BlockTile,
|
||||
Shape1_WarpTile>;
|
||||
|
||||
using TestTypes = ::testing::Types<TestConfig_F32_Add, TestConfig_F32_Relu, TestConfig_F16_Add>;
|
||||
|
||||
TYPED_TEST_SUITE(TestCkTileElementwise, TestTypes);
|
||||
|
||||
// 1024 elements: an exact multiple of the 256-element block tile.
TYPED_TEST(TestCkTileElementwise, RunElementwise_1024)
{
    constexpr ck_tile::index_t kNumElements = 1024;
    this->RunTest(kNumElements);
}
|
||||
|
||||
// 513 elements: a size that's not a multiple of kVectorM, so RunTest is
// expected to reject it by throwing instead of launching the kernel.
TYPED_TEST(TestCkTileElementwise, RunElementwise_513)
{
    constexpr ck_tile::index_t kNumElements = 513;
    EXPECT_THROW((this->RunTest(kNumElements)), std::runtime_error);
}
|
||||
|
||||
// 516 elements: not a multiple of blockM, so the last block is only partially filled.
TYPED_TEST(TestCkTileElementwise, RunElementwise_516)
{
    constexpr ck_tile::index_t kNumElements = 516;
    this->RunTest(kNumElements);
}
|
||||
|
||||
// 32 elements: a very small input, far below one 256-element block tile.
TYPED_TEST(TestCkTileElementwise, RunElementwise_Small_32)
{
    constexpr ck_tile::index_t kNumElements = 32;
    this->RunTest(kNumElements);
}
|
||||
Reference in New Issue
Block a user