mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Standalone sweep once softmax kernel w/ ckProfiler (#295)
* use 'sweep once' softmax kernel where applicable * threadwise copy's dst buffer can specify invalid element value * add int8 in/out float compute softmax support give a bit of leeway for int absolute tolerance as there's a single data point of all test cases showing off-by-1 error * format * softmax inherits DeviceNormalization * softmax profiler stub * tighten up reference softmax interface * example prints tensor dimension * add fp32 to softmax profiler * rename header * hook with ckProfiler * format * resolve merge conflict * resolve merge conflicts * update normalization profiler help string * resolve conflict * typo * remove residual * softmax profiler: address feedback * test for mixed precision input/output * fully qualify ck::math::isnan * add comment for device normalization interface * revise wording * constness for alpha/beta scaler pointer
This commit is contained in:
@@ -2,7 +2,10 @@ add_custom_target(test_softmax)
|
||||
|
||||
add_gtest_executable(test_softmax_fp32 test_softmax_fp32.cpp)
|
||||
add_gtest_executable(test_softmax_fp16 test_softmax_fp16.cpp)
|
||||
add_gtest_executable(test_softmax_int8 test_softmax_int8.cpp)
|
||||
target_link_libraries(test_softmax_fp32 PRIVATE host_tensor)
|
||||
target_link_libraries(test_softmax_fp16 PRIVATE host_tensor)
|
||||
target_link_libraries(test_softmax_int8 PRIVATE host_tensor)
|
||||
add_dependencies(test_softmax test_softmax_fp32)
|
||||
add_dependencies(test_softmax test_softmax_fp16)
|
||||
add_dependencies(test_softmax test_softmax_fp16)
|
||||
add_dependencies(test_softmax test_softmax_int8)
|
||||
@@ -15,14 +15,19 @@ class TestSoftmaxFP16 : public ck::TestSoftmax<Tuple>
|
||||
// clang-format off
|
||||
using KernelTypes = ::testing::Types<
|
||||
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
|
||||
std::tuple<ck::half_t, float, float, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<4>>, // mixed precision
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>>
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<8>, I<8>>,
|
||||
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<8>, I<8>>
|
||||
>;
|
||||
// clang-format on
|
||||
TYPED_TEST_SUITE(TestSoftmaxFP16, KernelTypes);
|
||||
|
||||
@@ -15,14 +15,19 @@ class TestSoftmaxFP32 : public ck::TestSoftmax<Tuple>
|
||||
// clang-format off
|
||||
using KernelTypes = ::testing::Types<
|
||||
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
|
||||
std::tuple<float, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<8>>, // mixed precision
|
||||
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<4>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<4>, I<64>, I<1>, I<4>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<4>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<4>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<8>, I<32>, I<1>, I<4>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<4>, I<64>, I<1>, I<4>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<4>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<4>, I<1>, I<4>, I<4>>
|
||||
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<4>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<4>>,
|
||||
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<4>, I<4>>
|
||||
>;
|
||||
// clang-format on
|
||||
TYPED_TEST_SUITE(TestSoftmaxFP32, KernelTypes);
|
||||
|
||||
30
test/softmax/test_softmax_int8.cpp
Normal file
30
test/softmax/test_softmax_int8.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "test_softmax_util.hpp"
|
||||
|
||||
template <ck::index_t N>
|
||||
using I = ck::Number<N>;
|
||||
|
||||
template <typename Tuple>
|
||||
class TestSoftmaxINT8 : public ck::TestSoftmax<Tuple>
|
||||
{
|
||||
};
|
||||
|
||||
// clang-format off
|
||||
using KernelTypes = ::testing::Types<
|
||||
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<16>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<4>, I<64>, I<1>, I<16>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<16>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<64>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<8>, I<32>, I<1>, I<16>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<4>, I<64>, I<1>, I<16>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<16>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<16>, I<16>>,
|
||||
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<64>, I<1>, I<16>, I<16>>
|
||||
>;
|
||||
// clang-format on
|
||||
TYPED_TEST_SUITE(TestSoftmaxINT8, KernelTypes);
|
||||
TYPED_TEST(TestSoftmaxINT8, Test_INT8) { this->Run(); }
|
||||
@@ -1,6 +1,8 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <gtest/gtest.h>
|
||||
@@ -16,6 +18,18 @@
|
||||
|
||||
namespace ck {
|
||||
|
||||
template <typename Range>
|
||||
std::string serialize_range(const Range& range)
|
||||
{
|
||||
std::stringstream ss;
|
||||
for(auto& r : range)
|
||||
{
|
||||
ss << r << ", ";
|
||||
}
|
||||
std::string str = ss.str();
|
||||
return std::string(str.begin(), str.end() - 2);
|
||||
}
|
||||
|
||||
template <typename Tuple>
|
||||
class TestSoftmax : public ::testing::Test
|
||||
{
|
||||
@@ -80,23 +94,43 @@ class TestSoftmax : public ::testing::Test
|
||||
auto argument_ptr = device_instance.MakeArgumentPointer(i_in_lengths,
|
||||
i_in_strides,
|
||||
reduce_dims,
|
||||
alpha,
|
||||
beta,
|
||||
&alpha,
|
||||
&beta,
|
||||
in_dev.GetDeviceBuffer(),
|
||||
out_dev.GetDeviceBuffer());
|
||||
|
||||
if(!device_instance.IsSupportedArgument(argument_ptr.get()))
|
||||
{
|
||||
FAIL() << "Unsupported argument";
|
||||
// std::cout << "Skipped due to unsupported argument: "
|
||||
// << "input lengths = [" << serialize_range(in_length) << "], "
|
||||
// << "scaler = [" << alpha << ", " << beta << "]." << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
auto invoker_ptr = device_instance.MakeInvokerPointer();
|
||||
invoker_ptr->Run(argument_ptr.get());
|
||||
|
||||
ref_instance_invoker_.Run({in, out_ref, alpha, beta, Rank, reduce_dims});
|
||||
ref_instance_invoker_.Run({in, out_ref, alpha, beta, reduce_dims});
|
||||
|
||||
out_dev.FromDevice(out.mData.data());
|
||||
EXPECT_TRUE(ck::utils::check_err(out.mData, out_ref.mData));
|
||||
|
||||
bool pass;
|
||||
|
||||
if(std::is_same<InDataType, int8_t>::value)
|
||||
{
|
||||
EXPECT_TRUE(pass = ck::utils::check_err(
|
||||
out.mData, out_ref.mData, "Error: Incorrect results!", 0, 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_TRUE(pass = ck::utils::check_err(out.mData, out_ref.mData));
|
||||
}
|
||||
|
||||
if(!pass)
|
||||
{
|
||||
FAIL() << "Failure in input lengths = [" << serialize_range(in_length) << "], "
|
||||
<< "scaler = [" << alpha << ", " << beta << "].";
|
||||
}
|
||||
}
|
||||
|
||||
void Run()
|
||||
@@ -105,13 +139,14 @@ class TestSoftmax : public ::testing::Test
|
||||
{
|
||||
for(auto scale : this->scales_)
|
||||
{
|
||||
this->RunSingle(in_length, std::get<0>(scale), std::get<1>(scale));
|
||||
this->RunSingle(in_length, scale[0], scale[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<index_t>> in_lengths_ = {{1, 8, 128}, {2, 128, 1024}, {3, 9, 1032}};
|
||||
std::vector<std::tuple<AccDataType, AccDataType>> scales_ = {{1, 0}, {2, 2}, {0, 1}};
|
||||
std::vector<std::vector<index_t>> in_lengths_ = {
|
||||
{1, 8, 128}, {2, 128, 1024}, {3, 9, 1032}, {4, 4, 2048}, {8, 1, 8192}};
|
||||
std::vector<std::vector<AccDataType>> scales_ = {{1, 0}, {1, 1}, {0, 1}, {2, 2}};
|
||||
|
||||
typename ReferenceInstance::Invoker ref_instance_invoker_;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user