mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 04:19:36 +00:00
Softmax unit-test reduction across all and non innermost dims cases. (#406)
* Add reduction across all dims cases.
* host softmax: handle all reduce
* Test cases when reduced dim is not innermost axis.
* Fix syntax.
* Test non innermost dim for fp32 and int8
* Group test suites wrt NumReduceDim.
* Additionally test failing cases.
* Throw error when Rank or NumReduceDims doesn't match arguments.
* Check reducedDims has correct values
* Move don't reuse DeviceReduceMultiblock IsSupportedArgument method.
Instead implement own. (in fact just get rid of one check to enable
reduction across inner dimensions).
* Reorganize unit tests to better cover use scenarios.
* Test input validation
* Test reduction of inner dimensions with custom op instances.
* Refactor fp32 and int8 unit tests.
* Fix FP32 instance template parameters.
* Add more instances.
* Instances with InSrcVectorDim=0.
* Do not initialize and copy data when arg not supported.
* ckProfiler Softmax use instance factory.
* Refactor device softmax IsSupported.
* Additionally add non-polymorphic api functions
* Split softmax instances into multiple files.
* Fix profiler.
* Reorganize tests to reuse profiler and cover edge cases.
* Clang-format
* I8 Softmax instances along with UT.
* Reuse type alias definitions from instance factory header.
* Clean included headers
* Fix variable names.
* Add missing checks in Argument constructor.
Co-authored-by: Adam Osewski <aosewski@amd.com>
Co-authored-by: Anthony Chang <ac.chang@outlook.com>
[ROCm/composable_kernel commit: 6d8614ee50]
This commit is contained in:
@@ -1,4 +1,26 @@
|
||||
add_instance_library(device_softmax_instance
|
||||
device_softmax_i8_i8_instance.cpp
|
||||
device_softmax_i8_i8_instance_rank3_reduce1.cpp
|
||||
device_softmax_i8_i8_instance_rank3_reduce2.cpp
|
||||
device_softmax_i8_i8_instance_rank3_reduce3.cpp
|
||||
device_softmax_i8_i8_instance_rank4_reduce1.cpp
|
||||
device_softmax_i8_i8_instance_rank4_reduce2.cpp
|
||||
device_softmax_i8_i8_instance_rank4_reduce3.cpp
|
||||
device_softmax_i8_i8_instance_rank4_reduce4.cpp
|
||||
device_softmax_f16_f16_instance.cpp
|
||||
device_softmax_f16_f16_instance_rank3_reduce1.cpp
|
||||
device_softmax_f16_f16_instance_rank3_reduce2.cpp
|
||||
device_softmax_f16_f16_instance_rank3_reduce3.cpp
|
||||
device_softmax_f16_f16_instance_rank4_reduce1.cpp
|
||||
device_softmax_f16_f16_instance_rank4_reduce2.cpp
|
||||
device_softmax_f16_f16_instance_rank4_reduce3.cpp
|
||||
device_softmax_f16_f16_instance_rank4_reduce4.cpp
|
||||
device_softmax_f32_f32_instance.cpp
|
||||
device_softmax_f32_f32_instance_rank3_reduce1.cpp
|
||||
device_softmax_f32_f32_instance_rank3_reduce2.cpp
|
||||
device_softmax_f32_f32_instance_rank3_reduce3.cpp
|
||||
device_softmax_f32_f32_instance_rank4_reduce1.cpp
|
||||
device_softmax_f32_f32_instance_rank4_reduce2.cpp
|
||||
device_softmax_f32_f32_instance_rank4_reduce3.cpp
|
||||
device_softmax_f32_f32_instance_rank4_reduce4.cpp
|
||||
)
|
||||
|
||||
@@ -1,55 +1,37 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
|
||||
#include "ck/utility/data_type.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
namespace {
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
using Pass = ck::tensor_operation::element_wise::PassThrough;
|
||||
} // namespace
|
||||
|
||||
template <index_t Rank, index_t Reduce>
|
||||
using device_softmax_f16_f16_instances = std::tuple<
|
||||
// clang-format off
|
||||
// InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
|
||||
DeviceSoftmaxImpl< F16, F32, F16, Pass, Pass, Rank, Reduce, 256, 8, 32, 1, 8, 1, 1, 1>, // fallback kernel
|
||||
DeviceSoftmaxImpl< F16, F32, F16, Pass, Pass, Rank, Reduce, 256, 8, 32, 1, 8, 1, 8, 8>,
|
||||
DeviceSoftmaxImpl< F16, F32, F16, Pass, Pass, Rank, Reduce, 256, 4, 64, 1, 8, 1, 8, 8>,
|
||||
DeviceSoftmaxImpl< F16, F32, F16, Pass, Pass, Rank, Reduce, 256, 2, 128, 1, 8, 1, 8, 8>,
|
||||
DeviceSoftmaxImpl< F16, F32, F16, Pass, Pass, Rank, Reduce, 256, 2, 128, 1, 16, 1, 8, 8>,
|
||||
DeviceSoftmaxImpl< F16, F32, F16, Pass, Pass, Rank, Reduce, 256, 2, 128, 1, 32, 1, 8, 8>,
|
||||
DeviceSoftmaxImpl< F16, F32, F16, Pass, Pass, Rank, Reduce, 256, 1, 256, 1, 8, 1, 8, 8>,
|
||||
DeviceSoftmaxImpl< F16, F32, F16, Pass, Pass, Rank, Reduce, 256, 1, 256, 1, 16, 1, 8, 8>,
|
||||
DeviceSoftmaxImpl< F16, F32, F16, Pass, Pass, Rank, Reduce, 256, 1, 256, 1, 32, 1, 8, 8>
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
void add_device_softmax_f16_f16_rank3_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, Pass, Pass, 3>>& instances)
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<3, 1>{});
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<3, 2>{});
|
||||
add_device_softmax_f16_f16_rank3_reduce1_instances(instances);
|
||||
add_device_softmax_f16_f16_rank3_reduce2_instances(instances);
|
||||
add_device_softmax_f16_f16_rank3_reduce3_instances(instances);
|
||||
}
|
||||
|
||||
void add_device_softmax_f16_f16_rank4_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, Pass, Pass, 4>>& instances)
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 1>{});
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 2>{});
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 3>{});
|
||||
add_device_softmax_f16_f16_rank4_reduce1_instances(instances);
|
||||
add_device_softmax_f16_f16_rank4_reduce2_instances(instances);
|
||||
add_device_softmax_f16_f16_rank4_reduce3_instances(instances);
|
||||
add_device_softmax_f16_f16_rank4_reduce4_instances(instances);
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 3;
|
||||
|
||||
void add_device_softmax_f16_f16_rank3_reduce1_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 1>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 3;
|
||||
|
||||
void add_device_softmax_f16_f16_rank3_reduce2_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 2>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 3;
|
||||
|
||||
void add_device_softmax_f16_f16_rank3_reduce3_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 3>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_f16_f16_rank4_reduce1_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 1>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_f16_f16_rank4_reduce2_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 2>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_f16_f16_rank4_reduce3_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 3>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_f16_f16_rank4_reduce4_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 4>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -1,53 +1,37 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
namespace {
|
||||
using F32 = float;
|
||||
using Pass = ck::tensor_operation::element_wise::PassThrough;
|
||||
} // namespace
|
||||
|
||||
template <index_t Rank, index_t Reduce>
|
||||
using device_softmax_f32_f32_instances = std::tuple<
|
||||
// clang-format off
|
||||
// InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
|
||||
DeviceSoftmaxImpl< F32, F32, F32, Pass, Pass, Rank, Reduce, 256, 8, 32, 1, 8, 1, 1, 1>, // fallback kernel
|
||||
DeviceSoftmaxImpl< F32, F32, F32, Pass, Pass, Rank, Reduce, 256, 8, 32, 1, 8, 1, 4, 4>,
|
||||
DeviceSoftmaxImpl< F32, F32, F32, Pass, Pass, Rank, Reduce, 256, 4, 64, 1, 8, 1, 4, 4>,
|
||||
DeviceSoftmaxImpl< F32, F32, F32, Pass, Pass, Rank, Reduce, 256, 2, 128, 1, 8, 1, 4, 4>,
|
||||
DeviceSoftmaxImpl< F32, F32, F32, Pass, Pass, Rank, Reduce, 256, 2, 128, 1, 16, 1, 4, 4>,
|
||||
DeviceSoftmaxImpl< F32, F32, F32, Pass, Pass, Rank, Reduce, 256, 2, 128, 1, 32, 1, 4, 4>,
|
||||
DeviceSoftmaxImpl< F32, F32, F32, Pass, Pass, Rank, Reduce, 256, 1, 256, 1, 8, 1, 4, 4>,
|
||||
DeviceSoftmaxImpl< F32, F32, F32, Pass, Pass, Rank, Reduce, 256, 1, 256, 1, 16, 1, 4, 4>,
|
||||
DeviceSoftmaxImpl< F32, F32, F32, Pass, Pass, Rank, Reduce, 256, 1, 256, 1, 32, 1, 4, 4>
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
void add_device_softmax_f32_f32_rank3_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, Pass, Pass, 3>>& instances)
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<3, 1>{});
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<3, 2>{});
|
||||
add_device_softmax_f32_f32_rank3_reduce1_instances(instances);
|
||||
add_device_softmax_f32_f32_rank3_reduce2_instances(instances);
|
||||
add_device_softmax_f32_f32_rank3_reduce3_instances(instances);
|
||||
}
|
||||
|
||||
void add_device_softmax_f32_f32_rank4_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, Pass, Pass, 4>>& instances)
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 1>{});
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 2>{});
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 3>{});
|
||||
add_device_softmax_f32_f32_rank4_reduce1_instances(instances);
|
||||
add_device_softmax_f32_f32_rank4_reduce2_instances(instances);
|
||||
add_device_softmax_f32_f32_rank4_reduce3_instances(instances);
|
||||
add_device_softmax_f32_f32_rank4_reduce4_instances(instances);
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 3;
|
||||
|
||||
void add_device_softmax_f32_f32_rank3_reduce1_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 1>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 3;
|
||||
|
||||
void add_device_softmax_f32_f32_rank3_reduce2_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 2>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 3;
|
||||
|
||||
void add_device_softmax_f32_f32_rank3_reduce3_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 3>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_f32_f32_rank4_reduce1_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 1>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_f32_f32_rank4_reduce2_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 2>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_f32_f32_rank4_reduce3_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 3>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_f32_f32_rank4_reduce4_instances(
|
||||
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 4>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,40 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
void add_device_softmax_i8_i8_rank3_instances(
|
||||
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances)
|
||||
{
|
||||
add_device_softmax_i8_i8_rank3_reduce1_instances(instances);
|
||||
add_device_softmax_i8_i8_rank3_reduce2_instances(instances);
|
||||
add_device_softmax_i8_i8_rank3_reduce3_instances(instances);
|
||||
}
|
||||
|
||||
void add_device_softmax_i8_i8_rank4_instances(
|
||||
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances)
|
||||
{
|
||||
add_device_softmax_i8_i8_rank4_reduce1_instances(instances);
|
||||
add_device_softmax_i8_i8_rank4_reduce2_instances(instances);
|
||||
add_device_softmax_i8_i8_rank4_reduce3_instances(instances);
|
||||
add_device_softmax_i8_i8_rank4_reduce4_instances(instances);
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 3;
|
||||
|
||||
void add_device_softmax_i8_i8_rank3_reduce1_instances(
|
||||
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 1>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 3;
|
||||
|
||||
void add_device_softmax_i8_i8_rank3_reduce2_instances(
|
||||
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 2>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 3;
|
||||
|
||||
void add_device_softmax_i8_i8_rank3_reduce3_instances(
|
||||
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 3>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_i8_i8_rank4_reduce1_instances(
|
||||
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 1>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_i8_i8_rank4_reduce2_instances(
|
||||
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 2>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_i8_i8_rank4_reduce3_instances(
|
||||
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 3>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
static constexpr index_t RANK = 4;
|
||||
|
||||
void add_device_softmax_i8_i8_rank4_reduce4_instances(
|
||||
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 4>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
Reference in New Issue
Block a user