Padded Generic Kernel Instance (#730)

* Add NumReduceDim template parameter to DeviceSoftmax and Softmax client API to simplify instances collecting

* Move the generic kernel instance to be the first of the instance list for elementwise op of normalization

* Add GetGenericInstance() interface for DeviceOperationInstanceFactory class of DeviceSoftmax

* Add testing of GetGenericInstance() in client_example of Softmax

* Revert "Add testing of GetGenericInstance() in client_example of Softmax"

This reverts commit f629cd9a93.

* Revert "Add GetGenericInstance() interface for DeviceOperationInstanceFactory class of DeviceSoftmax"

This reverts commit a9f0d000eb.

* Support generic kernel instance to be the first instance returned by GetInstances() for GroupNorm

* Move generic kernel instance to separate tuple for elementwise op of normalization

* Remove un-used files for softmax instance

* Store generic kernel instance to separate tuple for softmax

* Add IsSupported checking for generic instance to client example of softmax

* Replace the get_device_normalize_from_mean_meansquare_instances() by the DeviceOperationInstanceFactory class for elementwise-normalization

* clang-format fix

* Remove int8 from softmax instances

---------

Co-authored-by: zjing14 <zhangjing14@gmail.com>

[ROCm/composable_kernel commit: 0d9118226b]
This commit is contained in:
Qianfeng
2023-06-17 12:43:11 +08:00
committed by GitHub
parent 033da551a7
commit eebebd33c6
76 changed files with 552 additions and 790 deletions

View File

@@ -1,13 +1,4 @@
add_instance_library(device_softmax_instance
device_softmax_i8_i8_instance.cpp
device_softmax_i8_i8_instance_rank3_reduce1.cpp
device_softmax_i8_i8_instance_rank3_reduce2.cpp
device_softmax_i8_i8_instance_rank3_reduce3.cpp
device_softmax_i8_i8_instance_rank4_reduce1.cpp
device_softmax_i8_i8_instance_rank4_reduce2.cpp
device_softmax_i8_i8_instance_rank4_reduce3.cpp
device_softmax_i8_i8_instance_rank4_reduce4.cpp
device_softmax_f16_f16_instance.cpp
device_softmax_f16_f16_instance_rank3_reduce1.cpp
device_softmax_f16_f16_instance_rank3_reduce2.cpp
device_softmax_f16_f16_instance_rank3_reduce3.cpp
@@ -15,7 +6,6 @@ add_instance_library(device_softmax_instance
device_softmax_f16_f16_instance_rank4_reduce2.cpp
device_softmax_f16_f16_instance_rank4_reduce3.cpp
device_softmax_f16_f16_instance_rank4_reduce4.cpp
device_softmax_f32_f32_instance.cpp
device_softmax_f32_f32_instance_rank3_reduce1.cpp
device_softmax_f32_f32_instance_rank3_reduce2.cpp
device_softmax_f32_f32_instance_rank3_reduce3.cpp

View File

@@ -1,40 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_f16_f16_rank3_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3>>& instances)
{
add_device_softmax_f16_f16_rank3_reduce1_instances(instances);
add_device_softmax_f16_f16_rank3_reduce2_instances(instances);
add_device_softmax_f16_f16_rank3_reduce3_instances(instances);
}
void add_device_softmax_f16_f16_rank4_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances)
{
add_device_softmax_f16_f16_rank4_reduce1_instances(instances);
add_device_softmax_f16_f16_rank4_reduce2_instances(instances);
add_device_softmax_f16_f16_rank4_reduce3_instances(instances);
add_device_softmax_f16_f16_rank4_reduce4_instances(instances);
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f16_f16_rank3_reduce1_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3, 1>>& instances)
{
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 1>{});
add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<3, 1>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<3, 1>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f16_f16_rank3_reduce2_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3, 2>>& instances)
{
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 2>{});
add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<3, 2>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<3, 2>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f16_f16_rank3_reduce3_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3, 3>>& instances)
{
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 3>{});
add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<3, 3>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<3, 3>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f16_f16_rank4_reduce1_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 1>>& instances)
{
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 1>{});
add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<4, 1>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 1>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f16_f16_rank4_reduce2_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 2>>& instances)
{
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 2>{});
add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<4, 2>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 2>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f16_f16_rank4_reduce3_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 3>>& instances)
{
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 3>{});
add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<4, 3>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 3>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f16_f16_rank4_reduce4_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 4>>& instances)
{
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 4>{});
add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<4, 4>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 4>{});
}
} // namespace instance

View File

@@ -1,40 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_f32_f32_rank3_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3>>& instances)
{
add_device_softmax_f32_f32_rank3_reduce1_instances(instances);
add_device_softmax_f32_f32_rank3_reduce2_instances(instances);
add_device_softmax_f32_f32_rank3_reduce3_instances(instances);
}
void add_device_softmax_f32_f32_rank4_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances)
{
add_device_softmax_f32_f32_rank4_reduce1_instances(instances);
add_device_softmax_f32_f32_rank4_reduce2_instances(instances);
add_device_softmax_f32_f32_rank4_reduce3_instances(instances);
add_device_softmax_f32_f32_rank4_reduce4_instances(instances);
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f32_f32_rank3_reduce1_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3, 1>>& instances)
{
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 1>{});
add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<3, 1>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<3, 1>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f32_f32_rank3_reduce2_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3, 2>>& instances)
{
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 2>{});
add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<3, 2>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<3, 2>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f32_f32_rank3_reduce3_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3, 3>>& instances)
{
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 3>{});
add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<3, 3>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<3, 3>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f32_f32_rank4_reduce1_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 1>>& instances)
{
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 1>{});
add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<4, 1>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 1>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f32_f32_rank4_reduce2_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 2>>& instances)
{
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 2>{});
add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<4, 2>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 2>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f32_f32_rank4_reduce3_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 3>>& instances)
{
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 3>{});
add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<4, 3>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 3>{});
}
} // namespace instance

View File

@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f32_f32_rank4_reduce4_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances)
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 4>>& instances)
{
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 4>{});
add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<4, 4>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 4>{});
}
} // namespace instance

View File

@@ -1,40 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_i8_i8_rank3_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances)
{
add_device_softmax_i8_i8_rank3_reduce1_instances(instances);
add_device_softmax_i8_i8_rank3_reduce2_instances(instances);
add_device_softmax_i8_i8_rank3_reduce3_instances(instances);
}
void add_device_softmax_i8_i8_rank4_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances)
{
add_device_softmax_i8_i8_rank4_reduce1_instances(instances);
add_device_softmax_i8_i8_rank4_reduce2_instances(instances);
add_device_softmax_i8_i8_rank4_reduce3_instances(instances);
add_device_softmax_i8_i8_rank4_reduce4_instances(instances);
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,27 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_i8_i8_rank3_reduce1_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 1>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,27 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_i8_i8_rank3_reduce2_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 2>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,27 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_i8_i8_rank3_reduce3_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 3>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,27 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_i8_i8_rank4_reduce1_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 1>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,27 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_i8_i8_rank4_reduce2_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 2>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,27 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_i8_i8_rank4_reduce3_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 3>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,27 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_i8_i8_rank4_reduce4_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 4>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck