Padded Generic Kernel Instance (#730)

* Add NumReduceDim template parameter to DeviceSoftmax and the Softmax client API to simplify instance collection

* Move the generic kernel instance to be the first of the instance list for elementwise op of normalization

* Add GetGenericInstance() interface for DeviceOperationInstanceFactory class of DeviceSoftmax

* Add testing of GetGenericInstance() in client_example of Softmax

* Revert "Add testing of GetGenericInstance() in client_example of Softmax"

This reverts commit f629cd9a93.

* Revert "Add GetGenericInstance() interface for DeviceOperationInstanceFactory class of DeviceSoftmax"

This reverts commit a9f0d000eb.

* Support generic kernel instance to be the first instance returned by GetInstances() for GroupNorm

* Move generic kernel instance to separate tuple for elementwise op of normalization

* Remove unused files for softmax instances

* Store generic kernel instance to separate tuple for softmax

* Add an IsSupported check for the generic instance to the softmax client example

* Replace get_device_normalize_from_mean_meansquare_instances() with the DeviceOperationInstanceFactory class for elementwise-normalization

* clang-format fix

* Remove int8 from softmax instances

---------

Co-authored-by: zjing14 <zhangjing14@gmail.com>

[ROCm/composable_kernel commit: 0d9118226b]
This commit is contained in:
Qianfeng
2023-06-17 12:43:11 +08:00
committed by GitHub
parent 033da551a7
commit eebebd33c6
76 changed files with 552 additions and 790 deletions

View File

@@ -92,27 +92,76 @@ int profile_softmax(int argc, char* argv[])
{
if(data_type == SoftmaxDataType::F16_F16)
{
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 3>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
if(reduce.size() == 1)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 3, 1>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 2)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 3, 2>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 3)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 3, 3>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else
throw std::runtime_error("invalid number of dimensions to reduce");
}
else if(data_type == SoftmaxDataType::F32_F32)
{
ck::profiler::profile_softmax_impl<float, float, float, 3>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
if(reduce.size() == 1)
ck::profiler::profile_softmax_impl<float, float, float, 3, 1>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 2)
ck::profiler::profile_softmax_impl<float, float, float, 3, 2>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 3)
ck::profiler::profile_softmax_impl<float, float, float, 3, 3>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else
throw std::runtime_error("invalid number of dimensions to reduce");
}
else
{
@@ -124,27 +173,97 @@ int profile_softmax(int argc, char* argv[])
{
if(data_type == SoftmaxDataType::F16_F16)
{
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
if(reduce.size() == 1)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4, 1>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 2)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4, 2>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 3)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4, 3>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 4)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4, 4>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else
throw std::runtime_error("invalid number of dimensions to reduce");
}
else if(data_type == SoftmaxDataType::F32_F32)
{
ck::profiler::profile_softmax_impl<float, float, float, 4>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
if(reduce.size() == 1)
ck::profiler::profile_softmax_impl<float, float, float, 4, 1>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 2)
ck::profiler::profile_softmax_impl<float, float, float, 4, 2>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 3)
ck::profiler::profile_softmax_impl<float, float, float, 4, 3>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 4)
ck::profiler::profile_softmax_impl<float, float, float, 4, 4>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else
throw std::runtime_error("invalid number of dimensions to reduce");
}
else
{