mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 10:59:55 +00:00
Add int4 example for convnd_fwd_bias_relu_add (#375)
* Add int4 example for convnd_fwd_bias_relu_add
* Fix AddReluAdd for building without int4 support
* Update CMakeLists.txt
* Format
* Convert int4 tensors for int8 kernel
* Fix device memory allocation
* Format
* Format
[ROCm/composable_kernel commit: b73ae24234]
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
add_example_executable(example_grouped_convnd_fwd_bias_relu_add_xdl_fp16 grouped_convnd_fwd_bias_relu_add_xdl_fp16.cpp)
|
||||
target_link_libraries(example_grouped_convnd_fwd_bias_relu_add_xdl_fp16 PRIVATE utility)
|
||||
|
||||
add_example_executable(example_grouped_convnd_fwd_bias_relu_add_xdl_fp32 grouped_convnd_fwd_bias_relu_add_xdl_fp32.cpp)
|
||||
target_link_libraries(example_grouped_convnd_fwd_bias_relu_add_xdl_fp32 PRIVATE utility)
|
||||
|
||||
add_example_executable(example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 grouped_convnd_fwd_bias_relu_add_xdl_bf16.cpp)
|
||||
target_link_libraries(example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 PRIVATE utility)
|
||||
|
||||
add_example_executable(example_grouped_convnd_fwd_bias_relu_add_xdl_int8 grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp)
|
||||
target_link_libraries(example_grouped_convnd_fwd_bias_relu_add_xdl_int8 PRIVATE utility)
|
||||
|
||||
if(USE_BITINT_EXTENSION_INT4)
|
||||
add_example_executable(example_grouped_convnd_fwd_bias_relu_add_xdl_int4 grouped_convnd_fwd_bias_relu_add_xdl_int4.cpp)
|
||||
endif() # USE_BITINT_EXTENSION_INT4
|
||||
|
||||
@@ -26,13 +26,16 @@ void print_helper_msg()
|
||||
}
|
||||
|
||||
template <ck::index_t NDimSpatial,
|
||||
typename InDataType,
|
||||
typename WeiDataType,
|
||||
typename InKernelDataType,
|
||||
typename WeiKernelDataType,
|
||||
typename CShuffleDataType,
|
||||
typename OutDataType,
|
||||
typename OutKernelDataType,
|
||||
typename InElementOp,
|
||||
typename WeiElementOp,
|
||||
typename OutElementOp,
|
||||
typename InUserDataType,
|
||||
typename WeiUserDataType,
|
||||
typename OutUserDataType,
|
||||
typename DeviceConvNDFwdInstance>
|
||||
int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
|
||||
int init_method,
|
||||
@@ -47,12 +50,12 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
|
||||
const WeiElementOp& wei_element_op,
|
||||
const OutElementOp& out_element_op)
|
||||
{
|
||||
Tensor<InDataType> in(in_g_n_c_wis_desc);
|
||||
Tensor<WeiDataType> wei(wei_g_k_c_xs_desc);
|
||||
Tensor<OutDataType> bias(bias_g_n_k_wos_desc);
|
||||
Tensor<OutDataType> residual(residual_g_n_k_wos_desc);
|
||||
Tensor<OutDataType> out_host(out_g_n_k_wos_desc);
|
||||
Tensor<OutDataType> out_device(out_g_n_k_wos_desc);
|
||||
Tensor<InUserDataType> in(in_g_n_c_wis_desc);
|
||||
Tensor<WeiUserDataType> wei(wei_g_k_c_xs_desc);
|
||||
Tensor<OutUserDataType> bias(bias_g_n_k_wos_desc);
|
||||
Tensor<OutUserDataType> residual(residual_g_n_k_wos_desc);
|
||||
Tensor<OutUserDataType> out_host(out_g_n_k_wos_desc);
|
||||
Tensor<OutKernelDataType> out_device(out_g_n_k_wos_desc);
|
||||
|
||||
std::cout << "in: " << in.mDesc << std::endl;
|
||||
std::cout << "wei: " << wei.mDesc << std::endl;
|
||||
@@ -64,26 +67,38 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
|
||||
{
|
||||
case 0: break;
|
||||
case 1:
|
||||
in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
|
||||
wei.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
|
||||
bias.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
|
||||
in.GenerateTensorValue(GeneratorTensor_2<InUserDataType>{-5, 5});
|
||||
wei.GenerateTensorValue(GeneratorTensor_2<WeiUserDataType>{-5, 5});
|
||||
bias.GenerateTensorValue(GeneratorTensor_2<OutUserDataType>{-5, 5});
|
||||
break;
|
||||
default:
|
||||
in.GenerateTensorValue(GeneratorTensor_3<InDataType>{0.0, 1.0});
|
||||
wei.GenerateTensorValue(GeneratorTensor_3<WeiDataType>{-0.5, 0.5});
|
||||
bias.GenerateTensorValue(GeneratorTensor_3<OutDataType>{-0.5, 0.5});
|
||||
in.GenerateTensorValue(GeneratorTensor_3<InUserDataType>{0.0, 1.0});
|
||||
wei.GenerateTensorValue(GeneratorTensor_3<WeiUserDataType>{-0.5, 0.5});
|
||||
bias.GenerateTensorValue(GeneratorTensor_3<OutUserDataType>{-0.5, 0.5});
|
||||
}
|
||||
|
||||
DeviceMem in_device_buf(sizeof(InDataType) * in.mDesc.GetElementSpaceSize());
|
||||
DeviceMem wei_device_buf(sizeof(WeiDataType) * wei.mDesc.GetElementSpaceSize());
|
||||
DeviceMem bias_device_buf(sizeof(OutDataType) * bias.mDesc.GetElementSpaceSize());
|
||||
DeviceMem residual_device_buf(sizeof(OutDataType) * residual.mDesc.GetElementSpaceSize());
|
||||
DeviceMem out_device_buf(sizeof(OutDataType) * out_device.mDesc.GetElementSpaceSize());
|
||||
DeviceMem in_device_buf(sizeof(InKernelDataType) * in.mDesc.GetElementSpaceSize());
|
||||
DeviceMem wei_device_buf(sizeof(WeiKernelDataType) * wei.mDesc.GetElementSpaceSize());
|
||||
DeviceMem bias_device_buf(sizeof(OutKernelDataType) * bias.mDesc.GetElementSpaceSize());
|
||||
DeviceMem residual_device_buf(sizeof(OutKernelDataType) * residual.mDesc.GetElementSpaceSize());
|
||||
DeviceMem out_device_buf(sizeof(OutKernelDataType) * out_device.mDesc.GetElementSpaceSize());
|
||||
|
||||
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
|
||||
const Tensor<InKernelDataType> in_converted(in);
|
||||
const Tensor<WeiKernelDataType> wei_converted(wei);
|
||||
const Tensor<OutKernelDataType> bias_converted(bias);
|
||||
const Tensor<OutKernelDataType> residual_converted(residual);
|
||||
|
||||
in_device_buf.ToDevice(in_converted.mData.data());
|
||||
wei_device_buf.ToDevice(wei_converted.mData.data());
|
||||
bias_device_buf.ToDevice(bias_converted.mData.data());
|
||||
residual_device_buf.ToDevice(residual_converted.mData.data());
|
||||
#else // CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
|
||||
in_device_buf.ToDevice(in.mData.data());
|
||||
wei_device_buf.ToDevice(wei.mData.data());
|
||||
bias_device_buf.ToDevice(bias.mData.data());
|
||||
residual_device_buf.ToDevice(residual.mData.data());
|
||||
#endif // CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
|
||||
|
||||
std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_lengths{};
|
||||
std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_strides{};
|
||||
@@ -154,7 +169,7 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
|
||||
float avg_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel});
|
||||
|
||||
std::size_t flop = conv_param.GetFlops();
|
||||
std::size_t num_btype = conv_param.GetByte<InDataType, WeiDataType, OutDataType>();
|
||||
std::size_t num_btype = conv_param.GetByte<InUserDataType, WeiUserDataType, OutUserDataType>();
|
||||
|
||||
float tflops = static_cast<float>(flop) / 1.E9 / avg_time;
|
||||
float gb_per_sec = num_btype / 1.E6 / avg_time;
|
||||
@@ -168,8 +183,8 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
|
||||
Tensor<CShuffleDataType> c_host(out_g_n_k_wos_desc);
|
||||
|
||||
auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
CShuffleDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
@@ -196,10 +211,22 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
|
||||
|
||||
out_device_buf.FromDevice(out_device.mData.data());
|
||||
|
||||
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
|
||||
const Tensor<OutUserDataType> out_device_converted(out_device);
|
||||
|
||||
return ck::utils::check_err(out_device_converted.mData,
|
||||
out_host.mData,
|
||||
"Error: incorrect results!",
|
||||
1e-5f,
|
||||
1e-4f)
|
||||
? 0
|
||||
: 1;
|
||||
#else // CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
|
||||
return ck::utils::check_err(
|
||||
out_device.mData, out_host.mData, "Error: incorrect results!", 1e-5f, 1e-4f)
|
||||
? 0
|
||||
: 1;
|
||||
#endif // CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -7,13 +7,19 @@
|
||||
|
||||
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
|
||||
|
||||
using InDataType = ck::bhalf_t;
|
||||
using WeiDataType = ck::bhalf_t;
|
||||
using AccDataType = float;
|
||||
using CShuffleDataType = float;
|
||||
using BiasDataType = ck::bhalf_t;
|
||||
using ResidualDataType = ck::bhalf_t;
|
||||
using OutDataType = ck::bhalf_t;
|
||||
// kernel data types
|
||||
using InKernelDataType = ck::bhalf_t;
|
||||
using WeiKernelDataType = ck::bhalf_t;
|
||||
using AccDataType = float;
|
||||
using CShuffleDataType = float;
|
||||
using BiasKernelDataType = ck::bhalf_t;
|
||||
using ResidualKernelDataType = ck::bhalf_t;
|
||||
using OutKernelDataType = ck::bhalf_t;
|
||||
|
||||
// tensor data types
|
||||
using InUserDataType = InKernelDataType;
|
||||
using WeiUserDataType = WeiKernelDataType;
|
||||
using OutUserDataType = OutKernelDataType;
|
||||
|
||||
template <ck::index_t... Is>
|
||||
using S = ck::Sequence<Is...>;
|
||||
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
|
||||
WeiLayout,
|
||||
ck::Tuple<BiasLayout, ResidualLayout>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
AccDataType,
|
||||
CShuffleDataType,
|
||||
ck::Tuple<BiasDataType, ResidualDataType>,
|
||||
OutDataType,
|
||||
ck::Tuple<BiasKernelDataType, ResidualKernelDataType>,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<1,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<1,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<2,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<2,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<3,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<3,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
|
||||
@@ -7,13 +7,19 @@
|
||||
|
||||
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
|
||||
|
||||
using InDataType = ck::half_t;
|
||||
using WeiDataType = ck::half_t;
|
||||
using AccDataType = float;
|
||||
using CShuffleDataType = ck::half_t;
|
||||
using BiasDataType = ck::half_t;
|
||||
using ResidualDataType = ck::half_t;
|
||||
using OutDataType = ck::half_t;
|
||||
// kernel data types
|
||||
using InKernelDataType = ck::half_t;
|
||||
using WeiKernelDataType = ck::half_t;
|
||||
using AccDataType = float;
|
||||
using CShuffleDataType = ck::half_t;
|
||||
using BiasKernelDataType = ck::half_t;
|
||||
using ResidualKernelDataType = ck::half_t;
|
||||
using OutKernelDataType = ck::half_t;
|
||||
|
||||
// tensor data types
|
||||
using InUserDataType = InKernelDataType;
|
||||
using WeiUserDataType = WeiKernelDataType;
|
||||
using OutUserDataType = OutKernelDataType;
|
||||
|
||||
template <ck::index_t... Is>
|
||||
using S = ck::Sequence<Is...>;
|
||||
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
|
||||
WeiLayout,
|
||||
ck::Tuple<BiasLayout, ResidualLayout>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
AccDataType,
|
||||
CShuffleDataType,
|
||||
ck::Tuple<BiasDataType, ResidualDataType>,
|
||||
OutDataType,
|
||||
ck::Tuple<BiasKernelDataType, ResidualKernelDataType>,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<1,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<1,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<2,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<2,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<3,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<3,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
|
||||
@@ -7,13 +7,19 @@
|
||||
|
||||
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
|
||||
|
||||
using InDataType = float;
|
||||
using WeiDataType = float;
|
||||
using AccDataType = float;
|
||||
using CShuffleDataType = float;
|
||||
using BiasDataType = float;
|
||||
using ResidualDataType = float;
|
||||
using OutDataType = float;
|
||||
// kernel data types
|
||||
using InKernelDataType = float;
|
||||
using WeiKernelDataType = float;
|
||||
using AccDataType = float;
|
||||
using CShuffleDataType = float;
|
||||
using BiasKernelDataType = float;
|
||||
using ResidualKernelDataType = float;
|
||||
using OutKernelDataType = float;
|
||||
|
||||
// tensor data types
|
||||
using InUserDataType = InKernelDataType;
|
||||
using WeiUserDataType = WeiKernelDataType;
|
||||
using OutUserDataType = OutKernelDataType;
|
||||
|
||||
template <ck::index_t... Is>
|
||||
using S = ck::Sequence<Is...>;
|
||||
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
|
||||
WeiLayout,
|
||||
ck::Tuple<BiasLayout, ResidualLayout>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
AccDataType,
|
||||
CShuffleDataType,
|
||||
ck::Tuple<BiasDataType, ResidualDataType>,
|
||||
OutDataType,
|
||||
ck::Tuple<BiasKernelDataType, ResidualKernelDataType>,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<1,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<1,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<2,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<2,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<3,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<3,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
|
||||
@@ -0,0 +1,459 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "grouped_convnd_fwd_bias_relu_add_common.hpp"
|
||||
|
||||
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
|
||||
|
||||
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
|
||||
|
||||
// kernel data types
|
||||
using InKernelDataType = int8_t;
|
||||
using WeiKernelDataType = int8_t;
|
||||
using AccDataType = int32_t;
|
||||
using CShuffleDataType = int8_t;
|
||||
using BiasKernelDataType = int8_t;
|
||||
using ResidualKernelDataType = int8_t;
|
||||
using OutKernelDataType = int8_t;
|
||||
|
||||
// tensor data types
|
||||
using InUserDataType = ck::int4_t;
|
||||
using WeiUserDataType = ck::int4_t;
|
||||
using OutUserDataType = ck::int4_t;
|
||||
|
||||
template <ck::index_t... Is>
|
||||
using S = ck::Sequence<Is...>;
|
||||
|
||||
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using OutElementOp = ck::tensor_operation::element_wise::AddReluAdd;
|
||||
|
||||
static constexpr auto ConvSpec =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecialization::MNKPadding;
|
||||
|
||||
template <ck::index_t NDimSpatial,
|
||||
typename InLayout,
|
||||
typename WeiLayout,
|
||||
typename BiasLayout,
|
||||
typename ResidualLayout,
|
||||
typename OutLayout>
|
||||
using DeviceGroupedConvNDFwdInstance =
|
||||
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<
|
||||
NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<BiasLayout, ResidualLayout>,
|
||||
OutLayout,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
AccDataType,
|
||||
CShuffleDataType,
|
||||
ck::Tuple<BiasKernelDataType, ResidualKernelDataType>,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
ConvSpec, // ConvForwardSpecialization
|
||||
GemmSpec, // GemmSpecialization
|
||||
1, //
|
||||
256, // BlockSize
|
||||
128, // MPerBlock
|
||||
256, // NPerBlock
|
||||
64, // KPerBlock
|
||||
16, // AK1
|
||||
16, // BK1
|
||||
32, // MPerXdl
|
||||
32, // NPerXdl
|
||||
2, // MXdlPerWave
|
||||
4, // NXdlPerWave
|
||||
S<4, 64, 1>, // ABlockTransferThreadClusterLengths_AK0_M_AK1
|
||||
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
|
||||
S<1, 0, 2>, // ABlockTransferSrcAccessOrder
|
||||
2, // ABlockTransferSrcVectorDim
|
||||
16, // ABlockTransferSrcScalarPerVector
|
||||
16, // ABlockTransferDstScalarPerVector_AK1
|
||||
1, // ABlockLdsExtraM
|
||||
S<4, 64, 1>, // BBlockTransferThreadClusterLengths_BK0_N_BK1
|
||||
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
|
||||
S<1, 0, 2>, // BBlockTransferSrcAccessOrder
|
||||
2, // BBlockTransferSrcVectorDim
|
||||
16, // BBlockTransferSrcScalarPerVector
|
||||
16, // BBlockTransferDstScalarPerVector_BK1
|
||||
1, // BBlockLdsExtraN
|
||||
1,
|
||||
1,
|
||||
S<1, 64, 1, 4>,
|
||||
16>;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
namespace ctc = ck::tensor_layout::convolution;
|
||||
|
||||
print_helper_msg();
|
||||
|
||||
bool do_verification = true;
|
||||
int init_method = 1;
|
||||
bool time_kernel = false;
|
||||
|
||||
// conventional group conv definition
|
||||
// G = 2
|
||||
// [N, C, Hi, Wi] = [128, 384, 71, 71]
|
||||
// [K, C, Y, X] = [512, 192, 3, 3]
|
||||
// [N, K, Ho, Wo] = [128, 512, 36, 36]
|
||||
// CK group conv definition
|
||||
// [G, N, C, Hi, Wi] = [2, 128, 192, 71, 71]
|
||||
// [G, K, C, Y, X] = [2, 256, 192, 3, 3]
|
||||
// [G, N, K, Ho, Wo] = [2, 128, 256, 36, 36]
|
||||
ck::utils::conv::ConvParam conv_param{
|
||||
2, 2, 128, 256, 192, {3, 3}, {71, 71}, {2, 2}, {1, 1}, {1, 1}, {1, 1}};
|
||||
|
||||
if(argc == 1)
|
||||
{
|
||||
// use default
|
||||
}
|
||||
else if(argc == 4)
|
||||
{
|
||||
do_verification = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = std::stoi(argv[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
do_verification = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = std::stoi(argv[3]);
|
||||
const ck::index_t num_dim_spatial = std::stoi(argv[4]);
|
||||
|
||||
conv_param = ck::utils::conv::parse_conv_param(num_dim_spatial, 5, argv);
|
||||
}
|
||||
|
||||
const auto in_element_op = InElementOp{};
|
||||
const auto wei_element_op = WeiElementOp{};
|
||||
const auto out_element_op = OutElementOp{};
|
||||
|
||||
if(conv_param.num_dim_spatial_ == 1)
|
||||
{
|
||||
using InLayout = ctc::G_NW_C;
|
||||
using WeiLayout = ctc::G_K_X_C;
|
||||
using BiasLayout = ctc::G_NW_K;
|
||||
using ResidualLayout = ctc::G_NW_K;
|
||||
using OutLayout = ctc::G_NW_K;
|
||||
|
||||
const auto in_g_n_c_wis_desc = HostTensorDescriptor(
|
||||
{conv_param.G_, conv_param.N_, conv_param.C_, conv_param.input_spatial_lengths_[0]},
|
||||
{
|
||||
conv_param.C_, // g
|
||||
conv_param.input_spatial_lengths_[0] * conv_param.G_ * conv_param.C_, // n
|
||||
1, // c
|
||||
conv_param.G_ * conv_param.C_ // wi
|
||||
});
|
||||
|
||||
const auto wei_g_k_c_xs_desc = HostTensorDescriptor(
|
||||
{conv_param.G_, conv_param.K_, conv_param.C_, conv_param.filter_spatial_lengths_[0]},
|
||||
{
|
||||
conv_param.K_ * conv_param.filter_spatial_lengths_[0] * conv_param.C_, // g
|
||||
conv_param.filter_spatial_lengths_[0] * conv_param.C_, // k
|
||||
1, // c
|
||||
conv_param.C_ // x
|
||||
});
|
||||
|
||||
const auto bias_g_n_k_wos_desc = HostTensorDescriptor(
|
||||
{conv_param.G_, conv_param.N_, conv_param.K_, conv_param.output_spatial_lengths_[0]},
|
||||
{
|
||||
conv_param.K_, // g
|
||||
0, // k
|
||||
1, // c
|
||||
0 // x
|
||||
});
|
||||
|
||||
const auto residual_g_n_k_wos_desc = HostTensorDescriptor(
|
||||
{conv_param.G_, conv_param.N_, conv_param.K_, conv_param.output_spatial_lengths_[0]},
|
||||
{
|
||||
conv_param.K_, // g
|
||||
0, // k
|
||||
1, // c
|
||||
0 // x
|
||||
});
|
||||
|
||||
const auto out_g_n_k_wos_desc = HostTensorDescriptor(
|
||||
{conv_param.G_, conv_param.N_, conv_param.K_, conv_param.output_spatial_lengths_[0]},
|
||||
{
|
||||
conv_param.K_, // g
|
||||
conv_param.output_spatial_lengths_[0] * conv_param.G_ * conv_param.K_, // n
|
||||
1, // k
|
||||
conv_param.G_ * conv_param.K_ // wo
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<1,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<1,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
BiasLayout,
|
||||
ResidualLayout,
|
||||
OutLayout>>(
|
||||
do_verification,
|
||||
init_method,
|
||||
time_kernel,
|
||||
conv_param,
|
||||
in_g_n_c_wis_desc,
|
||||
wei_g_k_c_xs_desc,
|
||||
bias_g_n_k_wos_desc,
|
||||
residual_g_n_k_wos_desc,
|
||||
out_g_n_k_wos_desc,
|
||||
in_element_op,
|
||||
wei_element_op,
|
||||
out_element_op);
|
||||
}
|
||||
else if(conv_param.num_dim_spatial_ == 2)
|
||||
{
|
||||
using InLayout = ctc::G_NHW_C;
|
||||
using WeiLayout = ctc::G_K_YX_C;
|
||||
using BiasLayout = ctc::G_NHW_K;
|
||||
using ResidualLayout = ctc::G_NHW_K;
|
||||
using OutLayout = ctc::G_NHW_K;
|
||||
|
||||
const auto in_g_n_c_wis_desc = HostTensorDescriptor(
|
||||
{conv_param.G_,
|
||||
conv_param.N_,
|
||||
conv_param.C_,
|
||||
conv_param.input_spatial_lengths_[0],
|
||||
conv_param.input_spatial_lengths_[1]},
|
||||
{
|
||||
conv_param.C_, // g
|
||||
conv_param.input_spatial_lengths_[0] * conv_param.input_spatial_lengths_[1] *
|
||||
conv_param.G_ * conv_param.C_, // n
|
||||
1, // c
|
||||
conv_param.input_spatial_lengths_[1] * conv_param.G_ * conv_param.C_, // hi
|
||||
conv_param.G_ * conv_param.C_ // wi
|
||||
});
|
||||
|
||||
const auto wei_g_k_c_xs_desc =
|
||||
HostTensorDescriptor({conv_param.G_,
|
||||
conv_param.K_,
|
||||
conv_param.C_,
|
||||
conv_param.filter_spatial_lengths_[0],
|
||||
conv_param.filter_spatial_lengths_[1]},
|
||||
{
|
||||
conv_param.K_ * conv_param.filter_spatial_lengths_[0] *
|
||||
conv_param.filter_spatial_lengths_[1] * conv_param.C_, // g
|
||||
conv_param.filter_spatial_lengths_[0] *
|
||||
conv_param.filter_spatial_lengths_[1] * conv_param.C_, // k
|
||||
1, // c
|
||||
conv_param.filter_spatial_lengths_[1] * conv_param.C_, // y
|
||||
conv_param.C_ // x
|
||||
});
|
||||
|
||||
const auto bias_g_n_k_wos_desc =
|
||||
HostTensorDescriptor({conv_param.G_,
|
||||
conv_param.N_,
|
||||
conv_param.K_,
|
||||
conv_param.output_spatial_lengths_[0],
|
||||
conv_param.output_spatial_lengths_[1]},
|
||||
{
|
||||
conv_param.K_, // g
|
||||
0, // n
|
||||
1, // k
|
||||
0, // ho
|
||||
0 // wo
|
||||
});
|
||||
|
||||
const auto residual_g_n_k_wos_desc =
|
||||
HostTensorDescriptor({conv_param.G_,
|
||||
conv_param.N_,
|
||||
conv_param.K_,
|
||||
conv_param.output_spatial_lengths_[0],
|
||||
conv_param.output_spatial_lengths_[1]},
|
||||
{
|
||||
conv_param.K_, // g
|
||||
0, // n
|
||||
1, // k
|
||||
0, // ho
|
||||
0 // wo
|
||||
});
|
||||
|
||||
const auto out_g_n_k_wos_desc = HostTensorDescriptor(
|
||||
{conv_param.G_,
|
||||
conv_param.N_,
|
||||
conv_param.K_,
|
||||
conv_param.output_spatial_lengths_[0],
|
||||
conv_param.output_spatial_lengths_[1]},
|
||||
{
|
||||
conv_param.K_, // g
|
||||
conv_param.output_spatial_lengths_[0] * conv_param.output_spatial_lengths_[1] *
|
||||
conv_param.G_ * conv_param.K_, // n
|
||||
1, // k
|
||||
conv_param.output_spatial_lengths_[1] * conv_param.G_ * conv_param.K_, // ho
|
||||
conv_param.G_ * conv_param.K_ // wo
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<2,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<2,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
BiasLayout,
|
||||
ResidualLayout,
|
||||
OutLayout>>(
|
||||
do_verification,
|
||||
init_method,
|
||||
time_kernel,
|
||||
conv_param,
|
||||
in_g_n_c_wis_desc,
|
||||
wei_g_k_c_xs_desc,
|
||||
bias_g_n_k_wos_desc,
|
||||
residual_g_n_k_wos_desc,
|
||||
out_g_n_k_wos_desc,
|
||||
in_element_op,
|
||||
wei_element_op,
|
||||
out_element_op);
|
||||
}
|
||||
else if(conv_param.num_dim_spatial_ == 3)
|
||||
{
|
||||
using InLayout = ctc::G_NDHW_C;
|
||||
using WeiLayout = ctc::G_K_ZYX_C;
|
||||
using BiasLayout = ctc::G_NDHW_K;
|
||||
using ResidualLayout = ctc::G_NDHW_K;
|
||||
using OutLayout = ctc::G_NDHW_K;
|
||||
|
||||
const auto in_g_n_c_wis_desc = HostTensorDescriptor(
|
||||
{conv_param.G_,
|
||||
conv_param.N_,
|
||||
conv_param.C_,
|
||||
conv_param.input_spatial_lengths_[0],
|
||||
conv_param.input_spatial_lengths_[1],
|
||||
conv_param.input_spatial_lengths_[2]},
|
||||
{
|
||||
conv_param.C_, // g
|
||||
conv_param.input_spatial_lengths_[0] * conv_param.input_spatial_lengths_[1] *
|
||||
conv_param.input_spatial_lengths_[2] * conv_param.G_ * conv_param.C_, // n
|
||||
1, // c
|
||||
conv_param.input_spatial_lengths_[1] * conv_param.input_spatial_lengths_[2] *
|
||||
conv_param.G_ * conv_param.C_, // di
|
||||
conv_param.input_spatial_lengths_[2] * conv_param.G_ * conv_param.C_, // hi
|
||||
conv_param.G_ * conv_param.C_ // wi
|
||||
});
|
||||
|
||||
const auto wei_g_k_c_xs_desc = HostTensorDescriptor(
|
||||
{conv_param.G_,
|
||||
conv_param.K_,
|
||||
conv_param.C_,
|
||||
conv_param.filter_spatial_lengths_[0],
|
||||
conv_param.filter_spatial_lengths_[1],
|
||||
conv_param.filter_spatial_lengths_[2]},
|
||||
{
|
||||
conv_param.K_ * conv_param.filter_spatial_lengths_[0] *
|
||||
conv_param.filter_spatial_lengths_[1] * conv_param.filter_spatial_lengths_[2] *
|
||||
conv_param.C_, // g
|
||||
conv_param.filter_spatial_lengths_[0] * conv_param.filter_spatial_lengths_[1] *
|
||||
conv_param.filter_spatial_lengths_[2] * conv_param.C_, // k
|
||||
1, // c
|
||||
conv_param.filter_spatial_lengths_[1] * conv_param.filter_spatial_lengths_[2] *
|
||||
conv_param.C_, // z
|
||||
conv_param.filter_spatial_lengths_[2] * conv_param.C_, // y
|
||||
conv_param.C_ // x
|
||||
});
|
||||
|
||||
const auto bias_g_n_k_wos_desc =
|
||||
HostTensorDescriptor({conv_param.G_,
|
||||
conv_param.N_,
|
||||
conv_param.K_,
|
||||
conv_param.output_spatial_lengths_[0],
|
||||
conv_param.output_spatial_lengths_[1],
|
||||
conv_param.output_spatial_lengths_[2]},
|
||||
{
|
||||
conv_param.K_, // g
|
||||
0, // n
|
||||
1, // k
|
||||
0, // z
|
||||
0, // y
|
||||
0 // x
|
||||
});
|
||||
|
||||
const auto residual_g_n_k_wos_desc =
|
||||
HostTensorDescriptor({conv_param.G_,
|
||||
conv_param.N_,
|
||||
conv_param.K_,
|
||||
conv_param.output_spatial_lengths_[0],
|
||||
conv_param.output_spatial_lengths_[1],
|
||||
conv_param.output_spatial_lengths_[2]},
|
||||
{
|
||||
conv_param.K_, // g
|
||||
0, // n
|
||||
1, // k
|
||||
0, // z
|
||||
0, // y
|
||||
0 // x
|
||||
});
|
||||
|
||||
const auto out_g_n_k_wos_desc = HostTensorDescriptor(
|
||||
{conv_param.G_,
|
||||
conv_param.N_,
|
||||
conv_param.K_,
|
||||
conv_param.output_spatial_lengths_[0],
|
||||
conv_param.output_spatial_lengths_[1],
|
||||
conv_param.output_spatial_lengths_[2]},
|
||||
{
|
||||
conv_param.K_, // g
|
||||
conv_param.output_spatial_lengths_[0] * conv_param.output_spatial_lengths_[1] *
|
||||
conv_param.output_spatial_lengths_[2] * conv_param.G_ * conv_param.K_, // n
|
||||
1, // k
|
||||
conv_param.output_spatial_lengths_[1] * conv_param.output_spatial_lengths_[2] *
|
||||
conv_param.G_ * conv_param.K_, // do
|
||||
conv_param.output_spatial_lengths_[2] * conv_param.G_ * conv_param.K_, // ho
|
||||
conv_param.G_ * conv_param.K_ // wo
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<3,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<3,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
BiasLayout,
|
||||
ResidualLayout,
|
||||
OutLayout>>(
|
||||
do_verification,
|
||||
init_method,
|
||||
time_kernel,
|
||||
conv_param,
|
||||
in_g_n_c_wis_desc,
|
||||
wei_g_k_c_xs_desc,
|
||||
bias_g_n_k_wos_desc,
|
||||
residual_g_n_k_wos_desc,
|
||||
out_g_n_k_wos_desc,
|
||||
in_element_op,
|
||||
wei_element_op,
|
||||
out_element_op);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -7,13 +7,19 @@
|
||||
|
||||
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
|
||||
|
||||
using InDataType = int8_t;
|
||||
using WeiDataType = int8_t;
|
||||
using AccDataType = int32_t;
|
||||
using CShuffleDataType = int8_t;
|
||||
using BiasDataType = int8_t;
|
||||
using ResidualDataType = int8_t;
|
||||
using OutDataType = int8_t;
|
||||
// kernel data types
|
||||
using InKernelDataType = int8_t;
|
||||
using WeiKernelDataType = int8_t;
|
||||
using AccDataType = int32_t;
|
||||
using CShuffleDataType = int8_t;
|
||||
using BiasKernelDataType = int8_t;
|
||||
using ResidualKernelDataType = int8_t;
|
||||
using OutKernelDataType = int8_t;
|
||||
|
||||
// tensor data types
|
||||
using InUserDataType = InKernelDataType;
|
||||
using WeiUserDataType = WeiKernelDataType;
|
||||
using OutUserDataType = OutKernelDataType;
|
||||
|
||||
template <ck::index_t... Is>
|
||||
using S = ck::Sequence<Is...>;
|
||||
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
|
||||
WeiLayout,
|
||||
ck::Tuple<BiasLayout, ResidualLayout>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
AccDataType,
|
||||
CShuffleDataType,
|
||||
ck::Tuple<BiasDataType, ResidualDataType>,
|
||||
OutDataType,
|
||||
ck::Tuple<BiasKernelDataType, ResidualKernelDataType>,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<1,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<1,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<2,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<2,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
|
||||
});
|
||||
|
||||
return run_grouped_conv_fwd_bias_relu_add<3,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
InKernelDataType,
|
||||
WeiKernelDataType,
|
||||
CShuffleDataType,
|
||||
OutDataType,
|
||||
OutKernelDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
InUserDataType,
|
||||
WeiUserDataType,
|
||||
OutUserDataType,
|
||||
DeviceGroupedConvNDFwdInstance<3,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
|
||||
@@ -98,6 +98,18 @@ struct AddReluAdd
|
||||
int32_t c = b + x2;
|
||||
y = c;
|
||||
}
|
||||
|
||||
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
|
||||
template <>
|
||||
__host__ __device__ constexpr void operator()<int4_t, int8_t, int4_t, int4_t>(
|
||||
int4_t& y, const int8_t& x0, const int4_t& x1, const int4_t& x2) const
|
||||
{
|
||||
int32_t a = x0 + x1;
|
||||
int32_t b = a > 0 ? a : 0;
|
||||
int32_t c = b + x2;
|
||||
y = c;
|
||||
}
|
||||
#endif // CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
|
||||
};
|
||||
|
||||
struct AddHardswishAdd
|
||||
|
||||
Reference in New Issue
Block a user