gemm/Conv xdlops + dlops quantization (#625)

* Add conv per-layer quantization

* Add gemm_dlops quantization

* Support int8 for innerproduct

* Refine gemm dlops int8 kernel parameters

* Support gfx908(MI100) and gfx90a(MI200)

* clang-format

* Renumber the example

* Support different layouts for the D tensor

* Add conv dlops per-channel quantization example

* Move to example 40

* Extract the common code for the different platforms (dlops and xdlops)

* Move to subfolder. Prepare to add other quantization ops

* Refine the quantization instance library

* Add conv dl instances and client example

* Remove unnecessary type

* Add gemm quantization instance

* Add external api and client example

* Refine num_bytes

* Separate different layouts into different cpp files

* Add more xdl instances

* Revert "Remove unnecessary type"

This reverts commit 820869182f.

* Remove CShuffleDataType in dlops
Let acc and CShuffleDataType be the same in xdlops

---------

Co-authored-by: zjing14 <zhangjing14@gmail.com>
rocking5566 authored on 2023-03-16 04:29:40 +08:00, committed by GitHub
parent a2d5ca8e95
commit 16dc18e0f9
60 changed files with 3186 additions and 717 deletions
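
Every overload in the hunks below implements the same requantization pattern: convert the int32 accumulator to float, apply the activation, multiply by a requantization scale, clamp to the int8 range [-128, 127], then narrow to the output type. A minimal standalone sketch of that math, assuming a ReLU activation (the relu functor, the requantize helper, and the driver are illustrative stand-ins, not CK code):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative activation; CK passes this in as a functor template parameter.
inline float relu(float x) { return x > 0.f ? x : 0.f; }

// y = clamp(requantScale * activation(float(x)), -128, 127), then narrow.
// "requantize" is a hypothetical helper, not a CK function.
inline int8_t requantize(int32_t x, float requantScale)
{
    float y_fp32 = relu(static_cast<float>(x));
    y_fp32       = std::min(std::max(requantScale * y_fp32, -128.f), 127.f);
    return static_cast<int8_t>(y_fp32);
}

int main()
{
    // An int32 GEMM accumulator requantized with a per-layer scale:
    std::printf("%d\n", requantize(1234, 0.05f)); // prints 61
}

The per-layer ops (Activation_Mul_Clamp, Add_Activation_Mul_Clamp) bake requantScale_ into the functor as a member, while the Mul2 variants take the scale as an extra runtime argument so it can vary per output channel.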


@@ -1,6 +1,7 @@
 #pragma once

 #include "ck/utility/data_type.hpp"
+// #include "ck/utility/get_id.hpp"

 namespace ck {
 namespace tensor_operation {
@@ -17,18 +18,27 @@ struct Activation_Mul_Clamp
     __host__ __device__ constexpr void operator()(int8_t& y, const int32_t& x) const
     {
-        float x_fp32 = ck::type_convert<float>(x);
-        activationOp_(x_fp32, x_fp32);
-        float y_fp32 = math::clamp(requantScale_ * x_fp32, -128.f, 127.f);
-        y = ck::type_convert<int8_t>(y_fp32);
+        float y_fp32 = ck::type_convert<float>(x);
+        activationOp_(y_fp32, y_fp32);
+        y_fp32 = math::clamp(requantScale_ * y_fp32, -128.f, 127.f);
+        y = ck::type_convert<int8_t>(y_fp32);
     }

-    __host__ __device__ constexpr void operator()(float& y, const int32_t& x) const
+    __device__ constexpr void operator()(int32_t& y, const int32_t& x) const
     {
-        // We might type_convert to int8 after lambda in someplace
-        float x_fp32 = ck::type_convert<float>(x);
-        activationOp_(x_fp32, x_fp32);
-        y = math::clamp(requantScale_ * x_fp32, -128.f, 127.f);
+        // CAUTION - we may type_convert to int8 in the threadwise copy,
+        // e.g. GridwiseGemmDlMultipleD_km_kn_mn
+        float y_fp32 = ck::type_convert<float>(x);
+        activationOp_(y_fp32, y_fp32);
+        y_fp32 = math::clamp(requantScale_ * y_fp32, -128.f, 127.f);
+        y = ck::type_convert<int32_t>(y_fp32);
     }
+
+    __host__ constexpr void operator()(float& y, const float& x) const
+    {
+        // CAUTION - the reference code may use float in and float out
+        activationOp_(y, x);
+        y = math::clamp(requantScale_ * y, -128.f, 127.f);
+    }

     float requantScale_;
@@ -51,6 +61,17 @@ struct Activation_Mul2_Clamp
         y = ck::type_convert<int8_t>(y_fp32);
     }

+    __device__ constexpr void
+    operator()(int32_t& y, const int32_t& x, const float& requantScale) const
+    {
+        // CAUTION - we may type_convert to int8 in the threadwise copy,
+        // e.g. GridwiseGemmDlMultipleD_km_kn_mn
+        float y_fp32 = ck::type_convert<float>(x);
+        activationOp_(y_fp32, y_fp32);
+        y_fp32 = math::clamp(requantScale * y_fp32, -128.f, 127.f);
+        y = ck::type_convert<int32_t>(y_fp32);
+    }
+
     Activation activationOp_;
 };
@@ -72,6 +93,17 @@ struct Add_Activation_Mul_Clamp
         y = ck::type_convert<int8_t>(y_fp32);
     }

+    __host__ __device__ constexpr void
+    operator()(int32_t& y, const int32_t& x, const int32_t& bias) const
+    {
+        // CAUTION - we may type_convert to int8 in the threadwise copy,
+        // e.g. GridwiseGemmDlMultipleD_km_kn_mn
+        float y_fp32 = ck::type_convert<float>(x + bias);
+        activationOp_(y_fp32, y_fp32);
+        y_fp32 = math::clamp(requantScale_ * y_fp32, -128.f, 127.f);
+        y = ck::type_convert<int32_t>(y_fp32);
+    }
+
     float requantScale_;
     Activation activationOp_;
 };
@@ -92,6 +124,17 @@ struct Add_Activation_Mul2_Clamp
         y = ck::type_convert<int8_t>(y_fp32);
     }

+    __host__ __device__ constexpr void
+    operator()(int32_t& y, const int32_t& x, const int32_t& bias, const float& requantScale) const
+    {
+        // CAUTION - we may type_convert to int8 in the threadwise copy,
+        // e.g. GridwiseGemmDlMultipleD_km_kn_mn
+        float y_fp32 = ck::type_convert<float>(x + bias);
+        activationOp_(y_fp32, y_fp32);
+        y_fp32 = math::clamp(requantScale * y_fp32, -128.f, 127.f);
+        y = ck::type_convert<int32_t>(y_fp32);
+    }
+
     Activation activationOp_;
 };
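
In the Mul2 variants added above, the scale arrives as an operator() argument rather than a member, which is what enables per-channel quantization: the caller supplies a different scale for each output channel. A self-contained sketch of that usage, where ActivationMul2Clamp mirrors the struct in the diff and PassThrough, the buffers, and the epilogue loop are illustrative assumptions rather than CK code:

#include <algorithm>
#include <cstdint>
#include <vector>

// Stand-in for the Activation functor (identity activation).
struct PassThrough
{
    void operator()(float& y, const float& x) const { y = x; }
};

// Mirrors Activation_Mul2_Clamp from the diff, minus CK utilities.
template <typename Activation>
struct ActivationMul2Clamp
{
    void operator()(int8_t& y, const int32_t& x, const float& requantScale) const
    {
        float y_fp32 = static_cast<float>(x);
        activationOp_(y_fp32, y_fp32);
        y_fp32 = std::min(std::max(requantScale * y_fp32, -128.f), 127.f);
        y = static_cast<int8_t>(y_fp32);
    }
    Activation activationOp_;
};

int main()
{
    const int M = 2, N = 3;
    std::vector<int32_t> acc   = {100, 2000, -300, 40, 50000, -60000}; // M x N accumulators
    std::vector<float>   scale = {0.5f, 0.01f, 0.002f};                // one scale per column n
    std::vector<int8_t>  out(M * N);

    ActivationMul2Clamp<PassThrough> op{};
    for(int m = 0; m < M; ++m)
        for(int n = 0; n < N; ++n)
            op(out[m * N + n], acc[m * N + n], scale[n]); // per-channel requant
}

The new int32_t-output overloads in the diff compute the same clamped value but defer the final int8 narrowing to the threadwise copy (e.g. GridwiseGemmDlMultipleD_km_kn_mn), as their CAUTION comments note.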