mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
gemm/Conv xdlops + dlops quantization (#625)
* Add conv perlayer quantization
* Add gemm_dlops quantization
* Support int8 for innerproduct
* Refine gemm dlops int8 kernel parameter
* Support gfx908(MI100) and gfx90a(MI200)
* clang-format
* Rename example number
* Support different layout for d tensor
* Add conv dlops perchannel quantization example
* Move to example 40
* Extract the common code for different platform (dlops and xdlops)
* Move to subfolder. Prepare to add other quantization ops
* Refine the quantization instance library
* Add conv dl instances and client example
* Remove unnecessary type
* Add gemm quantization instance
* Add external api and client example
* Refine num_bytes
* Separate different layouts into different cpp files
* Add more xdl instances
* Revert "Remove unnecessary type"
This reverts commit 820869182f.
* Remove CShuffleDataType in dlops
Let acc and CShuffleDataType be the same in xdlops
---------
Co-authored-by: zjing14 <zhangjing14@gmail.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "ck/utility/data_type.hpp"
|
||||
// #include "ck/utility/get_id.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
@@ -17,18 +18,27 @@ struct Activation_Mul_Clamp
|
||||
|
||||
// Requantizes an int32 input to int8: converts x to float, applies
// activationOp_, multiplies by requantScale_, clamps to [-128, 127] and
// converts the result to int8_t.
// NOTE(review): this span looks like a rendered diff hunk without +/- markers;
// the x_fp32 statements and the y_fp32 statements below appear to be the
// pre-change and post-change bodies of the same operator - confirm against the
// repository before treating it as compilable code.
__host__ __device__ constexpr void operator()(int8_t& y, const int32_t& x) const
|
||||
{
|
||||
float x_fp32 = ck::type_convert<float>(x);
|
||||
activationOp_(x_fp32, x_fp32);
|
||||
float y_fp32 = math::clamp(requantScale_ * x_fp32, -128.f, 127.f);
|
||||
y = ck::type_convert<int8_t>(y_fp32);
|
||||
// Same sequence, but a single float temporary is reused for every step.
float y_fp32 = ck::type_convert<float>(x);
|
||||
// The activation reads and writes the same variable (in-place call).
activationOp_(y_fp32, y_fp32);
|
||||
y_fp32 = math::clamp(requantScale_ * y_fp32, -128.f, 127.f);
|
||||
y = ck::type_convert<int8_t>(y_fp32);
|
||||
}
|
||||
|
||||
// Requantizes an int32 input but keeps a wider output type: converts x to
// float, applies activationOp_, multiplies by requantScale_ and clamps to
// [-128, 127].
// NOTE(review): this span looks like a rendered diff hunk without +/- markers;
// two signatures (float& output and int32_t& output) and two bodies are shown
// back to back, presumably the pre-change and post-change versions - confirm
// against the repository.
__host__ __device__ constexpr void operator()(float& y, const int32_t& x) const
|
||||
__device__ constexpr void operator()(int32_t& y, const int32_t& x) const
|
||||
{
|
||||
// We might type_convert to int8 after the lambda in some places
|
||||
float x_fp32 = ck::type_convert<float>(x);
|
||||
activationOp_(x_fp32, x_fp32);
|
||||
y = math::clamp(requantScale_ * x_fp32, -128.f, 127.f);
|
||||
// CAUTION - We might type_convert to int8 in threadwise copy
|
||||
// e.g. GridwiseGemmDlMultipleD_km_kn_mn
|
||||
float y_fp32 = ck::type_convert<float>(x);
|
||||
// The activation reads and writes the same variable (in-place call).
activationOp_(y_fp32, y_fp32);
|
||||
y_fp32 = math::clamp(requantScale_ * y_fp32, -128.f, 127.f);
|
||||
// The clamped value is stored as int32_t, not int8_t.
y = ck::type_convert<int32_t>(y_fp32);
|
||||
}
|
||||
|
||||
// Host-only float-in / float-out overload: writes activationOp_(x) into y,
// then scales that value by requantScale_ and clamps it to [-128, 127].
// The result is left as float (no integer conversion happens here).
// CAUTION - reference code might use float in & float out.
__host__ constexpr void operator()(float& y, const float& x) const
{
    activationOp_(y, x);
    const float scaled = requantScale_ * y;
    y                  = math::clamp(scaled, -128.f, 127.f);
}
|
||||
|
||||
float requantScale_;
|
||||
@@ -51,6 +61,17 @@ struct Activation_Mul2_Clamp
|
||||
y = ck::type_convert<int8_t>(y_fp32);
|
||||
}
|
||||
|
||||
// Device-only overload taking the requantization scale as an argument:
// converts x to float, applies activationOp_ in place, multiplies by
// requantScale, clamps to [-128, 127] and stores the result as int32_t.
// CAUTION - the value might be type_convert'ed to int8 later in threadwise
// copy, e.g. GridwiseGemmDlMultipleD_km_kn_mn.
__device__ constexpr void
operator()(int32_t& y, const int32_t& x, const float& requantScale) const
{
    float acc = ck::type_convert<float>(x);
    activationOp_(acc, acc);
    const float requantized = math::clamp(requantScale * acc, -128.f, 127.f);
    y                       = ck::type_convert<int32_t>(requantized);
}
|
||||
|
||||
Activation activationOp_;
|
||||
};
|
||||
|
||||
@@ -72,6 +93,17 @@ struct Add_Activation_Mul_Clamp
|
||||
y = ck::type_convert<int8_t>(y_fp32);
|
||||
}
|
||||
|
||||
// Bias-add overload with int32_t output: adds bias to x in int32, converts the
// sum to float, applies activationOp_ in place, multiplies by requantScale_,
// clamps to [-128, 127] and stores the result as int32_t.
// CAUTION - the value might be type_convert'ed to int8 later in threadwise
// copy, e.g. GridwiseGemmDlMultipleD_km_kn_mn.
__host__ __device__ constexpr void
operator()(int32_t& y, const int32_t& x, const int32_t& bias) const
{
    float acc = ck::type_convert<float>(x + bias);
    activationOp_(acc, acc);
    const float requantized = math::clamp(requantScale_ * acc, -128.f, 127.f);
    y                       = ck::type_convert<int32_t>(requantized);
}
|
||||
|
||||
float requantScale_;
|
||||
Activation activationOp_;
|
||||
};
|
||||
@@ -92,6 +124,17 @@ struct Add_Activation_Mul2_Clamp
|
||||
y = ck::type_convert<int8_t>(y_fp32);
|
||||
}
|
||||
|
||||
// Bias-add overload taking the requantization scale as an argument: adds bias
// to x in int32, converts the sum to float, applies activationOp_ in place,
// multiplies by requantScale, clamps to [-128, 127] and stores the result as
// int32_t.
// CAUTION - the value might be type_convert'ed to int8 later in threadwise
// copy, e.g. GridwiseGemmDlMultipleD_km_kn_mn.
__host__ __device__ constexpr void
operator()(int32_t& y, const int32_t& x, const int32_t& bias, const float& requantScale) const
{
    float acc = ck::type_convert<float>(x + bias);
    activationOp_(acc, acc);
    const float requantized = math::clamp(requantScale * acc, -128.f, 127.f);
    y                       = ck::type_convert<int32_t>(requantized);
}
|
||||
|
||||
Activation activationOp_;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user