mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
Adding more instances of grouped convolution 3d forward for FP8 with ConvScale element-wise operation and ReLU activation. (#1386)
* Add CMakePresets configurations.
* Add ConvScale+ReLU Functor and an Example
* Account for ReLU FLOPs.
* Add instances of 3D convolutions with ConvScaleRelu operation.
* Implement Client Example
* Cleanup
[ROCm/composable_kernel commit: 802a8a1df1]
This commit is contained in:
committed by
GitHub
parent
8073a8d846
commit
ece7edc492
@@ -1025,6 +1025,31 @@ struct ConvScale
|
||||
float scale_out_;
|
||||
};
|
||||
|
||||
// Element-wise epilogue for scaled fp8 convolution with ReLU activation:
// applies the input/weight scales to the accumulator, rectifies, applies the
// output scale, and narrows the result to fp8.
struct ConvScaleRelu
{
    __host__ __device__ ConvScaleRelu(float scale_in  = 1.f,
                                      float scale_wei = 1.f,
                                      float scale_out = 1.f)
        : scale_in_(scale_in), scale_wei_(scale_wei), scale_out_(scale_out)
    {
    }

    // Generic form is declared only; supported type pairs are provided as
    // explicit specializations below.
    template <typename E, typename C>
    __host__ __device__ void operator()(E& e, const C& c) const;

    // fp32 accumulator -> fp8 output.
    template <>
    __host__ __device__ void operator()<f8_t, float>(f8_t& e, const float& c) const
    {
        // Fold the input and weight scales into the accumulator first, so the
        // ReLU clamp sees the correctly scaled value.
        const float scaled_acc = c * scale_in_ * scale_wei_;
        float activated;
        Relu{}.template operator()<float>(activated, scaled_acc);
        // Apply the output scale after activation, then narrow to fp8.
        e = type_convert<f8_t>(activated * scale_out_);
    }

    float scale_in_;
    float scale_wei_;
    float scale_out_;
};
|
||||
|
||||
// support fastconvert of int8 to fp16
|
||||
|
||||
template <typename InputDataType, typename OutputDataType, index_t RegPackNumber>
|
||||
|
||||
Reference in New Issue
Block a user