mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Adding more instances of grouped convolution 3d forward for FP8 with ConvScale element-wise operation and ReLU activation. (#1386)
* Add CMakePresets configurations. * Add ConvScale+ReLU Functor and an Example * Account for ReLU FLOPs. * Add instances of 3D convolutions with ConvScaleRelu operation. * Implement Client Example * Cleanup
This commit is contained in:
committed by
GitHub
parent
1ff4f25138
commit
802a8a1df1
@@ -1025,6 +1025,31 @@ struct ConvScale
|
||||
float scale_out_;
|
||||
};
|
||||
|
||||
struct ConvScaleRelu
|
||||
{
|
||||
__host__ __device__ ConvScaleRelu(float scale_in = 1.f,
|
||||
float scale_wei = 1.f,
|
||||
float scale_out = 1.f)
|
||||
: scale_in_(scale_in), scale_wei_(scale_wei), scale_out_(scale_out)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename E, typename C>
|
||||
__host__ __device__ void operator()(E& e, const C& c) const;
|
||||
|
||||
template <>
|
||||
__host__ __device__ void operator()<f8_t, float>(f8_t& e, const float& c) const
|
||||
{
|
||||
float x;
|
||||
Relu{}.template operator()<float>(x, c * scale_in_ * scale_wei_);
|
||||
e = type_convert<f8_t>(x * scale_out_);
|
||||
};
|
||||
|
||||
float scale_in_;
|
||||
float scale_wei_;
|
||||
float scale_out_;
|
||||
};
|
||||
|
||||
// support fastconvert of int8 to fp16
|
||||
|
||||
template <typename InputDataType, typename OutputDataType, index_t RegPackNumber>
|
||||
|
||||
Reference in New Issue
Block a user