Adding more instances of grouped convolution 3d forward for FP8 with ConvScale element-wise operation and ReLU activation. (#1386)

* Add CMakePresets configurations.

* Add ConvScale+ReLU Functor and an Example

* Account for ReLU FLOPs.

* Add instances of 3D convolutions with ConvScaleRelu operation.

* Implement Client Example

* Cleanup
This commit is contained in:
Andriy Roshchenko
2024-07-16 11:51:49 -04:00
committed by GitHub
parent 1ff4f25138
commit 802a8a1df1
12 changed files with 1072 additions and 0 deletions

View File

@@ -1025,6 +1025,31 @@ struct ConvScale
float scale_out_;
};
// Element-wise epilogue functor: applies input/weight scaling, a ReLU
// activation, and an output scaling before converting to the output type.
struct ConvScaleRelu
{
    // All scales default to 1.f, i.e. a plain ReLU + type conversion.
    __host__ __device__ ConvScaleRelu(float scale_in  = 1.f,
                                      float scale_wei = 1.f,
                                      float scale_out = 1.f)
        : scale_in_(scale_in), scale_wei_(scale_wei), scale_out_(scale_out)
    {
    }

    // Primary template is declared but not defined here; only the
    // specializations below are usable, restricting supported type pairs.
    template <typename E, typename C>
    __host__ __device__ void operator()(E& e, const C& c) const;

    // float accumulator -> f8 output: scale, ReLU, scale, then narrow to f8.
    template <>
    __host__ __device__ void operator()<f8_t, float>(f8_t& e, const float& c) const
    {
        const float pre_activation = c * scale_in_ * scale_wei_;
        float activated;
        Relu{}.template operator()<float>(activated, pre_activation);
        e = type_convert<f8_t>(activated * scale_out_);
    }

    float scale_in_;  // scale applied to the input tensor contribution
    float scale_wei_; // scale applied to the weight tensor contribution
    float scale_out_; // scale applied after activation, before conversion
};
// support fastconvert of int8 to fp16
template <typename InputDataType, typename OutputDataType, index_t RegPackNumber>