mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Adding more instances of grouped convolution 3d forward for FP8 with ConvScale+Bias element-wise operation. (#1412)
* Add CMakePresets configurations. * Add binary elementwise ConvScaleAdd and an example. * Numerical verification of results. Observed significant irregularities in F8 to F32 type conversions: ```log ConvScaleAdd: float=145.000000 f8_t=160.000000 e=144.000000 ConvScaleAdd: float=97.000000 f8_t=96.000000 e=104.000000 ConvScaleAdd: float=65.000000 f8_t=64.000000 e=72.000000 ``` * Implemented ConvScaleAdd + Example. * Add ConvScale+Bias Instances * Add Client Example for ConvScale+Bias * Fix number of bytes in an example. * Cleanup.
This commit is contained in:
committed by
GitHub
parent
ffabd70a15
commit
4a8a1befd5
@@ -638,6 +638,32 @@ struct AddSilu
|
||||
}
|
||||
};
|
||||
|
||||
struct ConvScaleAdd
|
||||
{
|
||||
__host__ __device__ ConvScaleAdd(float scale_in = 1.f,
|
||||
float scale_wei = 1.f,
|
||||
float scale_out = 1.f)
|
||||
: scale_in_(scale_in), scale_wei_(scale_wei), scale_out_(scale_out)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename E, typename C, typename D>
|
||||
__host__ __device__ void operator()(E& e, const C& c, const D& d) const;
|
||||
|
||||
template <>
|
||||
__host__ __device__ void
|
||||
operator()<f8_t, float, float>(f8_t& e, const float& c, const float& d) const
|
||||
{
|
||||
float x;
|
||||
Add{}.template operator()<float>(x, c * scale_in_ * scale_wei_, d);
|
||||
e = type_convert<f8_t>(x * scale_out_);
|
||||
};
|
||||
|
||||
float scale_in_;
|
||||
float scale_wei_;
|
||||
float scale_out_;
|
||||
};
|
||||
|
||||
} // namespace element_wise
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
|
||||
Reference in New Issue
Block a user