Add contraction_fp64 example (#570)

* add contraction_bilinear

* add contraction_scale_xdl_fp64

* reduce tile size to avoid register spill

---------

Co-authored-by: root <root@ctr-ubbsmc16.amd.com>
This commit is contained in:
zjing14
2023-02-15 12:00:58 -06:00
committed by GitHub
parent 6a6163a3d1
commit 24c9ee1d22
6 changed files with 854 additions and 2 deletions

View File

@@ -150,6 +150,13 @@ struct Bilinear
template <typename Y, typename X0, typename X1>
__host__ __device__ constexpr void operator()(Y&, const X0&, const X1&) const;
template <>
__host__ __device__ constexpr void
operator()<double, double, double>(double& y, const double& x0, const double& x1) const
{
y = alpha_ * x0 + beta_ * x1;
};
template <>
__host__ __device__ constexpr void
operator()<float, float, float>(float& y, const float& x0, const float& x1) const

View File

@@ -95,6 +95,12 @@ struct Scale
y = scale_ * x;
};
template <>
__host__ __device__ void operator()<double, double>(double& y, const double& x) const
{
y = scale_ * x;
};
float scale_;
};