No raw index calculation (#31)

* Replace most raw index calculations with coordinate transformations
* Overhaul blockwise and threadwise GEMM
* Overhaul driver for gridwise GEMM kernel

Co-authored-by: Jing Zhang <jizhan@amd.com>
This commit is contained in:
Chao Liu
2021-05-11 00:09:25 -05:00
committed by GitHub
parent d075adf126
commit 01055d95d9
23 changed files with 2494 additions and 2933 deletions

View File

@@ -43,11 +43,17 @@ struct multiplies_v2
};
// Binary functor that selects the larger of two values of type T.
// NOTE(review): the `struct maxer` line below is presumably the pre-rename
// declaration kept by the diff view (old name next to the new `maximize`);
// confirm against the actual header before relying on either name.
template <class T>
struct maxer
struct maximize
{
// Returns a when a >= b (a is chosen on ties), otherwise b.
__host__ __device__ constexpr T operator()(T a, T b) const { return a >= b ? a : b; }
};
// Binary functor that selects the smaller of two values of type T.
// The comparison uses T's operator<=; when the operands compare equal the
// first argument is the one returned.
template <class T>
struct minimize
{
    // Returns a when a <= b, otherwise b.
    __host__ __device__ constexpr T operator()(T a, T b) const
    {
        return (a <= b) ? a : b;
    }
};
template <class T>
struct integer_divide_ceiler
{