mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 03:19:48 +00:00
Restructure gridwise and blockwise GEMM, add tensor contraction and FWD-v4r5 (#36)
* experimenting magic number division
* overhauling fwd-v4r4 to clearly reflect transformation graph
* added fwd-v4r5
* bug fix for make_dynamic_naive_tensor_descriptor_aligned_v2
* bug fix and added sanity-check in transform_dynamic_tensor_descriptor
* added conv_driver_v2
[ROCm/composable_kernel commit: 30072aec37]
This commit is contained in:
@@ -74,7 +74,7 @@ __host__ __device__ constexpr auto integer_divide_floor(X x, Y y)
|
||||
template <class X, class Y>
|
||||
__host__ __device__ constexpr auto integer_divide_ceil(X x, Y y)
|
||||
{
|
||||
return (x + y - 1) / y;
|
||||
return (x + y - Number<1>{}) / y;
|
||||
}
|
||||
|
||||
template <class X, class Y>
|
||||
|
||||
Reference in New Issue
Block a user