mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 17:26:00 +00:00
refactor
This commit is contained in:
@@ -76,6 +76,7 @@ blockwise_4d_tensor_pointwise_operation_unary(DstDesc, Float* __restrict__ p_dst
|
||||
}
|
||||
}
|
||||
|
||||
// Function: p_dst[reorder[i0], reorder[i1], reorder[i2], reorder[i3]] = p_src[i0,i1,i2,i3]
|
||||
// TODO: in order to optimize mem access for different mem type,
|
||||
// need to write specialized version
|
||||
template <unsigned BlockSize,
|
||||
|
||||
@@ -11,3 +11,5 @@ struct is_same<T, T>
|
||||
{
|
||||
static const bool value = true;
|
||||
};
|
||||
|
||||
// Returns the x component of the calling thread's built-in threadIdx.
// Only threadIdx.x is read; the y and z components are not consulted.
__device__ unsigned get_thread_local_id()
{
    const unsigned local_id = threadIdx.x;
    return local_id;
}
|
||||
|
||||
Reference in New Issue
Block a user