mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-10 08:18:26 +00:00
* support dynamic tensor descriptor
* use buffer load OOB feature for padding case
* add navi support
* add int8x4 inference kernel

Co-authored-by: Chao Liu <chao@ixt-rack-81.local.lan>
Co-authored-by: Jing Zhang <jizhan@amd.com>
15 lines
336 B
C++
15 lines
336 B
C++
#ifndef CK_GRIDWISE_OPERATION_KERNEL_WRAPPER
|
|
#define CK_GRIDWISE_OPERATION_KERNEL_WRAPPER
|
|
|
|
template <typename GridwiseOp, typename... Xs>
|
|
__global__ void
|
|
#if CK_USE_LAUNCH_BOUNDS
|
|
__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
|
|
#endif
|
|
run_gridwise_operation(Xs... xs)
|
|
{
|
|
GridwiseOp{}.Run(xs...);
|
|
}
|
|
|
|
#endif
|