mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 11:30:02 +00:00
* Add elementwise operation kernel and example
* Add comment
* Add template argument for dim. Prepare to support multiple dimensions
* Rename example
* Support 1 dimension
* Add static assert
* Add comment
* Extract pad
* Remove redundant argument
* Support any dimension for elementwise operation
* Remove line
* Let it be a multiple of the number of CUs
* Move thread per block to the parameter of constructor
* Rename threadPerBlock to blockSize
* Support double
* rename kernel function name
* remove redundant include header
* Refine type
* Need to the final dimension
* Refine variable name
* Refine type
* Use index_t instead of int in API
Co-authored-by: rocking <chunylai@amd.com>
[ROCm/composable_kernel commit: aafc3ac27a]
25 lines
613 B
C++
25 lines
613 B
C++
#pragma once
|
|
#include "config.hpp"
|
|
|
|
namespace ck {
|
|
|
|
// Returns the value of the `warpSize` built-in as an index_t.
// Declared for both host and device code.
__host__ __device__ constexpr index_t get_warp_size()
{
    // `warpSize` itself is supplied by HIP; only the conversion happens here.
    return static_cast<index_t>(warpSize);
}
|
|
|
|
// Returns the calling thread's x index within its block (threadIdx.x).
// Only the x component is read; y/z components are ignored.
// `inline` because this definition lives in a header: without it, every
// translation unit that includes the header emits its own external definition.
__device__ inline index_t get_thread_local_1d_id() { return threadIdx.x; }
|
|
|
|
// Returns the calling thread's flat x index across the whole grid:
// blockIdx.x * blockDim.x + threadIdx.x. Only x components are read.
// NOTE(review): the result is converted to index_t on return; presumably
// launches are small enough for the value to fit - confirm against callers.
// `inline` because this definition lives in a header: without it, every
// translation unit that includes the header emits its own external definition.
__device__ inline index_t get_thread_global_1d_id()
{
    return blockIdx.x * blockDim.x + threadIdx.x;
}
|
|
|
|
// Returns threadIdx.x divided by get_warp_size(), i.e. which warp-sized group
// of x-consecutive threads within the block the calling thread falls into.
// `inline` because this definition lives in a header: without it, every
// translation unit that includes the header emits its own external definition.
__device__ inline index_t get_warp_local_1d_id() { return threadIdx.x / get_warp_size(); }
|
|
|
|
// Returns the x index of the calling thread's block within the grid (blockIdx.x).
// `inline` because this definition lives in a header: without it, every
// translation unit that includes the header emits its own external definition.
__device__ inline index_t get_block_1d_id() { return blockIdx.x; }
|
|
|
|
// Returns the number of blocks along the grid's x dimension (gridDim.x).
// `inline` because this definition lives in a header: without it, every
// translation unit that includes the header emits its own external definition.
__device__ inline index_t get_grid_size() { return gridDim.x; }
|
|
|
|
// Returns the number of threads along the block's x dimension (blockDim.x).
// `inline` because this definition lives in a header: without it, every
// translation unit that includes the header emits its own external definition.
__device__ inline index_t get_block_size() { return blockDim.x; }
|
|
|
|
} // namespace ck
|