mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 09:45:56 +00:00
Dynamic tensor descriptor (#24)
* support dynamic tensor descriptor
* use buffer load OOB feature for padding case
* add navi support
* add int8x4 inference kernel

Co-authored-by: Chao Liu <chao@ixt-rack-81.local.lan>
Co-authored-by: Jing Zhang <jizhan@amd.com>
This commit is contained in:
@@ -158,7 +158,7 @@ struct ParallelTensorFunctor
|
||||
return indices;
|
||||
}
|
||||
|
||||
void operator()(std::size_t num_thread) const
|
||||
void operator()(std::size_t num_thread = std::thread::hardware_concurrency()) const
|
||||
{
|
||||
std::size_t work_per_thread = (mN1d + num_thread - 1) / num_thread;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user