mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 21:09:08 +00:00
use single threaded tensor generator (#161)
[ROCm/composable_kernel commit: f015c77687]
This commit is contained in:
@@ -277,7 +277,7 @@ struct ReductionHost
|
||||
out_indices[dst_offset] = accuIndex;
|
||||
};
|
||||
|
||||
std::size_t num_thread = std::thread::hardware_concurrency();
|
||||
std::size_t num_thread = 1;
|
||||
std::size_t work_per_thread =
|
||||
(invariant_dim_indexes.size() + num_thread - 1) / num_thread;
|
||||
|
||||
@@ -374,7 +374,7 @@ struct ReductionHost
|
||||
out_data[dst_offset] = type_convert<OutDataType>(accuVal);
|
||||
};
|
||||
|
||||
std::size_t num_thread = std::thread::hardware_concurrency();
|
||||
std::size_t num_thread = 1;
|
||||
std::size_t work_per_thread =
|
||||
(invariant_dim_indexes.size() + num_thread - 1) / num_thread;
|
||||
|
||||
|
||||
@@ -163,7 +163,7 @@ struct ParallelTensorFunctor
|
||||
return indices;
|
||||
}
|
||||
|
||||
void operator()(std::size_t num_thread = std::thread::hardware_concurrency()) const
|
||||
void operator()(std::size_t num_thread = 1) const
|
||||
{
|
||||
std::size_t work_per_thread = (mN1d + num_thread - 1) / num_thread;
|
||||
|
||||
@@ -213,7 +213,7 @@ struct Tensor
|
||||
Tensor(const HostTensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpace()) {}
|
||||
|
||||
template <typename G>
|
||||
void GenerateTensorValue(G g, std::size_t num_thread = std::thread::hardware_concurrency())
|
||||
void GenerateTensorValue(G g, std::size_t num_thread = 1)
|
||||
{
|
||||
switch(mDesc.GetNumOfDimension())
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user