Use single-threaded tensor generator (#161)

[ROCm/composable_kernel commit: f015c77687]
This commit is contained in:
Anthony Chang
2022-03-31 11:28:30 +08:00
committed by GitHub
parent 297ef9795d
commit 8bb6c6e120
20 changed files with 26 additions and 22 deletions

View File

@@ -277,7 +277,7 @@ struct ReductionHost
out_indices[dst_offset] = accuIndex;
};
std::size_t num_thread = std::thread::hardware_concurrency();
std::size_t num_thread = 1;
std::size_t work_per_thread =
(invariant_dim_indexes.size() + num_thread - 1) / num_thread;
@@ -374,7 +374,7 @@ struct ReductionHost
out_data[dst_offset] = type_convert<OutDataType>(accuVal);
};
std::size_t num_thread = std::thread::hardware_concurrency();
std::size_t num_thread = 1;
std::size_t work_per_thread =
(invariant_dim_indexes.size() + num_thread - 1) / num_thread;

View File

@@ -163,7 +163,7 @@ struct ParallelTensorFunctor
return indices;
}
void operator()(std::size_t num_thread = std::thread::hardware_concurrency()) const
void operator()(std::size_t num_thread = 1) const
{
std::size_t work_per_thread = (mN1d + num_thread - 1) / num_thread;
@@ -213,7 +213,7 @@ struct Tensor
Tensor(const HostTensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpace()) {}
template <typename G>
void GenerateTensorValue(G g, std::size_t num_thread = std::thread::hardware_concurrency())
void GenerateTensorValue(G g, std::size_t num_thread = 1)
{
switch(mDesc.GetNumOfDimension())
{