use single threaded tensor generator (#161)

[ROCm/composable_kernel commit: f015c77687]
This commit is contained in:
Anthony Chang
2022-03-31 11:28:30 +08:00
committed by GitHub
parent 297ef9795d
commit 8bb6c6e120
20 changed files with 26 additions and 22 deletions

View File

@@ -120,7 +120,7 @@ int test_gemm(const gemmArgs& args)
f_host_tensor_descriptor(args.M, args.N, args.StrideC, c_row_major));
// init data
-std::size_t num_thread = std::thread::hardware_concurrency();
+std::size_t num_thread = 1;
a_m_k.GenerateTensorValue(GeneratorTensor_2<float>{-5, 5}, num_thread);
b_k_n.GenerateTensorValue(GeneratorTensor_2<float>{-5, 5}, num_thread);
// set zero to c_device_buf

View File

@@ -101,7 +101,7 @@ bool test_reduce_no_index_impl(int init_method,
size_t invariant_total_length = out.mDesc.GetElementSize();
size_t reduce_total_length = in.mDesc.GetElementSize() / invariant_total_length;
-std::size_t num_thread = std::thread::hardware_concurrency();
+std::size_t num_thread = 1;
switch(init_method)
{

View File

@@ -99,7 +99,7 @@ bool test_reduce_with_index_impl(int init_method,
size_t invariant_total_length = out.mDesc.GetElementSize();
size_t reduce_total_length = in.mDesc.GetElementSize() / invariant_total_length;
-std::size_t num_thread = std::thread::hardware_concurrency();
+std::size_t num_thread = 1;
switch(init_method)
{