mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 13:11:25 +00:00
[CK_TILE] Generate random tensor values with multiple threads (#3324)
This commit is contained in:
@@ -3,6 +3,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sched.h>
|
||||
#endif
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
@@ -24,4 +27,50 @@ struct joinable_thread : std::thread
|
||||
this->join();
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief Number of CPU cores available for running worker threads.
///
/// On Linux the count is taken from the affinity mask reported by
/// sched_getaffinity() for the caller, so a restricted affinity mask is
/// honoured. If that query is unavailable, fails, or reports no CPUs, the
/// value of std::thread::hardware_concurrency() is used instead.
///
/// @return The number of usable cores; always at least 1.
inline unsigned int get_available_cpu_cores()
{
#if defined(__linux__)
    cpu_set_t cpu_set;
    if(sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set) == 0)
    {
        // CPU_COUNT yields an int; accept only strictly positive counts.
        const int cpu_count = CPU_COUNT(&cpu_set);
        if(cpu_count > 0)
            return static_cast<unsigned int>(cpu_count);
    }
#endif
    // Fallback if sched_getaffinity is unavailable or fails.
    // hardware_concurrency() is allowed to return 0 when the value cannot be
    // determined; never report fewer than one core so callers can safely use
    // the result as a thread count or divisor.
    const unsigned int hw_count = std::thread::hardware_concurrency();
    return hw_count > 0 ? hw_count : 1u;
}
|
||||
|
||||
/// @brief Scoped guard that temporarily restricts CPU affinity.
///
/// On Linux, construction saves the caller's current affinity mask and then
/// requests an affinity mask containing CPUs 0 .. num_cores-1 (clamped to
/// CPU_SETSIZE). Destruction restores the saved mask. If the original mask
/// cannot be saved, or the new mask is rejected by the kernel, the guard does
/// nothing and the destructor leaves the affinity untouched.
///
/// On other platforms the guard is a no-op with the same constructor
/// signature, so calling code compiles unchanged.
///
/// The guard is neither copyable nor movable: exactly one object is
/// responsible for restoring the saved mask.
class cpu_core_guard
{
#if defined(__linux__)
    cpu_set_t original_cpu_set_;
    // True only when the original mask was saved AND the new mask was applied,
    // i.e. when there is something valid to restore.
    bool restore_on_exit_ = false;

    public:
    /// @param num_cores Number of leading CPU ids (starting at 0) to allow.
    cpu_core_guard(unsigned int num_cores) : original_cpu_set_()
    {
        // save original cpu set; without it the change could not be undone,
        // so bail out before touching the affinity at all
        if(sched_getaffinity(0, sizeof(cpu_set_t), &original_cpu_set_) != 0)
            return;

        // a cpu_set_t can only describe CPU ids below CPU_SETSIZE
        const unsigned int max_cores  = static_cast<unsigned int>(CPU_SETSIZE);
        const unsigned int core_limit = num_cores < max_cores ? num_cores : max_cores;

        // set new cpu set
        cpu_set_t new_cpu_set;
        CPU_ZERO(&new_cpu_set);
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
#endif
        for(unsigned int i = 0; i < core_limit; ++i)
        {
            CPU_SET(i, &new_cpu_set); // NOLINT(old-style-cast)
        }
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
        // An empty or otherwise unusable mask is rejected by the kernel; in
        // that case the affinity is unchanged and nothing needs restoring.
        restore_on_exit_ = (sched_setaffinity(0, sizeof(cpu_set_t), &new_cpu_set) == 0);
    }

    ~cpu_core_guard()
    {
        // restore original cpu set
        if(restore_on_exit_)
            sched_setaffinity(0, sizeof(cpu_set_t), &original_cpu_set_);
    }
#else
    public:
    /// No affinity control on this platform; the argument is ignored.
    cpu_core_guard(unsigned int /*num_cores*/) noexcept {}
#endif

    cpu_core_guard(const cpu_core_guard&)            = delete;
    cpu_core_guard& operator=(const cpu_core_guard&) = delete;
    cpu_core_guard(cpu_core_guard&&)                 = delete;
    cpu_core_guard& operator=(cpu_core_guard&&)      = delete;
};
|
||||
} // namespace ck_tile
|
||||
|
||||
Reference in New Issue
Block a user