[CK_TILE] Generate random tensor values with multiple threads (#3324)

This commit is contained in:
Yi DING
2025-12-09 11:02:33 +08:00
committed by GitHub
parent c363a98d41
commit c1c2e41a03
6 changed files with 286 additions and 66 deletions

View File

@@ -3,6 +3,9 @@
#pragma once
#ifdef __linux__
#include <sched.h>
#endif
#include <thread>
#include <utility>
@@ -24,4 +27,50 @@ struct joinable_thread : std::thread
this->join();
}
};
/// @brief Number of CPU cores the calling thread is allowed to run on.
///
/// On Linux the count is taken from the calling thread's affinity mask
/// (sched_getaffinity with pid 0), so restrictions imposed by `taskset`,
/// cpusets or a cpu_core_guard are respected. If that query is unavailable or
/// fails, the value of std::thread::hardware_concurrency() is used instead.
///
/// @return The number of usable cores; always at least 1, so the result can be
///         used directly as a thread count or divisor.
inline unsigned int get_available_cpu_cores()
{
#if defined(__linux__)
    cpu_set_t cpu_set;
    if(sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set) == 0)
    {
        // CPU_COUNT yields an int; convert explicitly once it is known to be positive.
        const int cpu_count = CPU_COUNT(&cpu_set);
        if(cpu_count > 0)
            return static_cast<unsigned int>(cpu_count);
    }
#endif
    // Fallback if sched_getaffinity is unavailable or fails.
    // hardware_concurrency() is allowed to return 0 when the value cannot be
    // determined; report a single core in that case instead of 0.
    const unsigned int hw_threads = std::thread::hardware_concurrency();
    return hw_threads > 0 ? hw_threads : 1u;
}
/// @brief Scope guard that temporarily limits the calling thread's CPU affinity.
///
/// On Linux the constructor saves the calling thread's current affinity mask
/// and then restricts the thread to at most `num_cores` CPUs taken from that
/// saved mask (lowest CPU ids first). The destructor puts the saved mask back.
/// Both operations use pid 0, i.e. they act on the calling thread, so the
/// guard must be destroyed on the thread that created it.
///
/// The guard is best-effort: if the current mask cannot be read, if
/// `num_cores` is 0, or if the new mask is rejected, the affinity is left
/// untouched and the destructor does nothing.
///
/// On other platforms the guard is a no-op with the same constructor
/// signature, so calling code compiles unchanged.
class cpu_core_guard
{
#if defined(__linux__)
    cpu_set_t original_cpu_set_;
    // True only when the affinity was actually changed and must be restored.
    bool restore_on_exit_ = false;

    public:
    /// @param num_cores Maximum number of CPUs the calling thread may use
    ///                  while the guard is alive.
    cpu_core_guard(unsigned int num_cores) : original_cpu_set_()
    {
        // Save the original cpu set; without it nothing could be restored
        // later, so do not modify the affinity at all in that case.
        if(sched_getaffinity(0, sizeof(cpu_set_t), &original_cpu_set_) != 0)
            return;

        // An empty mask would be rejected by the kernel anyway.
        if(num_cores == 0)
            return;

        // Build the new cpu set from CPUs the thread is currently allowed to
        // use, so the request stays valid under a restricted mask where
        // CPUs 0..num_cores-1 are not necessarily available.
        cpu_set_t new_cpu_set;
        CPU_ZERO(&new_cpu_set);
        const unsigned int cpu_limit = static_cast<unsigned int>(CPU_SETSIZE);
        unsigned int selected        = 0;
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
        for(unsigned int cpu = 0; cpu < cpu_limit && selected < num_cores; ++cpu)
        {
            if(CPU_ISSET(cpu, &original_cpu_set_)) // NOLINT(old-style-cast)
            {
                CPU_SET(cpu, &new_cpu_set); // NOLINT(old-style-cast)
                ++selected;
            }
        }
#pragma clang diagnostic pop

        restore_on_exit_ = (sched_setaffinity(0, sizeof(cpu_set_t), &new_cpu_set) == 0);
    }

    ~cpu_core_guard()
    {
        // Restore the original cpu set, but only if it was replaced.
        if(restore_on_exit_)
            sched_setaffinity(0, sizeof(cpu_set_t), &original_cpu_set_);
    }
#else
    public:
    /// No affinity control on this platform; the argument is ignored.
    cpu_core_guard(unsigned int) {}
#endif

    // A copy would restore the saved mask a second time, so forbid copying.
    cpu_core_guard(const cpu_core_guard&)            = delete;
    cpu_core_guard& operator=(const cpu_core_guard&) = delete;
};
} // namespace ck_tile