mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Update unsigned long literals and format specifiers to work correctly in Windows (#3483)
Previously, the code used unsigned long for literals and format specifiers to represent 64-bit unsigned values. While this worked on Linux, it caused compatibility issues on Windows. The C++ standard does not guarantee that long is 64 bits. On LP64 systems (e.g., Linux), long maps to 64-bit values, but on LLP64 systems (e.g., Windows), long maps to 32-bit values. This discrepancy led to incorrect behavior when assuming unsigned long was always 64-bit. This commit updates all relevant literals and format specifiers to explicitly use 64-bit unsigned types, ensuring consistent behavior across platforms.
This commit is contained in:
@@ -55,9 +55,10 @@ struct FillUniformDistribution
|
||||
const auto total_bytes = total * sizeof(T_iter);
|
||||
|
||||
// max 80 threads; at least 2MB per thread
|
||||
const size_t available_cpu_cores = get_available_cpu_cores();
|
||||
const size_t num_thread =
|
||||
min(80UL, available_cpu_cores, integer_divide_ceil(total_bytes, 0x200000UL));
|
||||
const size_t available_cpu_cores = get_available_cpu_cores();
|
||||
constexpr uint64_t MAX_THREAD_COUNT = 80;
|
||||
const size_t num_thread = min(
|
||||
MAX_THREAD_COUNT, available_cpu_cores, integer_divide_ceil(total_bytes, 0x200000UL));
|
||||
constexpr size_t BLOCK_BYTES = 64;
|
||||
constexpr size_t BLOCK_SIZE = BLOCK_BYTES / sizeof(T_iter);
|
||||
const size_t num_blocks = integer_divide_ceil(total_bytes, BLOCK_BYTES);
|
||||
|
||||
Reference in New Issue
Block a user