mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Update unsigned long literals and format specifiers to work correctly in Windows (#3483)
Previously, the code used unsigned long for literals and format specifiers to represent 64-bit unsigned values. While this worked on Linux, it caused compatibility issues on Windows.
The C++ standard does not guarantee that long is 64 bits. On LP64 systems (e.g., Linux), long maps to 64-bit values, but on LLP64 systems (e.g., Windows), long maps to 32-bit values. This discrepancy led to incorrect behavior when assuming unsigned long was always 64-bit.
This commit updates all relevant literals and format specifiers to explicitly use 64-bit unsigned types, ensuring consistent behavior across platforms.
[ROCm/composable_kernel commit: ec23be0b9d]
This commit is contained in:
@@ -334,13 +334,13 @@ bool test_moe_sorting(ck_tile::ArgParser args)
|
||||
if(moe_buf_bytes > 0)
|
||||
{
|
||||
#if MOE_SORTING_FMOE_2D_BUF
|
||||
printf("moe_buf:%lu(%d,%d), ",
|
||||
printf("moe_buf:%" PRIu64 "(%d,%d), ",
|
||||
static_cast<uint64_t>(moe_buf_bytes),
|
||||
moe_buf_interm_dim,
|
||||
moe_buf_elem_bytes);
|
||||
#else
|
||||
|
||||
printf("moe_buf:%lu, ", static_cast<uint64_t>(moe_buf_bytes));
|
||||
printf("moe_buf:%" PRIu64 ", ", static_cast<uint64_t>(moe_buf_bytes));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -55,9 +55,10 @@ struct FillUniformDistribution
|
||||
const auto total_bytes = total * sizeof(T_iter);
|
||||
|
||||
// max 80 threads; at least 2MB per thread
|
||||
const size_t available_cpu_cores = get_available_cpu_cores();
|
||||
const size_t num_thread =
|
||||
min(80UL, available_cpu_cores, integer_divide_ceil(total_bytes, 0x200000UL));
|
||||
const size_t available_cpu_cores = get_available_cpu_cores();
|
||||
constexpr uint64_t MAX_THREAD_COUNT = 80;
|
||||
const size_t num_thread = min(
|
||||
MAX_THREAD_COUNT, available_cpu_cores, integer_divide_ceil(total_bytes, 0x200000UL));
|
||||
constexpr size_t BLOCK_BYTES = 64;
|
||||
constexpr size_t BLOCK_SIZE = BLOCK_BYTES / sizeof(T_iter);
|
||||
const size_t num_blocks = integer_divide_ceil(total_bytes, BLOCK_BYTES);
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cstdlib>
|
||||
#include <thread>
|
||||
|
||||
@@ -28,7 +29,7 @@ CK_TILE_HOST void reference_grouped_conv_bwd_data(HostTensor<InDataType>& input,
|
||||
output.get_num_of_dimension() == NDimSpatial + 3))
|
||||
{
|
||||
|
||||
printf("%lu %lu %lu",
|
||||
printf("%" PRIu64 " %" PRIu64 " %" PRIu64,
|
||||
input.get_num_of_dimension(),
|
||||
weight.get_num_of_dimension(),
|
||||
output.get_num_of_dimension());
|
||||
|
||||
@@ -246,9 +246,11 @@ struct UniversalGemmBasePolicy
|
||||
}
|
||||
else // A is in RowMajor
|
||||
{
|
||||
constexpr auto DataTypeSize = sizeof(ADataType);
|
||||
constexpr auto DataTypeSize = sizeof(ADataType);
|
||||
constexpr uint64_t MinLdsLayer = 1ULL;
|
||||
constexpr auto MLdsLayer =
|
||||
max(1UL, get_n_lds_banks() * get_n_words_per_128b() / KPerBlock / DataTypeSize);
|
||||
max(MinLdsLayer,
|
||||
get_n_lds_banks() * get_n_words_per_128b() / KPerBlock / DataTypeSize);
|
||||
|
||||
constexpr index_t NBanks = get_n_lds_banks();
|
||||
static_assert(NBanks == 32 || NBanks == 64, "Unexpected LDS bank count");
|
||||
@@ -442,11 +444,13 @@ struct UniversalGemmBasePolicy
|
||||
}
|
||||
else // B is Column Major
|
||||
{
|
||||
constexpr index_t KPack = GetSmemPackB<Problem>();
|
||||
constexpr auto BK0 = number<KPerBlock / KPack>{};
|
||||
constexpr auto DataTypeSize = sizeof(BDataType);
|
||||
constexpr index_t KPack = GetSmemPackB<Problem>();
|
||||
constexpr auto BK0 = number<KPerBlock / KPack>{};
|
||||
constexpr auto DataTypeSize = sizeof(BDataType);
|
||||
constexpr uint64_t MinLdsLayer = 1ULL;
|
||||
constexpr auto NLdsLayer =
|
||||
max(1UL, get_n_lds_banks() * get_n_words_per_128b() / KPerBlock / DataTypeSize);
|
||||
max(MinLdsLayer,
|
||||
get_n_lds_banks() * get_n_words_per_128b() / KPerBlock / DataTypeSize);
|
||||
|
||||
constexpr index_t NBanks = get_n_lds_banks();
|
||||
static_assert(NBanks == 32 || NBanks == 64, "Unexpected LDS bank count");
|
||||
|
||||
@@ -236,13 +236,13 @@ class TestCkTileMoeSorting : public ::testing::Test
|
||||
if(moe_buf_bytes > 0)
|
||||
{
|
||||
#if MOE_SORTING_FMOE_2D_BUF
|
||||
printf("moe_buf:%lu(%d,%d), ",
|
||||
printf("moe_buf:%" PRIu64 "(%d,%d), ",
|
||||
static_cast<uint64_t>(moe_buf_bytes),
|
||||
moe_buf_interm_dim,
|
||||
moe_buf_elem_bytes);
|
||||
#else
|
||||
|
||||
printf("moe_buf:%lu, ", static_cast<uint64_t>(moe_buf_bytes));
|
||||
printf("moe_buf:%" PRIu64 ", ", static_cast<uint64_t>(moe_buf_bytes));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ using TestTypes = ::testing::Types<float, fp16_t, fp8_t, pk_fp4_t>;
|
||||
TYPED_TEST_SUITE(FillUniformDistributionTest, TestTypes);
|
||||
|
||||
// Test that multiple runs with the same seed produce identical results
|
||||
#ifndef _WIN32
|
||||
TYPED_TEST(FillUniformDistributionTest, ConsistencyWithSameSeed)
|
||||
{
|
||||
using T = TypeParam;
|
||||
@@ -53,6 +54,7 @@ TYPED_TEST(FillUniformDistributionTest, ConsistencyWithSameSeed)
|
||||
<< "First and second fill should be identical";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Test consistency across different data sizes (which affects threading)
|
||||
TYPED_TEST(FillUniformDistributionTest, ConsistencyAcrossSizes)
|
||||
|
||||
Reference in New Issue
Block a user