mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 13:41:24 +00:00
Fix CK Tile DP + 2 Tile Stream-K Validation Errors (#3269)
When multiple workgroups contribute to a tile and atomics are used, round-off error can occur if the accumulator type is not the same as the C type. To compute an error tolerance for test validation, the Stream-K Tile Partitioner has a function called estimate_num_wgs_per_tile that estimates the number of workgroups per tile. However, this function only provides an estimate. In some cases for DP+2TSK, the function returns 1 rather than the more accurate value of 2. Thus, this change updates the estimate_num_wgs_per_tile function to explicitly return the value of 2 for DP+2TSK, giving a better error tolerance and avoiding test failures due to round-off error.
This commit is contained in:
@@ -13,7 +13,7 @@ list(APPEND EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS
|
||||
set(EXAMPLE_GEMM_COMPILE_COMPUTE_ASYNC_OPTIONS ${EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS})
|
||||
|
||||
# Currently test_ck_tile_streamk_smoke is only built on gfx9
|
||||
if(GPU_TARGETS MATCHES "gfx9")
|
||||
if(GPU_TARGETS MATCHES "gfx90a|gfx942|gfx950")
|
||||
|
||||
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ TEST(StreamKTilePartitionerBaseEstimateNumWgsPerTile, EstimateNumWgsPerTileLower
|
||||
ck_tile::StreamKTilePartitionerBase<Config::GemmShape> tile_partitioner{
|
||||
Config::M, Config::N, Config::K, Config::GRID};
|
||||
|
||||
EXPECT_EQ(tile_partitioner.estimate_num_wgs_per_tile(), 1);
|
||||
EXPECT_EQ(tile_partitioner.estimate_num_wgs_per_tile(), 2);
|
||||
}
|
||||
|
||||
TEST(StreamKTilePartitionerBaseEstimateNumWgsPerTile, EstimateNumWgsPerTileEqualValue)
|
||||
|
||||
Reference in New Issue
Block a user