[CK_TILE] Refine FP32 => FP16/BF16 Conversion (#3215)

* [CK_TILE] Refine FP32 => FP16/BF16 Conversion

* Thank you Copilot

* Rename fix

* Fix example

* Fix accu checking

* Fix

* Fix
This commit is contained in:
Yi DING
2025-11-21 02:50:26 +08:00
committed by GitHub
parent 07314ac543
commit 8b284a63a4
7 changed files with 61 additions and 14 deletions

View File

@@ -3,7 +3,10 @@ if(CK_USE_OCP_FP8)
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8)
endif()
# Use standard asm for rtn bf16 conversion instead of truncation
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=3)
# Register the grouped GEMM multi-D test only when GPU_TARGETS contains the
# substring "gfx94" or "gfx95" (MATCHES performs a regex search on the value).
if(GPU_TARGETS MATCHES "gfx94|gfx95")
# NOTE(review): add_gtest_executable is a project helper defined elsewhere;
# presumably it creates a GoogleTest-based test target — confirm against its definition.
add_gtest_executable(test_ck_tile_grouped_gemm_multi_d test_grouped_gemm_multi_d.cpp)
# Apply the options collected in EXAMPLE_GEMM_COMPILE_OPTIONS to this target
# only (PRIVATE: they are not propagated to anything that depends on it).
target_compile_options(test_ck_tile_grouped_gemm_multi_d PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
endif()
endif()