From 653bc83f8a0d071e80ede8d08fc38839ad31dac6 Mon Sep 17 00:00:00 2001 From: OscarXu Date: Wed, 28 May 2025 21:05:21 -0500 Subject: [PATCH] Remove rocm6.3 workaround flags and macro --- example/65_gemm_multiply_multiply/CMakeLists.txt | 4 ++-- include/ck/ck.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/example/65_gemm_multiply_multiply/CMakeLists.txt b/example/65_gemm_multiply_multiply/CMakeLists.txt index 2197b221d6..f9f25744b6 100644 --- a/example/65_gemm_multiply_multiply/CMakeLists.txt +++ b/example/65_gemm_multiply_multiply/CMakeLists.txt @@ -42,8 +42,8 @@ set(GEMM_OPTIONS) list(APPEND GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32") list(APPEND GEMM_OPTIONS -v --save-temps -Wno-gnu-line-marker) set(BLOCKSCALE_GEMM_OPTIONS) -# list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32 -mllvm --disable-schedmodel-in-sched-mi=1 -mllvm --amdgpu-sched-strategy=gcn-iterative-max-occupancy-experimental -mllvm --misched-bottomup=1") -list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32") +list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32 -mllvm --schedmodel=0 -mllvm --amdgpu-sched-strategy=gcn-iterative-max-occupancy-experimental -mllvm --misched-bottomup=1") +# list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32") list(APPEND BLOCKSCALE_GEMM_OPTIONS -v --save-temps -Wno-gnu-line-marker) target_compile_options(example_gemm_multiply_multiply_xdl_fp8_bpreshuffle PRIVATE ${GEMM_OPTIONS}) target_compile_options(example_moe_gemm1_xdl_fp8 PRIVATE ${GEMM_OPTIONS}) diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp index 7ab8494a59..e001127f88 100644 --- a/include/ck/ck.hpp +++ b/include/ck/ck.hpp @@ -170,7 +170,7 @@ #define CK_USE_PK4_LAYOUT_SHUFFLE 1 // using .co compiled shader for moe_stage2_blockscale -#define CK_USE_ASM_MOE_BLOCKSCALE 1 +#define CK_USE_ASM_MOE_BLOCKSCALE 0 // block synchronization only s_wait lgkmcnt(0), not vmcnt(0) #define CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 1