flag and barrier fix for compiler branch MainOpSelV3

This commit is contained in:
OscarXu
2025-05-29 03:13:11 -05:00
parent 653bc83f8a
commit 52d68c9529
2 changed files with 4 additions and 4 deletions

View File

@@ -42,7 +42,7 @@ set(GEMM_OPTIONS)
list(APPEND GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32")
list(APPEND GEMM_OPTIONS -v --save-temps -Wno-gnu-line-marker)
set(BLOCKSCALE_GEMM_OPTIONS)
list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32 -mllvm --schedmodel=0 -mllvm --amdgpu-sched-strategy=gcn-iterative-max-occupancy-experimental -mllvm --misched-bottomup=1")
list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32 -mllvm --schedmodel=0 -mllvm --amdgpu-sched-strategy=gcn-iterative-max-occupancy-experimental -mllvm --misched-topdown=1")
# list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32")
list(APPEND BLOCKSCALE_GEMM_OPTIONS -v --save-temps -Wno-gnu-line-marker)
target_compile_options(example_gemm_multiply_multiply_xdl_fp8_bpreshuffle PRIVATE ${GEMM_OPTIONS})

View File

@@ -286,7 +286,7 @@ struct BlockwiseGemmXdlops_pipeline_moe_blockscale_bpreshuffle_v3<
{
__builtin_amdgcn_sched_group_barrier(0x100, ds_read_a_mfma_rate, 0); // DS read
}
__builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
// __builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
});
// __builtin_amdgcn_sched_barrier(0);
});
@@ -319,7 +319,7 @@ struct BlockwiseGemmXdlops_pipeline_moe_blockscale_bpreshuffle_v3<
{
__builtin_amdgcn_sched_group_barrier(0x100, ds_read_a_mfma_rate, 0); // DS read
}
__builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
// __builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
});
// __builtin_amdgcn_sched_barrier(0);
});
@@ -338,7 +338,7 @@ struct BlockwiseGemmXdlops_pipeline_moe_blockscale_bpreshuffle_v3<
{
__builtin_amdgcn_sched_group_barrier(0x100, ds_read_a_mfma_rate, 0); // DS read
}
__builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
// __builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
});
// __builtin_amdgcn_sched_barrier(0);
});