mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 11:16:59 +00:00
flag and barrier fix for compiler branch MainOpSelV3
This commit is contained in:
@@ -42,7 +42,7 @@ set(GEMM_OPTIONS)
|
||||
list(APPEND GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32")
|
||||
list(APPEND GEMM_OPTIONS -v --save-temps -Wno-gnu-line-marker)
|
||||
set(BLOCKSCALE_GEMM_OPTIONS)
|
||||
list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32 -mllvm --schedmodel=0 -mllvm --amdgpu-sched-strategy=gcn-iterative-max-occupancy-experimental -mllvm --misched-bottomup=1")
|
||||
list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32 -mllvm --schedmodel=0 -mllvm --amdgpu-sched-strategy=gcn-iterative-max-occupancy-experimental -mllvm --misched-topdown=1")
|
||||
# list(APPEND BLOCKSCALE_GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm --slp-threshold=-32")
|
||||
list(APPEND BLOCKSCALE_GEMM_OPTIONS -v --save-temps -Wno-gnu-line-marker)
|
||||
target_compile_options(example_gemm_multiply_multiply_xdl_fp8_bpreshuffle PRIVATE ${GEMM_OPTIONS})
|
||||
|
||||
@@ -286,7 +286,7 @@ struct BlockwiseGemmXdlops_pipeline_moe_blockscale_bpreshuffle_v3<
|
||||
{
|
||||
__builtin_amdgcn_sched_group_barrier(0x100, ds_read_a_mfma_rate, 0); // DS read
|
||||
}
|
||||
__builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
|
||||
// __builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
|
||||
});
|
||||
// __builtin_amdgcn_sched_barrier(0);
|
||||
});
|
||||
@@ -319,7 +319,7 @@ struct BlockwiseGemmXdlops_pipeline_moe_blockscale_bpreshuffle_v3<
|
||||
{
|
||||
__builtin_amdgcn_sched_group_barrier(0x100, ds_read_a_mfma_rate, 0); // DS read
|
||||
}
|
||||
__builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
|
||||
// __builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
|
||||
});
|
||||
// __builtin_amdgcn_sched_barrier(0);
|
||||
});
|
||||
@@ -338,7 +338,7 @@ struct BlockwiseGemmXdlops_pipeline_moe_blockscale_bpreshuffle_v3<
|
||||
{
|
||||
__builtin_amdgcn_sched_group_barrier(0x100, ds_read_a_mfma_rate, 0); // DS read
|
||||
}
|
||||
__builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
|
||||
// __builtin_amdgcn_sched_group_barrier(0x800, 2, 0); // v_pk_fma
|
||||
});
|
||||
// __builtin_amdgcn_sched_barrier(0);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user