fix moe gemm2 for gfx950 (#2164)

Co-authored-by: mtgu0705 <mtgu@amd.com>

[ROCm/composable_kernel commit: a23390163d]
This commit is contained in:
Mingtao Gu
2025-05-09 23:25:31 +08:00
committed by GitHub
parent 83180829b6
commit 0afe569703
2 changed files with 2 additions and 2 deletions

View File

@@ -7,7 +7,7 @@ add_example_executable(example_gemm_multiply_multiply_xdl_int8 gemm_multiply_mul
add_example_executable(example_moe_gemm1_xdl_fp8 moe_gemm1_xdl_fp8.cpp)
add_example_executable(example_moe_gemm2_xdl_fp8 moe_gemm2_xdl_fp8.cpp)
-list(APPEND gpu_list gfx942)
+list(APPEND gpu_list gfx942 gfx950)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)

View File

@@ -281,7 +281,7 @@ int main(int argc, char* argv[])
break;
case 4:
a0_t_k_k.GenerateTensorValue(GeneratorTensor_1<A0DataType>{});
-b0_e_n_k.GenerateTensorValue(GeneratorTensor_2<A0DataType>{-2, 2});
+b0_e_n_k.GenerateTensorValue(GeneratorTensor_2<B0DataType>{-2, 2});
d0_t_n.GenerateTensorValue(GeneratorTensor_1<D0DataType>{});
d1_e_n.GenerateTensorValue(GeneratorTensor_1<D1DataType>{});
d2_e_n.GenerateTensorValue(GeneratorTensor_1<D2DataType>{});