Disable gemm_blockscale_f8 on gfx90a by default. (#3338)

* disable gemm_blockscale_f8 instances on gfx90a by default

* fix cmake logic, disable some cmake output

* fix cmake logic
This commit is contained in:
Illia Silin
2025-12-02 11:33:33 -08:00
committed by GitHub
parent 280bc42191
commit 2c284a1780
5 changed files with 45 additions and 61 deletions

View File

@@ -45,7 +45,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
endif()
endif()
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]")
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx1[12]")
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
list(APPEND PROFILER_OPS profile_gemm_reduce.cpp)
list(APPEND PROFILER_OPS profile_batched_gemm_add_relu_gemm_add.cpp)
@@ -59,7 +59,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[1
list(APPEND PROFILER_OPS profile_grouped_gemm_tile_loop.cpp)
list(APPEND PROFILER_OPS profile_grouped_gemm_multiply_tile_loop.cpp)
endif()
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12")
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]|gfx12")
list(APPEND PROFILER_OPS profile_gemm_multiply_multiply_wp.cpp)
list(APPEND PROFILER_OPS profile_gemm_ab_scale.cpp)
list(APPEND PROFILER_OPS profile_gemm_blockscale_wp.cpp)
@@ -90,7 +90,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx(9[45]|1[12])")
list(APPEND PROFILER_OPS profile_gemm_multiply_multiply.cpp)
endif()
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]")
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx1[12]")
list(APPEND PROFILER_OPS profile_gemm_universal.cpp)
list(APPEND PROFILER_OPS profile_batched_gemm.cpp)
list(APPEND PROFILER_OPS profile_batched_gemm_b_scale.cpp)
@@ -164,7 +164,7 @@ list(APPEND DEVICE_INSTANCES device_column_to_image_instance)
list(APPEND DEVICE_INSTANCES device_transpose_instance)
list(APPEND DEVICE_INSTANCES device_permute_scale_instance)
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]")
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx1[12]")
if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES)
list(APPEND DEVICE_INSTANCES device_contraction_bilinear_instance)
list(APPEND DEVICE_INSTANCES device_contraction_scale_instance)
@@ -184,11 +184,11 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[1
list(APPEND DEVICE_INSTANCES device_grouped_gemm_tile_loop_instance)
endif()
list(APPEND DEVICE_INSTANCES device_batched_gemm_reduce_instance)
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12")
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]|gfx12")
list(APPEND DEVICE_INSTANCES device_gemm_multiply_multiply_wp_instance)
list(APPEND DEVICE_INSTANCES device_gemm_universal_preshuffle_instance)
endif()
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]")
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]|gfx1[12]")
list(APPEND DEVICE_INSTANCES device_gemm_ab_scale_instance)
list(APPEND DEVICE_INSTANCES device_gemm_blockscale_wp_instance)
endif()
@@ -228,7 +228,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx(9[45]|1[12])")
list(APPEND DEVICE_INSTANCES device_gemm_multiply_multiply_instance)
endif()
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]")
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx1[12]")
list(APPEND DEVICE_INSTANCES device_gemm_universal_instance)
list(APPEND DEVICE_INSTANCES device_batched_gemm_instance)
list(APPEND DEVICE_INSTANCES device_gemm_b_scale_instance)