Adding fix for the gfx908 to the GEMM MFMA implementations of WarpGem… (#2751)

* Adding fix for the gfx908 to the GEMM MFMA implementations of WarpGemmMfmaBf16Bf16F32M4N64K16 and WarpGemmMfmaBf16Bf16F32M64N4K16
* Adding support for offload target gfx9-4-generic
* This duplication here isn't ideal
2026-04-20 14:59:17 +00:00 · 2025-09-02 04:35:07 -04:00
parent 33418b201f
commit 022f369deb
3 changed files with 62 additions and 8 deletions
--- a/include/ck/ck.hpp
+++ b/include/ck/ck.hpp
@@ -50,10 +50,11 @@
 #endif

 // define general macros for various architectures
-#if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__)
+#if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__) || \
+    defined(__gfx950__) || defined(__gfx9_4_generic__)
 #define __gfx9__
 #endif
-#if defined(__gfx942__) || defined(__gfx950__)
+#if defined(__gfx942__) || defined(__gfx950__) || defined(__gfx9_4_generic__)
 #define __gfx94__
 #endif
 #if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__)