Add compiler flags for ROCm versions 6.2+ (#1429)

* add compiler flags to fix compiler issues
* fix typo
* disable test_smfmac_op on all devices except gfx942
* specify full path to compiler in CI
2026-06-05 20:55:59 +00:00 · 2024-08-01 08:27:52 -07:00
parent 6648fd3b04
commit d311c95396
3 changed files with 51 additions and 48 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -180,7 +180,16 @@ if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 500723302)
 endif()
 if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600140090)
   message("Adding the enable-post-misched=0 compiler flag")
-   add_compile_options(-mllvm -enable-post-misched=0)
+   add_compile_options("SHELL: -mllvm -enable-post-misched=0")
+endif()
+if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600241132 AND ${hip_VERSION_FLAT} LESS 600300000)
+   message("Adding the amdgpu-coerce-illegal-types=1")
+   add_compile_options("SHELL: -mllvm -amdgpu-coerce-illegal-types=1")
+endif()
+if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600241132)
+   message("Adding -amdgpu-early-inline-all=true and -amdgpu-function-calls=false")
+   add_compile_options("SHELL: -mllvm -amdgpu-early-inline-all=true")
+   add_compile_options("SHELL: -mllvm -amdgpu-function-calls=false")
 endif()
 #
 # Seperate linking jobs from compiling
--- a/20
+++ b/20
@@ -86,17 +86,7 @@ def check_host() {

 def build_compiler(){
    def compiler
-    if (params.BUILD_COMPILER == "hipcc"){
-        compiler = '/opt/rocm/bin/hipcc'
-    }
-    else{
-        if (params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){
-            compiler = "/llvm-project/build/bin/clang++"
-        }
-        else{
-            compiler = "/opt/rocm/llvm/bin/clang++"
-        }        
-    }
+    compiler = "${params.BUILD_COMPILER}"
    return compiler
 }

@@ -664,8 +654,8 @@ def process_results(Map conf=[:]){
 //launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version
 CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.1; RUN_CK_TILE_TESTS=true
                                              0 21 * * * % ROCMVERSION=6.1;hipTensor_test=true
-                                              0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;COMPILER_COMMIT=;USE_SCCACHE=false
-                                              0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;COMPILER_COMMIT=;USE_SCCACHE=false
+                                              0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false
+                                              0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false
                                              0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false''' : ""

 pipeline {
@@ -699,8 +689,8 @@ pipeline {
            description: 'Specify which commit of compiler branch to use: leave blank to use the latest commit (default), or use some specific commit of llvm-project branch.')
        string(
            name: 'BUILD_COMPILER', 
-            defaultValue: 'clang', 
-            description: 'Specify whether to build CK with hipcc or with clang (default).')
+            defaultValue: '/opt/rocm/llvm/bin/clang++', 
+            description: 'Build CK with /opt/rocm/bin/hipcc, /llvm-project/build/bin/clang++, or with /opt/rocm/llvm/bin/clang++ (default).')
        booleanParam(
            name: "RUN_FULL_QA",
            defaultValue: false,
--- a/test/smfmac_op/smfmac_op_xdl.cpp
+++ b/test/smfmac_op/smfmac_op_xdl.cpp
@@ -13,6 +13,7 @@
 #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
 #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
 #include "test/smfmac_op/smfmac_op_util.hpp"
+#include "ck/host_utility/device_prop.hpp"

 using BF16        = ck::bhalf_t;
 using F16         = ck::half_t;
@@ -38,40 +39,43 @@ class TestSmfmac : public ::testing::Test

    void Run()
    {
-        bool pass                     = true;
-        constexpr auto matmul_default = ck::smfmac_op_util::matmul<Src1Type,
-                                                                   Src1VecSize,
-                                                                   Src2Type,
-                                                                   Src2VecSize,
-                                                                   GPUAccType,
-                                                                   AccVecSize,
-                                                                   DstType,
-                                                                   M,
-                                                                   N,
-                                                                   K>;
+        bool pass = true;
+        if(ck::get_device_name() == "gfx942")
+        {
+            constexpr auto matmul_default = ck::smfmac_op_util::matmul<Src1Type,
+                                                                       Src1VecSize,
+                                                                       Src2Type,
+                                                                       Src2VecSize,
+                                                                       GPUAccType,
+                                                                       AccVecSize,
+                                                                       DstType,
+                                                                       M,
+                                                                       N,
+                                                                       K>;

-        constexpr auto smfmac_kernel_container = std::make_tuple(matmul_default);
-
-        ck::static_for<0, std::tuple_size_v<decltype(smfmac_kernel_container)>, 1>{}([&](auto i) {
-            pass &= ck::smfmac_op_util::TestSmfmac<
-                std::tuple_element_t<i.value, decltype(smfmac_kernel_container)>,
-                Src1Type,
-                Src2Type,
-                DstType,
-                GPUAccType,
-                CPUAccType,
-                decltype(Row{}),
-                decltype(Row{}),
-                decltype(Row{}),
-                PassThrough,
-                PassThrough,
-                PassThrough,
-                AccVecSize,
-                M,
-                N,
-                K>{}(std::get<ck::Number<i>{}>(smfmac_kernel_container));
-        });
+            constexpr auto smfmac_kernel_container = std::make_tuple(matmul_default);

+            ck::static_for<0, std::tuple_size_v<decltype(smfmac_kernel_container)>, 1>{}(
+                [&](auto i) {
+                    pass &= ck::smfmac_op_util::TestSmfmac<
+                        std::tuple_element_t<i.value, decltype(smfmac_kernel_container)>,
+                        Src1Type,
+                        Src2Type,
+                        DstType,
+                        GPUAccType,
+                        CPUAccType,
+                        decltype(Row{}),
+                        decltype(Row{}),
+                        decltype(Row{}),
+                        PassThrough,
+                        PassThrough,
+                        PassThrough,
+                        AccVecSize,
+                        M,
+                        N,
+                        K>{}(std::get<ck::Number<i>{}>(smfmac_kernel_container));
+                });
+        }
        EXPECT_TRUE(pass);
    }
 };