Compile for gfx908 and gfx90a (#130)

* adding compilation for multiple targets * fix build * clean * update Jenkinsfile * update readme * update Jenkins * use ck::half_t instead of ushort for bf16 * rename enum classes * clean * rename * clean
2026-05-12 17:26:00 +00:00 · 2022-03-31 12:33:34 -05:00
parent ecf337bab5
commit cd167e492a
227 changed files with 1398 additions and 2944 deletions
--- a/profiler/src/profile_reduce.cpp
+++ b/profiler/src/profile_reduce.cpp
@@ -20,9 +20,9 @@

 using namespace std;

-using ck::NanPropagation_t;
-using ck::ReduceTensorIndices_t;
-using ck::ReduceTensorOp_t;
+using ck::NanPropagation;
+using ck::ReduceTensorIndices;
+using ck::ReduceTensorOp;

 static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'},
                                       {"reduceDims", required_argument, nullptr, 'R'},
@@ -84,7 +84,7 @@ static std::vector<T> getTypeValuesFromString(const char* cstr_values)
    return (values);
 }

-enum struct appDataType_t
+enum struct AppDataType
 {
    appHalf     = 0,
    appFloat    = 1,
@@ -130,18 +130,18 @@ class AppArgs

    std::vector<float> scales;

-    ReduceTensorOp_t reduceOp = ReduceTensorOp_t::ADD;
-    appDataType_t compTypeId  = appDataType_t::appFloat;
-    appDataType_t outTypeId   = appDataType_t::appFloat;
+    ReduceTensorOp reduceOp = ReduceTensorOp::ADD;
+    AppDataType compTypeId  = AppDataType::appFloat;
+    AppDataType outTypeId   = AppDataType::appFloat;

    bool compType_assigned = false;
    bool outType_assigned  = false;

-    NanPropagation_t nanOpt          = NanPropagation_t::NOT_PROPAGATE_NAN;
-    ReduceTensorIndices_t indicesOpt = ReduceTensorIndices_t::NO_INDICES;
-    bool do_log                      = false;
-    bool do_verification             = false;
-    bool do_dumpout                  = false;
+    NanPropagation nanOpt          = NanPropagation::NOT_PROPAGATE_NAN;
+    ReduceTensorIndices indicesOpt = ReduceTensorIndices::NO_INDICES;
+    bool do_log                    = false;
+    bool do_verification           = false;
+    bool do_dumpout                = false;

    int init_method;
    int nrepeat;
@@ -213,33 +213,33 @@ class AppArgs
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

-                reduceOp = static_cast<ReduceTensorOp_t>(std::atoi(optarg));
+                reduceOp = static_cast<ReduceTensorOp>(std::atoi(optarg));
                break;
            case 'C':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

-                compTypeId        = static_cast<appDataType_t>(std::atoi(optarg));
+                compTypeId        = static_cast<AppDataType>(std::atoi(optarg));
                compType_assigned = true;
                break;
            case 'W':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

-                outTypeId        = static_cast<appDataType_t>(std::atoi(optarg));
+                outTypeId        = static_cast<AppDataType>(std::atoi(optarg));
                outType_assigned = true;
                break;
            case 'N':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

-                nanOpt = static_cast<NanPropagation_t>(std::atoi(optarg));
+                nanOpt = static_cast<NanPropagation>(std::atoi(optarg));
                break;
            case 'I':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

-                indicesOpt = static_cast<ReduceTensorIndices_t>(std::atoi(optarg));
+                indicesOpt = static_cast<ReduceTensorIndices>(std::atoi(optarg));
                break;
            case 'S':
                if(!optarg)
@@ -303,10 +303,10 @@ class AppArgs
            scales.push_back(0.0f);
        };

-        if(reduceOp == ReduceTensorOp_t::MIN || reduceOp == ReduceTensorOp_t::MAX ||
-           reduceOp == ReduceTensorOp_t::AMAX)
+        if(reduceOp == ReduceTensorOp::MIN || reduceOp == ReduceTensorOp::MAX ||
+           reduceOp == ReduceTensorOp::AMAX)
        {
-            if(indicesOpt != ReduceTensorIndices_t::NO_INDICES)
+            if(indicesOpt != ReduceTensorIndices::NO_INDICES)
                need_indices = true;

            // for indexable operations, no need to assign compType and outType, just let them be
@@ -333,22 +333,22 @@ int profile_reduce(int argc, char* argv[])

    check_reduce_dims(rank, args.reduceDims);

-    if(args.reduceOp == ReduceTensorOp_t::MUL || args.reduceOp == ReduceTensorOp_t::NORM1)
+    if(args.reduceOp == ReduceTensorOp::MUL || args.reduceOp == ReduceTensorOp::NORM1)
        throw std::runtime_error("MUL and NORM1 are not supported by composable kernel!");

    if(args.use_half)
    {
        if(!args.compType_assigned)
-            args.compTypeId = appDataType_t::appHalf;
+            args.compTypeId = AppDataType::appHalf;

        if(args.outType_assigned &&
-           (args.outTypeId != appDataType_t::appHalf && args.outTypeId != appDataType_t::appFloat))
-            args.outTypeId = appDataType_t::appFloat;
+           (args.outTypeId != AppDataType::appHalf && args.outTypeId != AppDataType::appFloat))
+            args.outTypeId = AppDataType::appFloat;

        if(!args.outType_assigned)
-            args.outTypeId = appDataType_t::appHalf;
+            args.outTypeId = AppDataType::appHalf;

-        if(args.compTypeId == appDataType_t::appHalf)
+        if(args.compTypeId == AppDataType::appHalf)
        {
            profile_reduce_impl<ck::half_t, ck::half_t, ck::half_t>(args.do_verification,
                                                                    args.init_method,
@@ -363,7 +363,7 @@ int profile_reduce(int argc, char* argv[])
                                                                    args.scales[0],
                                                                    args.scales[1]);
        }
-        else if(args.compTypeId == appDataType_t::appFloat)
+        else if(args.compTypeId == AppDataType::appFloat)
        {
            profile_reduce_impl<ck::half_t, float, ck::half_t>(args.do_verification,
                                                               args.init_method,
@@ -399,16 +399,16 @@ int profile_reduce(int argc, char* argv[])
    else if(args.use_int8)
    {
        if(!args.compType_assigned)
-            args.compTypeId = appDataType_t::appInt8;
+            args.compTypeId = AppDataType::appInt8;

        if(args.outType_assigned &&
-           (args.outTypeId != appDataType_t::appInt8 && args.outTypeId != appDataType_t::appInt32))
-            args.outTypeId = appDataType_t::appInt32;
+           (args.outTypeId != AppDataType::appInt8 && args.outTypeId != AppDataType::appInt32))
+            args.outTypeId = AppDataType::appInt32;

        if(!args.outType_assigned)
-            args.outTypeId = appDataType_t::appInt8;
+            args.outTypeId = AppDataType::appInt8;

-        if(args.compTypeId == appDataType_t::appInt8)
+        if(args.compTypeId == AppDataType::appInt8)
        {
            profile_reduce_impl<int8_t, int8_t, int8_t>(args.do_verification,
                                                        args.init_method,
@@ -423,7 +423,7 @@ int profile_reduce(int argc, char* argv[])
                                                        args.scales[0],
                                                        args.scales[1]);
        }
-        else if(args.compTypeId == appDataType_t::appInt32)
+        else if(args.compTypeId == AppDataType::appInt32)
        {
            profile_reduce_impl<int8_t, int32_t, int8_t>(args.do_verification,
                                                         args.init_method,
@@ -443,12 +443,12 @@ int profile_reduce(int argc, char* argv[])
    }
    else if(args.use_bf16)
    {
-        if(args.outType_assigned && (args.outTypeId != appDataType_t::appBFloat16 &&
-                                     args.outTypeId != appDataType_t::appFloat))
-            args.outTypeId = appDataType_t::appFloat;
+        if(args.outType_assigned &&
+           (args.outTypeId != AppDataType::appBFloat16 && args.outTypeId != AppDataType::appFloat))
+            args.outTypeId = AppDataType::appFloat;

        if(!args.outType_assigned)
-            args.outTypeId = appDataType_t::appBFloat16;
+            args.outTypeId = AppDataType::appBFloat16;

        profile_reduce_impl<ck::bhalf_t, float, ck::bhalf_t>(args.do_verification,
                                                             args.init_method,
@@ -465,7 +465,7 @@ int profile_reduce(int argc, char* argv[])
    }
    else
    {
-        if(args.compTypeId == appDataType_t::appFloat)
+        if(args.compTypeId == AppDataType::appFloat)
        {
            profile_reduce_impl<float, float, float>(args.do_verification,
                                                     args.init_method,
@@ -480,7 +480,7 @@ int profile_reduce(int argc, char* argv[])
                                                     args.scales[0],
                                                     args.scales[1]);
        }
-        else if(args.compTypeId == appDataType_t::appDouble)
+        else if(args.compTypeId == AppDataType::appDouble)
        {
            profile_reduce_impl<float, double, float>(args.do_verification,
                                                      args.init_method,