Compile for gfx908 and gfx90a (#130)

* adding compilation for multiple targets

* fix build

* clean

* update Jekinsfile

* update readme

* update Jenkins

* use ck::half_t instead of ushort for bf16

* rename enum classes

* clean

* rename

* clean
This commit is contained in:
Chao Liu
2022-03-31 12:33:34 -05:00
committed by GitHub
parent ecf337bab5
commit cd167e492a
227 changed files with 1398 additions and 2944 deletions

View File

@@ -20,9 +20,9 @@
using namespace std;
using ck::NanPropagation_t;
using ck::ReduceTensorIndices_t;
using ck::ReduceTensorOp_t;
using ck::NanPropagation;
using ck::ReduceTensorIndices;
using ck::ReduceTensorOp;
static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'},
{"reduceDims", required_argument, nullptr, 'R'},
@@ -84,7 +84,7 @@ static std::vector<T> getTypeValuesFromString(const char* cstr_values)
return (values);
}
enum struct appDataType_t
enum struct AppDataType
{
appHalf = 0,
appFloat = 1,
@@ -130,18 +130,18 @@ class AppArgs
std::vector<float> scales;
ReduceTensorOp_t reduceOp = ReduceTensorOp_t::ADD;
appDataType_t compTypeId = appDataType_t::appFloat;
appDataType_t outTypeId = appDataType_t::appFloat;
ReduceTensorOp reduceOp = ReduceTensorOp::ADD;
AppDataType compTypeId = AppDataType::appFloat;
AppDataType outTypeId = AppDataType::appFloat;
bool compType_assigned = false;
bool outType_assigned = false;
NanPropagation_t nanOpt = NanPropagation_t::NOT_PROPAGATE_NAN;
ReduceTensorIndices_t indicesOpt = ReduceTensorIndices_t::NO_INDICES;
bool do_log = false;
bool do_verification = false;
bool do_dumpout = false;
NanPropagation nanOpt = NanPropagation::NOT_PROPAGATE_NAN;
ReduceTensorIndices indicesOpt = ReduceTensorIndices::NO_INDICES;
bool do_log = false;
bool do_verification = false;
bool do_dumpout = false;
int init_method;
int nrepeat;
@@ -213,33 +213,33 @@ class AppArgs
if(!optarg)
throw std::runtime_error("Invalid option format!");
reduceOp = static_cast<ReduceTensorOp_t>(std::atoi(optarg));
reduceOp = static_cast<ReduceTensorOp>(std::atoi(optarg));
break;
case 'C':
if(!optarg)
throw std::runtime_error("Invalid option format!");
compTypeId = static_cast<appDataType_t>(std::atoi(optarg));
compTypeId = static_cast<AppDataType>(std::atoi(optarg));
compType_assigned = true;
break;
case 'W':
if(!optarg)
throw std::runtime_error("Invalid option format!");
outTypeId = static_cast<appDataType_t>(std::atoi(optarg));
outTypeId = static_cast<AppDataType>(std::atoi(optarg));
outType_assigned = true;
break;
case 'N':
if(!optarg)
throw std::runtime_error("Invalid option format!");
nanOpt = static_cast<NanPropagation_t>(std::atoi(optarg));
nanOpt = static_cast<NanPropagation>(std::atoi(optarg));
break;
case 'I':
if(!optarg)
throw std::runtime_error("Invalid option format!");
indicesOpt = static_cast<ReduceTensorIndices_t>(std::atoi(optarg));
indicesOpt = static_cast<ReduceTensorIndices>(std::atoi(optarg));
break;
case 'S':
if(!optarg)
@@ -303,10 +303,10 @@ class AppArgs
scales.push_back(0.0f);
};
if(reduceOp == ReduceTensorOp_t::MIN || reduceOp == ReduceTensorOp_t::MAX ||
reduceOp == ReduceTensorOp_t::AMAX)
if(reduceOp == ReduceTensorOp::MIN || reduceOp == ReduceTensorOp::MAX ||
reduceOp == ReduceTensorOp::AMAX)
{
if(indicesOpt != ReduceTensorIndices_t::NO_INDICES)
if(indicesOpt != ReduceTensorIndices::NO_INDICES)
need_indices = true;
// for indexable operations, no need to assign compType and outType, just let them be
@@ -333,22 +333,22 @@ int profile_reduce(int argc, char* argv[])
check_reduce_dims(rank, args.reduceDims);
if(args.reduceOp == ReduceTensorOp_t::MUL || args.reduceOp == ReduceTensorOp_t::NORM1)
if(args.reduceOp == ReduceTensorOp::MUL || args.reduceOp == ReduceTensorOp::NORM1)
throw std::runtime_error("MUL and NORM1 are not supported by composable kernel!");
if(args.use_half)
{
if(!args.compType_assigned)
args.compTypeId = appDataType_t::appHalf;
args.compTypeId = AppDataType::appHalf;
if(args.outType_assigned &&
(args.outTypeId != appDataType_t::appHalf && args.outTypeId != appDataType_t::appFloat))
args.outTypeId = appDataType_t::appFloat;
(args.outTypeId != AppDataType::appHalf && args.outTypeId != AppDataType::appFloat))
args.outTypeId = AppDataType::appFloat;
if(!args.outType_assigned)
args.outTypeId = appDataType_t::appHalf;
args.outTypeId = AppDataType::appHalf;
if(args.compTypeId == appDataType_t::appHalf)
if(args.compTypeId == AppDataType::appHalf)
{
profile_reduce_impl<ck::half_t, ck::half_t, ck::half_t>(args.do_verification,
args.init_method,
@@ -363,7 +363,7 @@ int profile_reduce(int argc, char* argv[])
args.scales[0],
args.scales[1]);
}
else if(args.compTypeId == appDataType_t::appFloat)
else if(args.compTypeId == AppDataType::appFloat)
{
profile_reduce_impl<ck::half_t, float, ck::half_t>(args.do_verification,
args.init_method,
@@ -399,16 +399,16 @@ int profile_reduce(int argc, char* argv[])
else if(args.use_int8)
{
if(!args.compType_assigned)
args.compTypeId = appDataType_t::appInt8;
args.compTypeId = AppDataType::appInt8;
if(args.outType_assigned &&
(args.outTypeId != appDataType_t::appInt8 && args.outTypeId != appDataType_t::appInt32))
args.outTypeId = appDataType_t::appInt32;
(args.outTypeId != AppDataType::appInt8 && args.outTypeId != AppDataType::appInt32))
args.outTypeId = AppDataType::appInt32;
if(!args.outType_assigned)
args.outTypeId = appDataType_t::appInt8;
args.outTypeId = AppDataType::appInt8;
if(args.compTypeId == appDataType_t::appInt8)
if(args.compTypeId == AppDataType::appInt8)
{
profile_reduce_impl<int8_t, int8_t, int8_t>(args.do_verification,
args.init_method,
@@ -423,7 +423,7 @@ int profile_reduce(int argc, char* argv[])
args.scales[0],
args.scales[1]);
}
else if(args.compTypeId == appDataType_t::appInt32)
else if(args.compTypeId == AppDataType::appInt32)
{
profile_reduce_impl<int8_t, int32_t, int8_t>(args.do_verification,
args.init_method,
@@ -443,12 +443,12 @@ int profile_reduce(int argc, char* argv[])
}
else if(args.use_bf16)
{
if(args.outType_assigned && (args.outTypeId != appDataType_t::appBFloat16 &&
args.outTypeId != appDataType_t::appFloat))
args.outTypeId = appDataType_t::appFloat;
if(args.outType_assigned &&
(args.outTypeId != AppDataType::appBFloat16 && args.outTypeId != AppDataType::appFloat))
args.outTypeId = AppDataType::appFloat;
if(!args.outType_assigned)
args.outTypeId = appDataType_t::appBFloat16;
args.outTypeId = AppDataType::appBFloat16;
profile_reduce_impl<ck::bhalf_t, float, ck::bhalf_t>(args.do_verification,
args.init_method,
@@ -465,7 +465,7 @@ int profile_reduce(int argc, char* argv[])
}
else
{
if(args.compTypeId == appDataType_t::appFloat)
if(args.compTypeId == AppDataType::appFloat)
{
profile_reduce_impl<float, float, float>(args.do_verification,
args.init_method,
@@ -480,7 +480,7 @@ int profile_reduce(int argc, char* argv[])
args.scales[0],
args.scales[1]);
}
else if(args.compTypeId == appDataType_t::appDouble)
else if(args.compTypeId == AppDataType::appDouble)
{
profile_reduce_impl<float, double, float>(args.do_verification,
args.init_method,