mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 17:26:00 +00:00
Compile for gfx908 and gfx90a (#130)
* adding compilation for multiple targets * fix build * clean * update Jenkinsfile * update readme * update Jenkins * use ck::half_t instead of ushort for bf16 * rename enum classes * clean * rename * clean
This commit is contained in:
@@ -20,9 +20,9 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
using ck::NanPropagation_t;
|
||||
using ck::ReduceTensorIndices_t;
|
||||
using ck::ReduceTensorOp_t;
|
||||
using ck::NanPropagation;
|
||||
using ck::ReduceTensorIndices;
|
||||
using ck::ReduceTensorOp;
|
||||
|
||||
static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'},
|
||||
{"reduceDims", required_argument, nullptr, 'R'},
|
||||
@@ -84,7 +84,7 @@ static std::vector<T> getTypeValuesFromString(const char* cstr_values)
|
||||
return (values);
|
||||
}
|
||||
|
||||
enum struct appDataType_t
|
||||
enum struct AppDataType
|
||||
{
|
||||
appHalf = 0,
|
||||
appFloat = 1,
|
||||
@@ -130,18 +130,18 @@ class AppArgs
|
||||
|
||||
std::vector<float> scales;
|
||||
|
||||
ReduceTensorOp_t reduceOp = ReduceTensorOp_t::ADD;
|
||||
appDataType_t compTypeId = appDataType_t::appFloat;
|
||||
appDataType_t outTypeId = appDataType_t::appFloat;
|
||||
ReduceTensorOp reduceOp = ReduceTensorOp::ADD;
|
||||
AppDataType compTypeId = AppDataType::appFloat;
|
||||
AppDataType outTypeId = AppDataType::appFloat;
|
||||
|
||||
bool compType_assigned = false;
|
||||
bool outType_assigned = false;
|
||||
|
||||
NanPropagation_t nanOpt = NanPropagation_t::NOT_PROPAGATE_NAN;
|
||||
ReduceTensorIndices_t indicesOpt = ReduceTensorIndices_t::NO_INDICES;
|
||||
bool do_log = false;
|
||||
bool do_verification = false;
|
||||
bool do_dumpout = false;
|
||||
NanPropagation nanOpt = NanPropagation::NOT_PROPAGATE_NAN;
|
||||
ReduceTensorIndices indicesOpt = ReduceTensorIndices::NO_INDICES;
|
||||
bool do_log = false;
|
||||
bool do_verification = false;
|
||||
bool do_dumpout = false;
|
||||
|
||||
int init_method;
|
||||
int nrepeat;
|
||||
@@ -213,33 +213,33 @@ class AppArgs
|
||||
if(!optarg)
|
||||
throw std::runtime_error("Invalid option format!");
|
||||
|
||||
reduceOp = static_cast<ReduceTensorOp_t>(std::atoi(optarg));
|
||||
reduceOp = static_cast<ReduceTensorOp>(std::atoi(optarg));
|
||||
break;
|
||||
case 'C':
|
||||
if(!optarg)
|
||||
throw std::runtime_error("Invalid option format!");
|
||||
|
||||
compTypeId = static_cast<appDataType_t>(std::atoi(optarg));
|
||||
compTypeId = static_cast<AppDataType>(std::atoi(optarg));
|
||||
compType_assigned = true;
|
||||
break;
|
||||
case 'W':
|
||||
if(!optarg)
|
||||
throw std::runtime_error("Invalid option format!");
|
||||
|
||||
outTypeId = static_cast<appDataType_t>(std::atoi(optarg));
|
||||
outTypeId = static_cast<AppDataType>(std::atoi(optarg));
|
||||
outType_assigned = true;
|
||||
break;
|
||||
case 'N':
|
||||
if(!optarg)
|
||||
throw std::runtime_error("Invalid option format!");
|
||||
|
||||
nanOpt = static_cast<NanPropagation_t>(std::atoi(optarg));
|
||||
nanOpt = static_cast<NanPropagation>(std::atoi(optarg));
|
||||
break;
|
||||
case 'I':
|
||||
if(!optarg)
|
||||
throw std::runtime_error("Invalid option format!");
|
||||
|
||||
indicesOpt = static_cast<ReduceTensorIndices_t>(std::atoi(optarg));
|
||||
indicesOpt = static_cast<ReduceTensorIndices>(std::atoi(optarg));
|
||||
break;
|
||||
case 'S':
|
||||
if(!optarg)
|
||||
@@ -303,10 +303,10 @@ class AppArgs
|
||||
scales.push_back(0.0f);
|
||||
};
|
||||
|
||||
if(reduceOp == ReduceTensorOp_t::MIN || reduceOp == ReduceTensorOp_t::MAX ||
|
||||
reduceOp == ReduceTensorOp_t::AMAX)
|
||||
if(reduceOp == ReduceTensorOp::MIN || reduceOp == ReduceTensorOp::MAX ||
|
||||
reduceOp == ReduceTensorOp::AMAX)
|
||||
{
|
||||
if(indicesOpt != ReduceTensorIndices_t::NO_INDICES)
|
||||
if(indicesOpt != ReduceTensorIndices::NO_INDICES)
|
||||
need_indices = true;
|
||||
|
||||
// for indexable operations, no need to assign compType and outType, just let them be
|
||||
@@ -333,22 +333,22 @@ int profile_reduce(int argc, char* argv[])
|
||||
|
||||
check_reduce_dims(rank, args.reduceDims);
|
||||
|
||||
if(args.reduceOp == ReduceTensorOp_t::MUL || args.reduceOp == ReduceTensorOp_t::NORM1)
|
||||
if(args.reduceOp == ReduceTensorOp::MUL || args.reduceOp == ReduceTensorOp::NORM1)
|
||||
throw std::runtime_error("MUL and NORM1 are not supported by composable kernel!");
|
||||
|
||||
if(args.use_half)
|
||||
{
|
||||
if(!args.compType_assigned)
|
||||
args.compTypeId = appDataType_t::appHalf;
|
||||
args.compTypeId = AppDataType::appHalf;
|
||||
|
||||
if(args.outType_assigned &&
|
||||
(args.outTypeId != appDataType_t::appHalf && args.outTypeId != appDataType_t::appFloat))
|
||||
args.outTypeId = appDataType_t::appFloat;
|
||||
(args.outTypeId != AppDataType::appHalf && args.outTypeId != AppDataType::appFloat))
|
||||
args.outTypeId = AppDataType::appFloat;
|
||||
|
||||
if(!args.outType_assigned)
|
||||
args.outTypeId = appDataType_t::appHalf;
|
||||
args.outTypeId = AppDataType::appHalf;
|
||||
|
||||
if(args.compTypeId == appDataType_t::appHalf)
|
||||
if(args.compTypeId == AppDataType::appHalf)
|
||||
{
|
||||
profile_reduce_impl<ck::half_t, ck::half_t, ck::half_t>(args.do_verification,
|
||||
args.init_method,
|
||||
@@ -363,7 +363,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.scales[0],
|
||||
args.scales[1]);
|
||||
}
|
||||
else if(args.compTypeId == appDataType_t::appFloat)
|
||||
else if(args.compTypeId == AppDataType::appFloat)
|
||||
{
|
||||
profile_reduce_impl<ck::half_t, float, ck::half_t>(args.do_verification,
|
||||
args.init_method,
|
||||
@@ -399,16 +399,16 @@ int profile_reduce(int argc, char* argv[])
|
||||
else if(args.use_int8)
|
||||
{
|
||||
if(!args.compType_assigned)
|
||||
args.compTypeId = appDataType_t::appInt8;
|
||||
args.compTypeId = AppDataType::appInt8;
|
||||
|
||||
if(args.outType_assigned &&
|
||||
(args.outTypeId != appDataType_t::appInt8 && args.outTypeId != appDataType_t::appInt32))
|
||||
args.outTypeId = appDataType_t::appInt32;
|
||||
(args.outTypeId != AppDataType::appInt8 && args.outTypeId != AppDataType::appInt32))
|
||||
args.outTypeId = AppDataType::appInt32;
|
||||
|
||||
if(!args.outType_assigned)
|
||||
args.outTypeId = appDataType_t::appInt8;
|
||||
args.outTypeId = AppDataType::appInt8;
|
||||
|
||||
if(args.compTypeId == appDataType_t::appInt8)
|
||||
if(args.compTypeId == AppDataType::appInt8)
|
||||
{
|
||||
profile_reduce_impl<int8_t, int8_t, int8_t>(args.do_verification,
|
||||
args.init_method,
|
||||
@@ -423,7 +423,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.scales[0],
|
||||
args.scales[1]);
|
||||
}
|
||||
else if(args.compTypeId == appDataType_t::appInt32)
|
||||
else if(args.compTypeId == AppDataType::appInt32)
|
||||
{
|
||||
profile_reduce_impl<int8_t, int32_t, int8_t>(args.do_verification,
|
||||
args.init_method,
|
||||
@@ -443,12 +443,12 @@ int profile_reduce(int argc, char* argv[])
|
||||
}
|
||||
else if(args.use_bf16)
|
||||
{
|
||||
if(args.outType_assigned && (args.outTypeId != appDataType_t::appBFloat16 &&
|
||||
args.outTypeId != appDataType_t::appFloat))
|
||||
args.outTypeId = appDataType_t::appFloat;
|
||||
if(args.outType_assigned &&
|
||||
(args.outTypeId != AppDataType::appBFloat16 && args.outTypeId != AppDataType::appFloat))
|
||||
args.outTypeId = AppDataType::appFloat;
|
||||
|
||||
if(!args.outType_assigned)
|
||||
args.outTypeId = appDataType_t::appBFloat16;
|
||||
args.outTypeId = AppDataType::appBFloat16;
|
||||
|
||||
profile_reduce_impl<ck::bhalf_t, float, ck::bhalf_t>(args.do_verification,
|
||||
args.init_method,
|
||||
@@ -465,7 +465,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
}
|
||||
else
|
||||
{
|
||||
if(args.compTypeId == appDataType_t::appFloat)
|
||||
if(args.compTypeId == AppDataType::appFloat)
|
||||
{
|
||||
profile_reduce_impl<float, float, float>(args.do_verification,
|
||||
args.init_method,
|
||||
@@ -480,7 +480,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.scales[0],
|
||||
args.scales[1]);
|
||||
}
|
||||
else if(args.compTypeId == appDataType_t::appDouble)
|
||||
else if(args.compTypeId == AppDataType::appDouble)
|
||||
{
|
||||
profile_reduce_impl<float, double, float>(args.do_verification,
|
||||
args.init_method,
|
||||
|
||||
Reference in New Issue
Block a user