mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 21:21:22 +00:00
Use double for all scaling values and float-point constant values at the Device Op API (#557)
* Use double as alpha/beta values type in reduce device op api * Use double as alpha/beta values type in softmax device op api * Use double as alpha/beta values type in multiple-reduce device op api * Use double as epsilon value type in normalization/elementwise-normalization device op api
This commit is contained in:
@@ -270,18 +270,18 @@ struct DeviceElementwiseNormalizationImpl
|
||||
const std::vector<index_t> reduceDims,
|
||||
XElementwiseOperation x_elementwise_op,
|
||||
YElementwiseOperation y_elementwise_op,
|
||||
AccDataType epsilon,
|
||||
double epsilon,
|
||||
const std::array<const void*, NumInput> in_dev_buffers,
|
||||
const GammaDataType* p_gamma,
|
||||
const BetaDataType* p_beta,
|
||||
YDataType* p_y)
|
||||
: epsilon_(epsilon),
|
||||
p_gamma_(p_gamma),
|
||||
: p_gamma_(p_gamma),
|
||||
p_beta_(p_beta),
|
||||
p_y_(p_y),
|
||||
x_elementwise_op_(x_elementwise_op),
|
||||
y_elementwise_op_(y_elementwise_op)
|
||||
{
|
||||
epsilon_ = static_cast<AccDataType>(epsilon);
|
||||
|
||||
Lengths_ = shuffle_tensor_dimensions<Rank, NumReduceDim>(lengths, reduceDims);
|
||||
for(int i = 0; i < NumInput; i++)
|
||||
@@ -543,7 +543,7 @@ struct DeviceElementwiseNormalizationImpl
|
||||
const std::vector<index_t> betaStrides,
|
||||
const std::vector<index_t> yStrides,
|
||||
const std::vector<index_t> reduceDims,
|
||||
AccDataType epsilon,
|
||||
double epsilon,
|
||||
const std::array<const void*, NumInput> in_dev_buffers,
|
||||
const void* p_gamma,
|
||||
const void* p_beta,
|
||||
|
||||
@@ -270,8 +270,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,
|
||||
const std::array<index_t, NumOutputDim>& outLengths,
|
||||
const std::array<std::array<index_t, NumOutputDim>, NumReduction>& outStridesArray,
|
||||
const std::array<int, NumReduceDim>& reduceDims,
|
||||
const std::array<const void*, NumReduction>& alphas,
|
||||
const std::array<const void*, NumReduction>& betas,
|
||||
const std::array<double, NumReduction>& alphas,
|
||||
const std::array<double, NumReduction>& betas,
|
||||
const void* in_dev,
|
||||
const std::array<void*, NumReduction>& out_dev_buffers,
|
||||
const InElementwiseOperationTuple in_elementwise_op_tuple,
|
||||
@@ -286,8 +286,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,
|
||||
|
||||
for(size_t i = 0; i < NumReduction; i++)
|
||||
{
|
||||
alpha_values_(i) = *static_cast<const AccDataType*>(alphas[i]);
|
||||
beta_values_(i) = *static_cast<const AccDataType*>(betas[i]);
|
||||
alpha_values_(i) = static_cast<AccDataType>(alphas[i]);
|
||||
beta_values_(i) = static_cast<AccDataType>(betas[i]);
|
||||
};
|
||||
|
||||
in_dev_ = static_cast<const InDataType*>(in_dev);
|
||||
@@ -547,8 +547,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,
|
||||
const std::array<index_t, NumOutputDim> outLengths,
|
||||
const std::array<std::array<index_t, NumOutputDim>, NumReduction> outStridesArray,
|
||||
const std::array<int, NumReduceDim> reduceDims,
|
||||
const std::array<const void*, NumReduction> alphas,
|
||||
const std::array<const void*, NumReduction> betas,
|
||||
const std::array<double, NumReduction> alphas,
|
||||
const std::array<double, NumReduction> betas,
|
||||
const void* in_dev,
|
||||
const std::array<void*, NumReduction> out_dev_buffers,
|
||||
const InElementwiseOperationTuple in_elementwise_op_tuple,
|
||||
|
||||
@@ -195,8 +195,8 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,
|
||||
const std::array<index_t, NumOutputDim>& outLengths,
|
||||
const std::array<std::array<index_t, NumOutputDim>, NumReduction>& outStridesArray,
|
||||
const std::array<int, NumReduceDim>& reduceDims,
|
||||
const std::array<const void*, NumReduction>& alphas,
|
||||
const std::array<const void*, NumReduction>& betas,
|
||||
const std::array<double, NumReduction>& alphas,
|
||||
const std::array<double, NumReduction>& betas,
|
||||
const void* in_dev,
|
||||
const std::array<void*, NumReduction>& out_dev_buffers,
|
||||
const InElementwiseOperationTuple in_elementwise_op_tuple,
|
||||
@@ -211,8 +211,8 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,
|
||||
|
||||
for(size_t i = 0; i < NumReduction; i++)
|
||||
{
|
||||
alpha_values_(i) = *static_cast<const AccDataType*>(alphas[i]);
|
||||
beta_values_(i) = *static_cast<const AccDataType*>(betas[i]);
|
||||
alpha_values_(i) = static_cast<AccDataType>(alphas[i]);
|
||||
beta_values_(i) = static_cast<AccDataType>(betas[i]);
|
||||
};
|
||||
|
||||
in_dev_ = static_cast<const InDataType*>(in_dev);
|
||||
@@ -374,8 +374,8 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,
|
||||
const std::array<index_t, NumOutputDim> outLengths,
|
||||
const std::array<std::array<index_t, NumOutputDim>, NumReduction> outStridesArray,
|
||||
const std::array<int, NumReduceDim> reduceDims,
|
||||
const std::array<const void*, NumReduction> alphas,
|
||||
const std::array<const void*, NumReduction> betas,
|
||||
const std::array<double, NumReduction> alphas,
|
||||
const std::array<double, NumReduction> betas,
|
||||
const void* in_dev,
|
||||
const std::array<void*, NumReduction> out_dev_buffers,
|
||||
const InElementwiseOperationTuple in_elementwise_op_tuple,
|
||||
|
||||
@@ -221,18 +221,19 @@ struct DeviceNormalizationImpl : public DeviceNormalization<XDataType,
|
||||
const std::vector<index_t> yStrides,
|
||||
const std::vector<index_t> reduceDims,
|
||||
AccElementwiseOperation acc_elementwise_op,
|
||||
AccDataType epsilon,
|
||||
double epsilon,
|
||||
const XDataType* p_x,
|
||||
const GammaDataType* p_gamma,
|
||||
const BetaDataType* p_beta,
|
||||
YDataType* p_y)
|
||||
: epsilon_(epsilon),
|
||||
p_x_(p_x),
|
||||
: p_x_(p_x),
|
||||
p_gamma_(p_gamma),
|
||||
p_beta_(p_beta),
|
||||
p_y_(p_y),
|
||||
acc_elementwise_op_(acc_elementwise_op)
|
||||
{
|
||||
epsilon_ = static_cast<AccDataType>(epsilon);
|
||||
|
||||
Lengths_ = shuffle_tensor_dimensions<Rank, NumReduceDim>(lengths, reduceDims);
|
||||
xStrides_ = shuffle_tensor_dimensions<Rank, NumReduceDim>(xStrides, reduceDims);
|
||||
yStrides_ = shuffle_tensor_dimensions<Rank, NumReduceDim>(yStrides, reduceDims);
|
||||
@@ -421,7 +422,7 @@ struct DeviceNormalizationImpl : public DeviceNormalization<XDataType,
|
||||
const std::vector<index_t> betaStrides,
|
||||
const std::vector<index_t> yStrides,
|
||||
const std::vector<index_t> reduceDims,
|
||||
AccDataType epsilon,
|
||||
double epsilon,
|
||||
const void* p_x,
|
||||
const void* p_gamma,
|
||||
const void* p_beta,
|
||||
|
||||
@@ -217,8 +217,8 @@ struct DeviceReduceMultiBlock : public DeviceReduce<InDataType,
|
||||
const std::array<index_t, NumDstDim> outLengths,
|
||||
const std::array<index_t, NumDstDim> outStrides,
|
||||
const std::array<int, NumReduceDim> reduceDims,
|
||||
float alpha,
|
||||
float beta,
|
||||
double alpha,
|
||||
double beta,
|
||||
const InDataType* in_dev,
|
||||
const IndexDataType* in_index_dev,
|
||||
OutDataType* out_dev,
|
||||
@@ -502,8 +502,8 @@ struct DeviceReduceMultiBlock : public DeviceReduce<InDataType,
|
||||
const std::array<index_t, NumDstDim> outLengths,
|
||||
const std::array<index_t, NumDstDim> outStrides,
|
||||
const std::array<int, NumReduceDim> reduceDims,
|
||||
float alpha,
|
||||
float beta,
|
||||
double alpha,
|
||||
double beta,
|
||||
const void* in_dev,
|
||||
const void* in_index_dev,
|
||||
void* out_dev,
|
||||
|
||||
@@ -165,8 +165,8 @@ struct DeviceReduceThreadWise : public DeviceReduce<InDataType,
|
||||
const std::array<index_t, NumDstDim> outLengths,
|
||||
const std::array<index_t, NumDstDim> outStrides,
|
||||
const std::array<int, NumReduceDim> reduceDims,
|
||||
float alpha,
|
||||
float beta,
|
||||
double alpha,
|
||||
double beta,
|
||||
const InDataType* in_dev,
|
||||
OutDataType* out_dev,
|
||||
IndexDataType* out_index_dev,
|
||||
@@ -341,8 +341,8 @@ struct DeviceReduceThreadWise : public DeviceReduce<InDataType,
|
||||
const std::array<index_t, NumDstDim> outLengths,
|
||||
const std::array<index_t, NumDstDim> outStrides,
|
||||
const std::array<int, NumReduceDim> reduceDims,
|
||||
float alpha,
|
||||
float beta,
|
||||
double alpha,
|
||||
double beta,
|
||||
const void* in_dev,
|
||||
const void* in_index_dev,
|
||||
void* out_dev,
|
||||
|
||||
@@ -156,19 +156,20 @@ struct DeviceSoftmaxImpl : public DeviceSoftmax<InDataType,
|
||||
Argument(const std::vector<index_t> inLengths,
|
||||
const std::vector<index_t> inStrides,
|
||||
const std::vector<index_t> reduceDims,
|
||||
AccDataType alpha,
|
||||
AccDataType beta,
|
||||
double alpha,
|
||||
double beta,
|
||||
const InDataType* in_dev,
|
||||
OutDataType* out_dev,
|
||||
InElementwiseOp in_elementwise_op,
|
||||
AccElementwiseOp acc_elementwise_op)
|
||||
: alpha_{alpha},
|
||||
beta_{beta},
|
||||
in_dev_{in_dev},
|
||||
: in_dev_{in_dev},
|
||||
out_dev_{out_dev},
|
||||
in_elementwise_op_{in_elementwise_op},
|
||||
acc_elementwise_op_{acc_elementwise_op}
|
||||
{
|
||||
alpha_ = static_cast<AccDataType>(alpha);
|
||||
beta_ = static_cast<AccDataType>(beta);
|
||||
|
||||
if(Rank != inLengths.size() || Rank != inStrides.size() ||
|
||||
NumReduceDim != reduceDims.size())
|
||||
{
|
||||
@@ -336,8 +337,8 @@ struct DeviceSoftmaxImpl : public DeviceSoftmax<InDataType,
|
||||
static auto MakeArgument(const std::vector<index_t> inLengths,
|
||||
const std::vector<index_t> inStrides,
|
||||
const std::vector<int> reduceDims,
|
||||
const AccDataType alpha,
|
||||
const AccDataType beta,
|
||||
double alpha,
|
||||
double beta,
|
||||
const InDataType* in_dev,
|
||||
OutDataType* out_dev,
|
||||
InElementwiseOp in_elementwise_op,
|
||||
@@ -375,8 +376,8 @@ struct DeviceSoftmaxImpl : public DeviceSoftmax<InDataType,
|
||||
std::unique_ptr<BaseArgument> MakeArgumentPointer(const std::vector<index_t> inLengths,
|
||||
const std::vector<index_t> inStrides,
|
||||
const std::vector<int> reduceDims,
|
||||
const void* alpha,
|
||||
const void* beta,
|
||||
double alpha,
|
||||
double beta,
|
||||
const void* in_dev,
|
||||
void* out_dev,
|
||||
InElementwiseOp in_elementwise_op,
|
||||
@@ -385,8 +386,8 @@ struct DeviceSoftmaxImpl : public DeviceSoftmax<InDataType,
|
||||
return std::make_unique<Argument>(inLengths,
|
||||
inStrides,
|
||||
reduceDims,
|
||||
*static_cast<const AccDataType*>(alpha),
|
||||
*static_cast<const AccDataType*>(beta),
|
||||
alpha,
|
||||
beta,
|
||||
static_cast<const InDataType*>(in_dev),
|
||||
static_cast<OutDataType*>(out_dev),
|
||||
in_elementwise_op,
|
||||
|
||||
Reference in New Issue
Block a user