mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-22 22:08:24 +00:00
Layernorm and groupnorm support to save mean and inverse std in forward (#929)
* save mean and inverse std in normalization
* Save mean and inverse std in splitK
* Vector save mean and inv std
* Modify instance for save mean and std
* simplify the layernorm example
* Save mean and std in groupnorm example
* Save mean and inv std in ckProfiler and test
* Remove compute data type from base class
* Save mean and inv std in client example
* Add changelog
* clang format
* Fix compile error
* Refine naming
* Avoid error in bf16
* revert changelog
[ROCm/composable_kernel commit: 3696fe1c76]
This commit is contained in:
@@ -114,12 +114,15 @@ void host_gemm_layernorm(Tensor<HDataType>& h_m_n,
|
||||
BetaDataType,
|
||||
HDataType,
|
||||
AccDataType,
|
||||
AccDataType,
|
||||
HElementOp,
|
||||
2,
|
||||
1>;
|
||||
|
||||
Tensor<EMeanVarDataType> e_m_n(HostTensorDescriptor{M, N});
|
||||
Tensor<AccDataType> c_m_n(HostTensorDescriptor{M, N});
|
||||
Tensor<AccDataType> save_mean({M});
|
||||
Tensor<AccDataType> save_inv_std({M});
|
||||
|
||||
auto ref_gemm = ReferenceGemm{};
|
||||
auto ref_gemm_invoker = ref_gemm.MakeInvoker();
|
||||
@@ -145,7 +148,7 @@ void host_gemm_layernorm(Tensor<HDataType>& h_m_n,
|
||||
auto ref_layernorm_invoker = ref_layernorm.MakeInvoker();
|
||||
|
||||
auto ref_layernorm_argument = ref_layernorm.MakeArgument(
|
||||
e_m_n, gamma_n, beta_n, h_m_n, h_element_op, {M, N}, {1}, epsilon);
|
||||
e_m_n, gamma_n, beta_n, h_m_n, save_mean, save_inv_std, h_element_op, {M, N}, {1}, epsilon);
|
||||
ref_layernorm_invoker.Run(ref_layernorm_argument);
|
||||
}
|
||||
|
||||
|
||||
@@ -3,12 +3,15 @@
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using ComputeDataType = float;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using SaveMeanInvStdDataType = float;
|
||||
using ComputeDataType = float;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
#define SAVE_MEAN_INV_STD
|
||||
|
||||
constexpr int Rank = 2;
|
||||
constexpr int NumReduceDim = 1;
|
||||
@@ -19,6 +22,7 @@ using DeviceInstance =
|
||||
BetaDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
PassThrough,
|
||||
Rank,
|
||||
NumReduceDim,
|
||||
@@ -33,7 +37,8 @@ using DeviceInstance =
|
||||
8, // GammaScalarPerVector
|
||||
1, // BetaVecDim (0=M, 1=K)
|
||||
8, // BetaScalarPerVector
|
||||
8>; // OutScalarPerVector
|
||||
8, // YScalarPerVector
|
||||
1>; // SaveMeanInvStdScalarPerVector
|
||||
#include "run_layernorm_example.inc"
|
||||
|
||||
int main() { return run_groupnorm_example<DeviceInstance>(); }
|
||||
|
||||
@@ -3,12 +3,15 @@
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using ComputeDataType = float;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using SaveMeanInvStdDataType = float;
|
||||
using ComputeDataType = float;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
#define SAVE_MEAN_INV_STD
|
||||
|
||||
constexpr int Rank = 2;
|
||||
constexpr int NumReduceDim = 1;
|
||||
@@ -19,6 +22,7 @@ using DeviceInstance =
|
||||
BetaDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
PassThrough,
|
||||
Rank,
|
||||
NumReduceDim,
|
||||
@@ -33,7 +37,8 @@ using DeviceInstance =
|
||||
8, // GammaScalarPerVector
|
||||
1, // BetaVecDim (0=M, 1=K)
|
||||
8, // BetaScalarPerVector
|
||||
8>; // YScalarPerVector
|
||||
8, // YScalarPerVector
|
||||
1>; // SaveMeanInvStdScalarPerVector
|
||||
|
||||
#include "run_layernorm_example.inc"
|
||||
|
||||
|
||||
@@ -10,22 +10,13 @@ int run_groupnorm_example()
|
||||
|
||||
ck::index_t M = 1024;
|
||||
ck::index_t N = 1024;
|
||||
ck::index_t Stride = N;
|
||||
|
||||
auto f_host_tensor_descriptor1d = [](std::size_t len, std::size_t stride) {
|
||||
return HostTensorDescriptor({len}, {stride});
|
||||
};
|
||||
|
||||
auto f_host_tensor_descriptor2d = [](std::size_t row, std::size_t col, std::size_t stride) {
|
||||
using namespace ck::literals;
|
||||
|
||||
return HostTensorDescriptor({row, col}, {stride, 1_uz});
|
||||
};
|
||||
|
||||
Tensor<XDataType> x(f_host_tensor_descriptor2d(M, N, Stride));
|
||||
Tensor<GammaDataType> gamma(f_host_tensor_descriptor1d(N, 1));
|
||||
Tensor<BetaDataType> beta(f_host_tensor_descriptor1d(N, 1));
|
||||
Tensor<YDataType> y(f_host_tensor_descriptor2d(M, N, Stride));
|
||||
Tensor<XDataType> x({M, N});
|
||||
Tensor<GammaDataType> gamma({N});
|
||||
Tensor<BetaDataType> beta({N});
|
||||
Tensor<YDataType> y({M, N});
|
||||
Tensor<SaveMeanInvStdDataType> save_mean({M});
|
||||
Tensor<SaveMeanInvStdDataType> save_inv_std({M});
|
||||
|
||||
x.GenerateTensorValue(GeneratorTensor_3<XDataType>{0.0, 1.0});
|
||||
gamma.GenerateTensorValue(GeneratorTensor_3<GammaDataType>{0.0, 1.0});
|
||||
@@ -35,6 +26,11 @@ int run_groupnorm_example()
|
||||
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
|
||||
DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpaceSize());
|
||||
DeviceMem y_dev(sizeof(YDataType) * y.mDesc.GetElementSpaceSize());
|
||||
#ifdef SAVE_MEAN_INV_STD
|
||||
DeviceMem save_mean_dev(sizeof(SaveMeanInvStdDataType) * save_mean.mDesc.GetElementSpaceSize());
|
||||
DeviceMem save_inv_std_dev(sizeof(SaveMeanInvStdDataType) *
|
||||
save_inv_std.mDesc.GetElementSpaceSize());
|
||||
#endif
|
||||
|
||||
x_dev.ToDevice(x.mData.data());
|
||||
gamma_dev.ToDevice(gamma.mData.data());
|
||||
@@ -47,14 +43,23 @@ int run_groupnorm_example()
|
||||
{0, 1},
|
||||
{0, 1},
|
||||
std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
{1},
|
||||
1e-4,
|
||||
x_dev.GetDeviceBuffer(),
|
||||
gamma_dev.GetDeviceBuffer(),
|
||||
beta_dev.GetDeviceBuffer(),
|
||||
y_dev.GetDeviceBuffer(),
|
||||
#ifdef SAVE_MEAN_INV_STD
|
||||
save_mean_dev.GetDeviceBuffer(),
|
||||
save_inv_std_dev.GetDeviceBuffer(),
|
||||
#else
|
||||
nullptr,
|
||||
nullptr,
|
||||
#endif
|
||||
PassThrough{});
|
||||
|
||||
if(!device_instance.IsSupportedArgument(argument_ptr.get()))
|
||||
@@ -72,24 +77,45 @@ int run_groupnorm_example()
|
||||
|
||||
bool pass = true;
|
||||
{
|
||||
Tensor<YDataType> host_y(f_host_tensor_descriptor2d(M, N, Stride));
|
||||
using ReferenceInstance = ck::tensor_operation::host::ReferenceLayernorm<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
ComputeDataType,
|
||||
PassThrough,
|
||||
Rank,
|
||||
NumReduceDim>;
|
||||
Tensor<YDataType> host_y({M, N});
|
||||
Tensor<SaveMeanInvStdDataType> host_save_mean({M});
|
||||
Tensor<SaveMeanInvStdDataType> host_save_inv_std({M});
|
||||
|
||||
using ReferenceInstance =
|
||||
ck::tensor_operation::host::ReferenceLayernorm<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
ComputeDataType,
|
||||
PassThrough,
|
||||
Rank,
|
||||
NumReduceDim>;
|
||||
|
||||
ReferenceInstance ref;
|
||||
auto ref_argument =
|
||||
ref.MakeArgument(x, gamma, beta, host_y, PassThrough{}, {M, N}, {1}, 1e-4);
|
||||
auto ref_invoker = ref.MakeInvoker();
|
||||
auto ref_argument = ref.MakeArgument(x,
|
||||
gamma,
|
||||
beta,
|
||||
host_y,
|
||||
host_save_mean,
|
||||
host_save_inv_std,
|
||||
PassThrough{},
|
||||
{M, N},
|
||||
{1},
|
||||
1e-4);
|
||||
auto ref_invoker = ref.MakeInvoker();
|
||||
ref_invoker.Run(ref_argument);
|
||||
|
||||
y_dev.FromDevice(y.mData.data());
|
||||
pass &= ck::utils::check_err(y, host_y, "Error: Incorrect results", 1e-3, 1e-3);
|
||||
pass &= ck::utils::check_err(y, host_y, "Error: Incorrect results (y)", 1e-3, 1e-3);
|
||||
#ifdef SAVE_MEAN_INV_STD
|
||||
save_mean_dev.FromDevice(save_mean.mData.data());
|
||||
save_inv_std_dev.FromDevice(save_inv_std.mData.data());
|
||||
pass &= ck::utils::check_err(
|
||||
save_mean, host_save_mean, "Error: Incorrect results (mean)", 1e-3, 1e-3);
|
||||
pass &= ck::utils::check_err(
|
||||
save_inv_std, host_save_inv_std, "Error: Incorrect results (inv_std)", 1e-3, 1e-3);
|
||||
#endif
|
||||
}
|
||||
|
||||
return (pass ? 0 : 1);
|
||||
|
||||
@@ -6,11 +6,14 @@
|
||||
constexpr int Rank = 5;
|
||||
constexpr int NumReduceDim = 3;
|
||||
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using ComputeDataType = float;
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using SaveMeanInvStdDataType = float;
|
||||
using ComputeDataType = float;
|
||||
|
||||
#define SAVE_MEAN_INV_STD
|
||||
|
||||
struct YElementOp
|
||||
{
|
||||
@@ -39,6 +42,7 @@ using DeviceInstance =
|
||||
BetaDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
YElementOp,
|
||||
Rank,
|
||||
NumReduceDim,
|
||||
@@ -53,7 +57,8 @@ using DeviceInstance =
|
||||
2, // GammaScalarPerVector
|
||||
1, // BetaVecDim (0=M, 1=K)
|
||||
2, // BetaScalarPerVector
|
||||
2>; // OutScalarPerVector
|
||||
2, // YScalarPerVector
|
||||
1>; // SaveMeanInvStdScalarPerVector
|
||||
|
||||
#include "run_groupnorm_example.inc"
|
||||
|
||||
|
||||
@@ -6,12 +6,15 @@
|
||||
constexpr int Rank = 5;
|
||||
constexpr int NumReduceDim = 3;
|
||||
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using ComputeDataType = float;
|
||||
using YElementOp = ck::tensor_operation::element_wise::Swish;
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using SaveMeanInvStdDataType = float;
|
||||
using ComputeDataType = float;
|
||||
using YElementOp = ck::tensor_operation::element_wise::Swish;
|
||||
|
||||
#define SAVE_MEAN_INV_STD
|
||||
|
||||
using DeviceInstance =
|
||||
ck::tensor_operation::device::DeviceNormalizationSplitKImpl<XDataType,
|
||||
@@ -19,6 +22,7 @@ using DeviceInstance =
|
||||
BetaDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
YElementOp,
|
||||
Rank,
|
||||
NumReduceDim,
|
||||
@@ -33,7 +37,8 @@ using DeviceInstance =
|
||||
2, // GammaScalarPerVector
|
||||
1, // BetaVecDim (0=M, 1=K)
|
||||
2, // BetaScalarPerVector
|
||||
2>; // OutScalarPerVector
|
||||
2, // YScalarPerVector
|
||||
1>; // SaveMeanInvStdScalarPerVector
|
||||
|
||||
#include "run_groupnorm_example.inc"
|
||||
|
||||
|
||||
@@ -6,12 +6,15 @@
|
||||
constexpr int Rank = 5;
|
||||
constexpr int NumReduceDim = 3;
|
||||
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using ComputeDataType = float;
|
||||
using YElementOp = ck::tensor_operation::element_wise::Swish;
|
||||
using XDataType = ck::half_t;
|
||||
using GammaDataType = ck::half_t;
|
||||
using BetaDataType = ck::half_t;
|
||||
using YDataType = ck::half_t;
|
||||
using SaveMeanInvStdDataType = float;
|
||||
using ComputeDataType = float;
|
||||
using YElementOp = ck::tensor_operation::element_wise::Swish;
|
||||
|
||||
#define SAVE_MEAN_INV_STD
|
||||
|
||||
using DeviceInstance =
|
||||
ck::tensor_operation::device::DeviceNormalizationImpl<XDataType,
|
||||
@@ -19,6 +22,7 @@ using DeviceInstance =
|
||||
BetaDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
YElementOp,
|
||||
Rank,
|
||||
NumReduceDim,
|
||||
@@ -33,7 +37,8 @@ using DeviceInstance =
|
||||
2, // GammaScalarPerVector
|
||||
1, // BetaVecDim (0=M, 1=K)
|
||||
2, // BetaScalarPerVector
|
||||
2>; // OutScalarPerVector
|
||||
2, // YScalarPerVector
|
||||
1>; // SaveMeanInvStdScalarPerVector
|
||||
|
||||
#include "run_groupnorm_example.inc"
|
||||
|
||||
|
||||
@@ -34,6 +34,8 @@ int run_groupnorm_example(int argc, char* argv[])
|
||||
Tensor<YDataType> y({N, H, W, G, C});
|
||||
Tensor<GammaDataType> gamma({G, C});
|
||||
Tensor<BetaDataType> beta({G, C});
|
||||
Tensor<SaveMeanInvStdDataType> save_mean({N, G});
|
||||
Tensor<SaveMeanInvStdDataType> save_inv_std({N, G});
|
||||
|
||||
ck::utils::FillUniformDistribution<XDataType>{0.f, 1.f}(x);
|
||||
ck::utils::FillUniformDistribution<GammaDataType>{0.f, 1.f}(gamma);
|
||||
@@ -43,6 +45,11 @@ int run_groupnorm_example(int argc, char* argv[])
|
||||
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
|
||||
DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpaceSize());
|
||||
DeviceMem y_dev(sizeof(YDataType) * y.mDesc.GetElementSpaceSize());
|
||||
#ifdef SAVE_MEAN_INV_STD
|
||||
DeviceMem save_mean_dev(sizeof(SaveMeanInvStdDataType) * save_mean.mDesc.GetElementSpaceSize());
|
||||
DeviceMem save_inv_std_dev(sizeof(SaveMeanInvStdDataType) *
|
||||
save_inv_std.mDesc.GetElementSpaceSize());
|
||||
#endif
|
||||
|
||||
x_dev.ToDevice(x.mData.data());
|
||||
gamma_dev.ToDevice(gamma.mData.data());
|
||||
@@ -57,14 +64,23 @@ int run_groupnorm_example(int argc, char* argv[])
|
||||
{0, 0, 0, C, 1},
|
||||
{0, 0, 0, C, 1},
|
||||
std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
{1, 2, 4}, // reduction dimension: [H, W, C]
|
||||
1e-6,
|
||||
x_dev.GetDeviceBuffer(),
|
||||
gamma_dev.GetDeviceBuffer(),
|
||||
beta_dev.GetDeviceBuffer(),
|
||||
y_dev.GetDeviceBuffer(),
|
||||
#ifdef SAVE_MEAN_INV_STD
|
||||
save_mean_dev.GetDeviceBuffer(),
|
||||
save_inv_std_dev.GetDeviceBuffer(),
|
||||
#else
|
||||
nullptr,
|
||||
nullptr,
|
||||
#endif
|
||||
y_element_op);
|
||||
|
||||
if(!device_instance.IsSupportedArgument(argument_ptr.get()))
|
||||
@@ -92,21 +108,40 @@ int run_groupnorm_example(int argc, char* argv[])
|
||||
bool pass = true;
|
||||
{
|
||||
Tensor<YDataType> host_y({N, H, W, G, C});
|
||||
using ReferenceInstance = ck::tensor_operation::host::ReferenceGroupnorm<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
ComputeDataType,
|
||||
YElementOp>;
|
||||
Tensor<SaveMeanInvStdDataType> host_save_mean(HostTensorDescriptor{N, G});
|
||||
Tensor<SaveMeanInvStdDataType> host_save_inv_std(HostTensorDescriptor{N, G});
|
||||
using ReferenceInstance =
|
||||
ck::tensor_operation::host::ReferenceGroupnorm<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
ComputeDataType,
|
||||
YElementOp>;
|
||||
|
||||
ReferenceInstance ref;
|
||||
auto ref_argument =
|
||||
ref.MakeArgument(x, gamma, beta, host_y, y_element_op, {N, H, W, G, C}, 1e-6);
|
||||
auto ref_invoker = ref.MakeInvoker();
|
||||
auto ref_argument = ref.MakeArgument(x,
|
||||
gamma,
|
||||
beta,
|
||||
host_y,
|
||||
host_save_mean,
|
||||
host_save_inv_std,
|
||||
y_element_op,
|
||||
{N, H, W, G, C},
|
||||
1e-6);
|
||||
auto ref_invoker = ref.MakeInvoker();
|
||||
ref_invoker.Run(ref_argument);
|
||||
|
||||
y_dev.FromDevice(y.mData.data());
|
||||
pass &= ck::utils::check_err(y, host_y, "Error: Incorrect results", 1e-3, 1e-3);
|
||||
#ifdef SAVE_MEAN_INV_STD
|
||||
save_mean_dev.FromDevice(save_mean.mData.data());
|
||||
save_inv_std_dev.FromDevice(save_inv_std.mData.data());
|
||||
pass &= ck::utils::check_err(
|
||||
save_mean, host_save_mean, "Error: Incorrect results (mean)", 1e-3, 1e-3);
|
||||
pass &= ck::utils::check_err(
|
||||
save_inv_std, host_save_inv_std, "Error: Incorrect results (inv_std)", 1e-3, 1e-3);
|
||||
#endif
|
||||
}
|
||||
|
||||
return (pass ? 0 : 1);
|
||||
|
||||
@@ -167,20 +167,31 @@ int main()
|
||||
XElementwiseOperation>(x, a, b, mn, XElementwiseOperation{});
|
||||
|
||||
Tensor<YDataType> host_y(f_host_tensor_descriptor2d(M, N, Stride));
|
||||
Tensor<AccDataType> host_save_mean({M});
|
||||
Tensor<AccDataType> host_save_inv_std({M});
|
||||
using ReferenceInstance =
|
||||
ck::tensor_operation::host::ReferenceLayernorm<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
AccDataType,
|
||||
AccDataType,
|
||||
YElementwiseOperation,
|
||||
Rank,
|
||||
NumReduceDim>;
|
||||
|
||||
ReferenceInstance ref;
|
||||
auto ref_argument =
|
||||
ref.MakeArgument(x, gamma, beta, host_y, YElementwiseOperation{}, {M, N}, {1}, 1e-4);
|
||||
auto ref_invoker = ref.MakeInvoker();
|
||||
auto ref_argument = ref.MakeArgument(x,
|
||||
gamma,
|
||||
beta,
|
||||
host_y,
|
||||
host_save_mean,
|
||||
host_save_inv_std,
|
||||
YElementwiseOperation{},
|
||||
{M, N},
|
||||
{1},
|
||||
1e-4);
|
||||
auto ref_invoker = ref.MakeInvoker();
|
||||
ref_invoker.Run(ref_argument);
|
||||
|
||||
y_dev.FromDevice(y.mData.data());
|
||||
|
||||
Reference in New Issue
Block a user