mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 09:45:56 +00:00
Compile for gfx908 and gfx90a (#130)
* adding compilation for multiple targets * fix build * clean * update Jenkinsfile * update readme * update Jenkins * use ck::bhalf_t instead of ushort for bf16 * rename enum classes * clean * rename * clean
This commit is contained in:
@@ -39,7 +39,7 @@ void host_direct_convolution_add_nchwc(const Tensor<TIn>& in,
|
||||
const ConvDilations& conv_dilations,
|
||||
const InLeftPads& in_left_pads,
|
||||
const InRightPads&,
|
||||
const ck::ActivTypeEnum_t activ_type)
|
||||
const ck::ActivTypeEnum activ_type)
|
||||
{
|
||||
using namespace ck;
|
||||
|
||||
@@ -117,7 +117,7 @@ int main(int argc, char* argv[])
|
||||
exit(1);
|
||||
}
|
||||
|
||||
constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::LeakyRelu;
|
||||
constexpr ck::ActivTypeEnum activ_type = ActivTypeEnum::LeakyRelu;
|
||||
|
||||
const ConvForwardAlgo algo = static_cast<ConvForwardAlgo>(std::stoi(argv[1]));
|
||||
const bool do_verification = std::stoi(argv[2]);
|
||||
@@ -167,7 +167,7 @@ int main(int argc, char* argv[])
|
||||
const bool do_log = std::stoi(argv[4]);
|
||||
const int nrepeat = std::stoi(argv[5]);
|
||||
|
||||
constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::LeakyRelu;
|
||||
constexpr ck::ActivTypeEnum activ_type = ActivTypeEnum::LeakyRelu;
|
||||
|
||||
#if 0
|
||||
constexpr auto N = Number<1>{};
|
||||
|
||||
@@ -37,7 +37,7 @@ void host_direct_convolution_nchwc(const Tensor<TIn>& in,
|
||||
const ConvDilations& conv_dilations,
|
||||
const InLeftPads& in_left_pads,
|
||||
const InRightPads&,
|
||||
const ck::ActivTypeEnum_t activ_type)
|
||||
const ck::ActivTypeEnum activ_type)
|
||||
{
|
||||
using namespace ck;
|
||||
|
||||
@@ -102,7 +102,7 @@ int main(int argc, char* argv[])
|
||||
exit(1);
|
||||
}
|
||||
|
||||
constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::LeakyRelu;
|
||||
constexpr ck::ActivTypeEnum activ_type = ActivTypeEnum::LeakyRelu;
|
||||
|
||||
const ConvForwardAlgo algo = static_cast<ConvForwardAlgo>(std::stoi(argv[1]));
|
||||
const bool do_verification = std::stoi(argv[2]);
|
||||
@@ -149,8 +149,8 @@ int main(int argc, char* argv[])
|
||||
const bool do_log = std::stoi(argv[4]);
|
||||
const int nrepeat = std::stoi(argv[5]);
|
||||
|
||||
// constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::Sigmoid;
|
||||
constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::LeakyRelu;
|
||||
// constexpr ck::ActivTypeEnum activ_type = ActivTypeEnum::Sigmoid;
|
||||
constexpr ck::ActivTypeEnum activ_type = ActivTypeEnum::LeakyRelu;
|
||||
|
||||
#if 0
|
||||
constexpr auto N = Number<1>{};
|
||||
|
||||
@@ -38,7 +38,7 @@ void host_direct_convolution_maxpool_nchwc(const Tensor<TIn>& in,
|
||||
const ConvDilations& conv_dilations,
|
||||
const InLeftPads& in_left_pads,
|
||||
const InRightPads&,
|
||||
const ck::ActivTypeEnum_t activ_type)
|
||||
const ck::ActivTypeEnum activ_type)
|
||||
{
|
||||
using namespace ck;
|
||||
|
||||
@@ -126,7 +126,7 @@ int main(int argc, char* argv[])
|
||||
exit(1);
|
||||
}
|
||||
|
||||
constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::LeakyRelu;
|
||||
constexpr ck::ActivTypeEnum activ_type = ActivTypeEnum::LeakyRelu;
|
||||
|
||||
const ConvForwardAlgo algo = static_cast<ConvForwardAlgo>(std::stoi(argv[1]));
|
||||
const bool do_verification = std::stoi(argv[2]);
|
||||
@@ -176,18 +176,18 @@ int main(int argc, char* argv[])
|
||||
const bool do_log = std::stoi(argv[4]);
|
||||
const int nrepeat = std::stoi(argv[5]);
|
||||
|
||||
constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::LeakyRelu;
|
||||
constexpr ck::ActivTypeEnum activ_type = ActivTypeEnum::LeakyRelu;
|
||||
|
||||
#if 1
|
||||
constexpr auto N = Number<1>{};
|
||||
constexpr auto Hi = Number<1080>{};
|
||||
constexpr auto Wi = Number<1920>{};
|
||||
constexpr auto Y = Number<3>{};
|
||||
constexpr auto X = Number<3>{};
|
||||
constexpr auto C0 = Number<2>{};
|
||||
constexpr auto C1 = Number<8>{};
|
||||
constexpr auto K0 = Number<2>{};
|
||||
constexpr auto K1 = Number<8>{};
|
||||
constexpr auto N = Number<1>{};
|
||||
constexpr auto Hi = Number<1080>{};
|
||||
constexpr auto Wi = Number<1920>{};
|
||||
constexpr auto Y = Number<3>{};
|
||||
constexpr auto X = Number<3>{};
|
||||
constexpr auto C0 = Number<2>{};
|
||||
constexpr auto C1 = Number<8>{};
|
||||
constexpr auto K0 = Number<2>{};
|
||||
constexpr auto K1 = Number<8>{};
|
||||
#elif 0
|
||||
constexpr auto N = Number<1>{};
|
||||
constexpr auto Hi = Number<1080>{};
|
||||
|
||||
@@ -23,7 +23,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using ReduceSum = ck::tensor_operation::element_wise::ReduceSum;
|
||||
using ReduceSquareSum = ck::tensor_operation::element_wise::ReduceSquareSum;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// c[g, m, n] = a[g, m, k] * b[g, n, k]
|
||||
// d0[g, m] = reduce0(c[g, m, n])
|
||||
|
||||
@@ -23,7 +23,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using ReduceSum = ck::tensor_operation::element_wise::ReduceSum;
|
||||
using ReduceSquareSum = ck::tensor_operation::element_wise::ReduceSquareSum;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// c[g, m, n] = a[g, m, k] * b[g, n, k]
|
||||
// d0[g, m] = reduce0(c[g, m, n])
|
||||
|
||||
@@ -23,7 +23,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using ReduceSum = ck::tensor_operation::element_wise::ReduceSum;
|
||||
using ReduceSquareSum = ck::tensor_operation::element_wise::ReduceSquareSum;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// c[g, m, n] = a[g, m, k] * b[g, n, k]
|
||||
// d0[g, m] = reduce0(c[g, m, n])
|
||||
|
||||
@@ -23,7 +23,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using ReduceSum = ck::tensor_operation::element_wise::ReduceSum;
|
||||
using ReduceSquareSum = ck::tensor_operation::element_wise::ReduceSquareSum;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// c[g, m, n] = a[g, m, k] * b[g, n, k]
|
||||
// d0[g, m] = reduce0(c[g, m, n])
|
||||
|
||||
@@ -18,13 +18,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv1d_fwd_xdl_nwc_kxc_nwk_bf16_instances = std::tuple<
|
||||
|
||||
@@ -18,13 +18,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv1d_fwd_xdl_nwc_kxc_nwk_f16_instances = std::tuple<
|
||||
|
||||
@@ -17,13 +17,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Conv1D
|
||||
|
||||
@@ -17,13 +17,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances =
|
||||
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instances = std::tuple<
|
||||
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instances = std::tuple<
|
||||
|
||||
@@ -16,10 +16,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instances = std::tuple<
|
||||
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instances = std::tuple<
|
||||
|
||||
@@ -18,16 +18,16 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
static constexpr auto ConvFwdOddC =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::OddC;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::OddC;
|
||||
|
||||
// arbitrary conv
|
||||
using device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances = std::tuple<
|
||||
|
||||
@@ -18,13 +18,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances = std::tuple<
|
||||
|
||||
@@ -18,13 +18,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances = std::tuple<
|
||||
|
||||
@@ -17,13 +17,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances = std::tuple<
|
||||
|
||||
@@ -17,13 +17,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances = std::tuple<
|
||||
|
||||
@@ -18,19 +18,19 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using AddRelu = ck::tensor_operation::element_wise::AddRelu;
|
||||
|
||||
static constexpr auto MemorySet = ck::InMemoryDataOperationEnum_t::Set;
|
||||
static constexpr auto MemorySet = ck::InMemoryDataOperationEnum::Set;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
static constexpr auto ConvFwdOddC =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::OddC;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::OddC;
|
||||
|
||||
// arbitrary conv
|
||||
using device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instances = std::tuple<
|
||||
|
||||
@@ -19,16 +19,16 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using AddReluAdd = ck::tensor_operation::element_wise::AddReluAdd;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
static constexpr auto ConvFwdOddC =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::OddC;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::OddC;
|
||||
|
||||
// arbitrary conv
|
||||
using device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instances = std::tuple<
|
||||
|
||||
@@ -18,10 +18,10 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using AddRelu = ck::tensor_operation::element_wise::AddRelu;
|
||||
|
||||
static constexpr auto InMemoryAtomicAdd = ck::InMemoryDataOperationEnum_t::AtomicAdd;
|
||||
static constexpr auto InMemoryAtomicAdd = ck::InMemoryDataOperationEnum::AtomicAdd;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
using device_conv2d_fwd_xdl_c_shuffle_bias_relu_atomic_add_nhwc_kyxc_nhwk_f16_instances = std::tuple<
|
||||
// clang-format off
|
||||
|
||||
@@ -18,13 +18,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instances = std::tuple<
|
||||
|
||||
@@ -18,13 +18,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f16_instances = std::tuple<
|
||||
|
||||
@@ -17,13 +17,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f32_instances = std::tuple<
|
||||
|
||||
@@ -17,13 +17,13 @@ using S = ck::Sequence<Is...>;
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvFwd1x1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
|
||||
|
||||
static constexpr auto ConvFwd1x1S1P0 =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances =
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace device_conv2d_bwd_data_instance {
|
||||
|
||||
using BF16 = ushort;
|
||||
using BF16 = bhalf_t;
|
||||
using F32 = float;
|
||||
|
||||
template <ck::index_t... Is>
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instances =
|
||||
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instances =
|
||||
|
||||
@@ -16,10 +16,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instances =
|
||||
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instances =
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace device_conv2d_bwd_data_instance {
|
||||
|
||||
using BF16 = ushort;
|
||||
using BF16 = bhalf_t;
|
||||
using F32 = float;
|
||||
|
||||
template <ck::index_t... Is>
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instances =
|
||||
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instances =
|
||||
|
||||
@@ -16,10 +16,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instances =
|
||||
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instances =
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace device_conv2d_bwd_data_instance {
|
||||
|
||||
using BF16 = ushort;
|
||||
using BF16 = bhalf_t;
|
||||
using F32 = float;
|
||||
|
||||
template <ck::index_t... Is>
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instances =
|
||||
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instances =
|
||||
|
||||
@@ -16,10 +16,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instances =
|
||||
|
||||
@@ -17,10 +17,10 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
static constexpr auto ConvBwdDataDefault =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Default;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;
|
||||
|
||||
static constexpr auto ConvBwdDataFilter1x1Stride1Pad0 =
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization_t::Filter1x1Stride1Pad0;
|
||||
ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Filter1x1Stride1Pad0;
|
||||
|
||||
// Compilation parameters for in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
|
||||
using device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instances =
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[k, m] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_f16_f16_f16_km_kn_mn_instances =
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[k, m] * b[n, k] = c[m, n]
|
||||
using device_gemm_xdl_f16_f16_f16_km_nk_mn_instances =
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances =
|
||||
|
||||
@@ -20,8 +20,8 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization_t::MNPadding;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[n, k] = c[m, n]
|
||||
using device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances =
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[k, m] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_f32_f32_f32_km_kn_mn_instances =
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[k, m] * b[n, k] = c[m, n]
|
||||
using device_gemm_xdl_f32_f32_f32_km_nk_mn_instances =
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances =
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[n, k] = c[m, n]
|
||||
using device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances =
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[k, m] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[k, m] * b[n, k] = c[m, n]
|
||||
using device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization_t::MNPadding;
|
||||
static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
|
||||
using device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[n, k] = c[m, n]
|
||||
using device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances = std::tuple<
|
||||
|
||||
@@ -23,7 +23,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using ReduceSum = ck::tensor_operation::element_wise::ReduceSum;
|
||||
using ReduceSquareSum = ck::tensor_operation::element_wise::ReduceSquareSum;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// c[m, n] = a[k, m] * b[k, n]
|
||||
// d0[m] = reduce0(c[m, n])
|
||||
|
||||
@@ -23,7 +23,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using ReduceSum = ck::tensor_operation::element_wise::ReduceSum;
|
||||
using ReduceSquareSum = ck::tensor_operation::element_wise::ReduceSquareSum;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// c[m, n] = a[k, m] * b[n, k]
|
||||
// d0[m] = reduce0(c[m, n])
|
||||
|
||||
@@ -23,7 +23,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using ReduceSum = ck::tensor_operation::element_wise::ReduceSum;
|
||||
using ReduceSquareSum = ck::tensor_operation::element_wise::ReduceSquareSum;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// c[m, n] = a[m, k] * b[n, k]
|
||||
// d0[m] = reduce0(c[m, n])
|
||||
|
||||
@@ -23,7 +23,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using ReduceSum = ck::tensor_operation::element_wise::ReduceSum;
|
||||
using ReduceSquareSum = ck::tensor_operation::element_wise::ReduceSquareSum;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// c[m, n] = a[m, k] * b[n, k]
|
||||
// d0[m] = reduce0(c[m, n])
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[k, m] * b[k, n] = c[m, n]
|
||||
using device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[k, m] * b[n, k] = c[m, n]
|
||||
using device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,7 +20,7 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
|
||||
using device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instances = std::tuple<
|
||||
|
||||
@@ -20,8 +20,8 @@ using S = ck::Sequence<Is...>;
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization_t::Default;
|
||||
static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization_t::MNPadding;
|
||||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
|
||||
static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding;
|
||||
|
||||
// Compilation parameters for a[m, k] * b[n, k] = c[m, n]
|
||||
using device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instances = std::tuple<
|
||||
|
||||
Reference in New Issue
Block a user