mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-30 19:57:40 +00:00
Added working fp16 and int8 instances.
This commit is contained in:
@@ -116,7 +116,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
|
||||
// get device op instances
|
||||
const auto ops = ck_tile::ops::DeviceOperationInstanceFactory<DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << ops.size() << " instances" << std::endl;
|
||||
std::cout << "Found " << ops.size() << " instances\n" << std::endl;
|
||||
|
||||
std::string best_op_name;
|
||||
float best_avg_time = 0;
|
||||
@@ -149,13 +149,14 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
|
||||
if(op->IsSupportedArgument(args))
|
||||
{
|
||||
num_kernel++;
|
||||
std::string op_name = op->GetName(args);
|
||||
if((instance_index != -1) && (instance_index + 1 != num_kernel))
|
||||
{
|
||||
// skip test if instance_index is specified
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string op_name = op->GetName(args);
|
||||
|
||||
std::cout << op_name << " is profiled..." << std::endl;
|
||||
|
||||
// Run verification first. If it doesn't pass, no need to do performance measurement.
|
||||
@@ -228,7 +229,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
|
||||
}
|
||||
else
|
||||
{
|
||||
// std::cout << op->GetName(args) << " does not support this problem." << std::endl;
|
||||
std::cout << op->GetName(args) << " does not support this problem." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -32,7 +32,9 @@ using tile_grouped_conv_fwd_fp16_instances = std::tuple<
|
||||
//##############################| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| In| Wei| Out| Conv| K-block| M-tile| N-tile | K-tile | M-warp| N-warp| K-warp| M-warp| N-warp| K-warp| Vector| Vector| Vector| Double| GEMM|
|
||||
//##############################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| Spec| per| | | | | | | tile| tile| tile| size| size| size| smem| pipeline|
|
||||
//##############################| Spatial| | | | | | | Operation| Operation| Operation| | CU| | | | | | | size| size| size| A| B| C| buffer| version|
|
||||
GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 64, 64, 32, 1, 1, 1, 32, 32, 16, 1, 1, 1, false, CK_TILE_PIPELINE_MEMORY>
|
||||
GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 64, 64, 32, 1, 1, 1, 32, 32, 16, 1, 1, 1, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 64, 64, 32, 1, 1, 1, 32, 32, 16, 1, 1, 1, false, CK_TILE_PIPELINE_MEMORY>
|
||||
// clang-format on
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
|
||||
@@ -22,6 +22,8 @@ using DeviceOpFwd2DINT8 = GroupedConvolutionForwardBaseInvoker<2,
|
||||
PassThrough,
|
||||
INT8,
|
||||
INT8>;
|
||||
|
||||
//using enum ConvSpecDef = ConvolutionSpecialization::Default;
|
||||
|
||||
template <ck_tile::index_t NDimSpatial,
|
||||
typename ALayout,
|
||||
@@ -32,7 +34,139 @@ using tile_grouped_conv_fwd_int8_instances = std::tuple<
|
||||
//##############################| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| In| Wei| Out| Conv| K-block| M-tile| N-tile | K-tile | M-warp| N-warp| K-warp| M-warp| N-warp| K-warp| Vector| Vector| Vector| Double| GEMM|
|
||||
//##############################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| Spec| per| | | | | | | tile| tile| tile| size| size| size| smem| pipeline|
|
||||
//##############################| Spatial| | | | | | | Operation| Operation| Operation| | CU| | | | | | | size| size| size| A| B| C| buffer| version|
|
||||
GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 64, 64, 32, 1, 1, 1, 32, 32, 16, 1, 1, 1, false, CK_TILE_PIPELINE_MEMORY>
|
||||
GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 64, 64, 32, 1, 1, 1, 32, 32, 16, 1, 1, 1, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 64, 64, 32, 1, 1, 1, 32, 32, 16, 1, 1, 1, false, CK_TILE_PIPELINE_MEMORY>
|
||||
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_2x
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 128, 2, 2, 1, 32, 32, 32, 16, 16, 16, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 32, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 1
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 2
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 3
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 4, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 4 (MPerXDL=16)
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 256, 32, 8, 8, 1, 16, 16, 16, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 5
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 6
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 256, 32, 2, 4, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 7
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 256, 128, 32, 4, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 8
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Stride1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter1x1Pad0, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Filter3x3, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_MEMORY>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
// GroupedConvolutionForwardInvoker<NDimSpatial, ALayout, BLayout, ELayout, INT8, INT8, INT8, PassThrough, PassThrough, PassThrough, ConvolutionSpecialization::Default, 1, 128, 128, 64, 2, 2, 1, 32, 32, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
FP16
|
||||
data_type layout verify init_method print time_kernel spat_dim G N K C Y X Hi Wi Sy Sx Dy Dx Left_pad_y Left_pad_x Right_pad_y Right_pad_x
|
||||
1 1 0 1 0 1 2 1 1 32 32 3 3 800 1280 1 1 1 1 1 1 1 1
|
||||
op data_type layout verify init_method print time_kernel spat_dim G N K C Y X Hi Wi Sy Sx Dy Dx Left_pad_y Left_pad_x Right_pad_y Right_pad_x
|
||||
grouped_conv_fwd 1 1 0 1 0 1 2 1 1 32 32 3 3 800 1280 1 1 1 1 1 1 1 1
|
||||
|
||||
INT8
|
||||
data_type layout verify init_method print time_kernel spat_dim G N K C Y X Hi Wi Sy Sx Dy Dx Left_pad_y Left_pad_x Right_pad_y Right_pad_x
|
||||
3 1 0 1 0 1 2 1 1 32 32 3 3 800 1280 1 1 1 1 1 1 1 1
|
||||
op data_type layout verify init_method print time_kernel spat_dim G N K C Y X Hi Wi Sy Sx Dy Dx Left_pad_y Left_pad_x Right_pad_y Right_pad_x
|
||||
grouped_conv_fwd 3 1 0 1 0 1 2 1 1 32 32 3 3 800 1280 1 1 1 1 1 1 1 1
|
||||
Reference in New Issue
Block a user