diff --git a/experimental/ck_tile_profiler/include/tile_profile_grouped_conv_fwd_impl.hpp b/experimental/ck_tile_profiler/include/tile_profile_grouped_conv_fwd_impl.hpp index 8a437e612e..22f4f20efb 100644 --- a/experimental/ck_tile_profiler/include/tile_profile_grouped_conv_fwd_impl.hpp +++ b/experimental/ck_tile_profiler/include/tile_profile_grouped_conv_fwd_impl.hpp @@ -116,7 +116,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification, // get device op instances const auto ops = ck_tile::ops::DeviceOperationInstanceFactory::GetInstances(); - std::cout << "found " << ops.size() << " instances" << std::endl; + std::cout << "Found " << ops.size() << " instances\n" << std::endl; std::string best_op_name; float best_avg_time = 0; @@ -149,13 +149,14 @@ bool profile_grouped_conv_fwd_impl(int do_verification, if(op->IsSupportedArgument(args)) { num_kernel++; + std::string op_name = op->GetName(args); if((instance_index != -1) && (instance_index + 1 != num_kernel)) { // skip test if instance_index is specified continue; } - std::string op_name = op->GetName(args); + std::cout << op_name << " is profiled..." << std::endl; // Run verification first. If it doesn't pass, no need to do performance measurement. @@ -228,7 +229,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification, } else { - // std::cout << op->GetName(args) << " does not support this problem." << std::endl; + std::cout << op->GetName(args) << " does not support this problem." << std::endl; } } diff --git a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_fwd_fp16_instances.hpp b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_fwd_fp16_instances.hpp index 49b30471d4..c73966a57d 100644 --- a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_fwd_fp16_instances.hpp +++ b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_fwd_fp16_instances.hpp @@ -32,7 +32,9 @@ using tile_grouped_conv_fwd_fp16_instances = std::tuple< //##############################| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| In| Wei| Out| Conv| K-block| M-tile| N-tile | K-tile | M-warp| N-warp| K-warp| M-warp| N-warp| K-warp| Vector| Vector| Vector| Double| GEMM| //##############################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| Spec| per| | | | | | | tile| tile| tile| size| size| size| smem| pipeline| //##############################| Spatial| | | | | | | Operation| Operation| Operation| | CU| | | | | | | size| size| size| A| B| C| buffer| version| -GroupedConvolutionForwardInvoker +GroupedConvolutionForwardInvoker, +GroupedConvolutionForwardInvoker +// clang-format on // clang-format on >; diff --git a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_fwd_int8_instances.hpp b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_fwd_int8_instances.hpp index 0c85b73a42..b06344ec23 100644 --- a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_fwd_int8_instances.hpp +++ b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_fwd_int8_instances.hpp @@ -22,6 +22,8 @@ using DeviceOpFwd2DINT8 = GroupedConvolutionForwardBaseInvoker<2, PassThrough, INT8, INT8>; + +//using enum ConvSpecDef = ConvolutionSpecialization::Default; template +GroupedConvolutionForwardInvoker, +GroupedConvolutionForwardInvoker + +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_2x +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 1 +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 2 +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 3 +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 4 (MPerXDL=16) +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 5 +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 6 +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 7 +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// Converted from device_grouped_conv_fwd_xdl_int8_comp_instances_part2 - Instance 8 +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker, +// GroupedConvolutionForwardInvoker // clang-format on >; diff --git a/script/navi4x_int8_test_shapes.txt b/script/navi4x_int8_test_shapes.txt index fa9e449ff5..f7885f5d46 100644 --- a/script/navi4x_int8_test_shapes.txt +++ b/script/navi4x_int8_test_shapes.txt @@ -1,7 +1,7 @@ FP16 -data_type layout verify init_method print time_kernel spat_dim G N K C Y X Hi Wi Sy Sx Dy Dx Left_pad_y Left_pad_x Right_pad_y Right_pad_x - 1 1 0 1 0 1 2 1 1 32 32 3 3 800 1280 1 1 1 1 1 1 1 1 + op data_type layout verify init_method print time_kernel spat_dim G N K C Y X Hi Wi Sy Sx Dy Dx Left_pad_y Left_pad_x Right_pad_y Right_pad_x +grouped_conv_fwd 1 1 0 1 0 1 2 1 1 32 32 3 3 800 1280 1 1 1 1 1 1 1 1 INT8 -data_type layout verify init_method print time_kernel spat_dim G N K C Y X Hi Wi Sy Sx Dy Dx Left_pad_y Left_pad_x Right_pad_y Right_pad_x - 3 1 0 1 0 1 2 1 1 32 32 3 3 800 1280 1 1 1 1 1 1 1 1 \ No newline at end of file + op data_type layout verify init_method print time_kernel spat_dim G N K C Y X Hi Wi Sy Sx Dy Dx Left_pad_y Left_pad_x Right_pad_y Right_pad_x +grouped_conv_fwd 3 1 0 1 0 1 2 1 1 32 32 3 3 800 1280 1 1 1 1 1 1 1 1 \ No newline at end of file