From 9b34909b766345eb051b5ed124bf7eeafb1a95ec Mon Sep 17 00:00:00 2001 From: joyeamd Date: Mon, 9 Jun 2025 15:13:40 +0800 Subject: [PATCH] add default nullptr --- ...nv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp | 13 +- stride2_tile_mapping.cpp | 167 ++++++++++++++++ tile_mapping_example.cpp | 181 ++++++++++++++++++ 3 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 stride2_tile_mapping.cpp create mode 100644 tile_mapping_example.cpp diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp index d91b8182f5..85823cb98c 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp @@ -1450,7 +1450,18 @@ struct DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1 } } - return nullptr; + auto default_kernel = &kernel_grouped_conv_bwd_data_optimized; + return static_cast(nullptr); }; const auto kernel = kernel_selector(); diff --git a/stride2_tile_mapping.cpp b/stride2_tile_mapping.cpp new file mode 100644 index 0000000000..f4fb9b4d6c --- /dev/null +++ b/stride2_tile_mapping.cpp @@ -0,0 +1,167 @@ +// 4x4 input tile to output tile mapping - stride=2 case +#include +#include +#include + +struct TileRange +{ + int h_min, h_max, w_min, w_max; + int size_h() const { return h_max - h_min + 1; } + int size_w() const { return w_max - w_min + 1; } + bool is_valid() const { return h_min <= h_max && w_min <= w_max; } +}; + +// Compute the output tile range affected by a 4x4 input tile +TileRange calculate_output_tile_stride2(int input_h_start, int input_w_start) +{ + // Convolution parameters + const int input_tile_size = 4; + const int kernel_h = 3, kernel_w = 3; + const int stride_h = 2, stride_w = 2; + const int pad_h = 1, pad_w = 1; + + TileRange output_range; + + // Input tile range: [h_start, h_start+3] x 
[w_start, w_start+3] + int input_h_end = input_h_start + input_tile_size - 1; + int input_w_end = input_w_start + input_tile_size - 1; + + // Ceiling division (exact for a >= 0, b > 0; negative inputs are clamped by std::max below) + auto ceil_div = [](int a, int b) { return (a + b - 1) / b; }; + + // Minimum output position: when the filter is at the bottom-right corner (kh=2, kw=2) + output_range.h_min = std::max(0, ceil_div(input_h_start + pad_h - (kernel_h - 1), stride_h)); + output_range.w_min = std::max(0, ceil_div(input_w_start + pad_w - (kernel_w - 1), stride_w)); + + // Maximum output position: when the filter is at the top-left corner (kh=0, kw=0) + output_range.h_max = (input_h_end + pad_h) / stride_h; + output_range.w_max = (input_w_end + pad_w) / stride_w; + + return output_range; +} + +// Check whether an input position affects an output position +bool input_affects_output_stride2(int input_h, int input_w, int output_h, int output_w) +{ + const int kernel_h = 3, kernel_w = 3; + const int stride_h = 2, stride_w = 2; + const int pad_h = 1, pad_w = 1; + + // Walk every input position sampled by the filter for this output position + for(int kh = 0; kh < kernel_h; kh++) + { + for(int kw = 0; kw < kernel_w; kw++) + { + int sample_h = output_h * stride_h - pad_h + kh; + int sample_w = output_w * stride_w - pad_w + kw; + + if(sample_h == input_h && sample_w == input_w) + { + return true; + } + } + } + return false; +} + +// Get all input positions sampled by an output position +std::vector> get_sampled_positions(int output_h, int output_w) +{ + const int kernel_h = 3, kernel_w = 3; + const int stride_h = 2, stride_w = 2; + const int pad_h = 1, pad_w = 1; + + std::vector> positions; + + for(int kh = 0; kh < kernel_h; kh++) + { + for(int kw = 0; kw < kernel_w; kw++) + { + int sample_h = output_h * stride_h - pad_h + kh; + int sample_w = output_w * stride_w - pad_w + kw; + positions.push_back({sample_h, sample_w}); + } + } + return positions; +} + +int main() +{ + std::cout << "=== 4x4 Input Tile映射分析 (Stride=2) ===" << std::endl; + std::cout << "参数: kernel=3x3, stride=2x2, pad=1x1" << std::endl; + std::cout << std::endl; + + // Test several different input tile positions + std::vector> test_tiles = {{0, 0}, {2, 2}, {4, 4}, {6, 6}, {8, 8}}; + + for(auto [h_start, w_start] : test_tiles) + { + std::cout 
<< "输入Tile [" << h_start << ":" << h_start + 3 << ", " << w_start << ":" + << w_start + 3 << "]" << std::endl; + + TileRange output_tile = calculate_output_tile_stride2(h_start, w_start); + + if(output_tile.is_valid()) + { + std::cout << " -> 输出Tile [" << output_tile.h_min << ":" << output_tile.h_max << ", " + << output_tile.w_min << ":" << output_tile.w_max << "]" << std::endl; + std::cout << " -> 输出大小: " << output_tile.size_h() << "x" << output_tile.size_w() + << std::endl; + + // Detailed analysis of each output position + std::cout << " -> 详细映射关系:" << std::endl; + for(int h = output_tile.h_min; h <= output_tile.h_max; h++) + { + for(int w = output_tile.w_min; w <= output_tile.w_max; w++) + { + std::cout << " Output(" << h << "," << w << ") 采样输入位置: "; + + auto sampled = get_sampled_positions(h, w); + bool first = true; + for(auto [sh, sw] : sampled) + { + // Check whether the sample lies within the current input tile range + if(sh >= h_start && sh < h_start + 4 && sw >= w_start && sw < w_start + 4) + { + if(!first) + std::cout << ", "; + std::cout << "(" << sh << "," << sw << ")"; + first = false; + } + } + std::cout << std::endl; + } + } + } + else + { + std::cout << " -> 无效的输出tile (该输入tile不影响任何输出)" << std::endl; + } + std::cout << std::endl; + } + + // Special analysis: effect of stride=2 + std::cout << "=== Stride=2的影响分析 ===" << std::endl; + std::cout << "对比同一输入tile在不同stride下的输出范围:" << std::endl; + + // Input tile [0:3, 0:3] (NOTE(review): h_start/w_start declared below are never read afterwards) + int h_start = 0, w_start = 0; + + // Stride=1 case (theoretical calculation) + std::cout << "输入Tile [0:3, 0:3]:" << std::endl; + std::cout << " Stride=1: 输出范围大约是 [0:4, 0:4] (5x5)" << std::endl; + + // Actual stride=2 case + TileRange stride2_output = calculate_output_tile_stride2(0, 0); + std::cout << " Stride=2: 输出范围是 [" << stride2_output.h_min << ":" << stride2_output.h_max + << ", " << stride2_output.w_min << ":" << stride2_output.w_max << "] (" + << stride2_output.size_h() << "x" << stride2_output.size_w() << ")" << std::endl; + + std::cout << std::endl; + std::cout << "=== 关键观察 ===" << std::endl; + std::cout << "1. 
Stride=2时,输出尺寸大约是输入的一半" << std::endl; + std::cout << "2. 4x4输入tile通常影响2x2或3x3的输出tile" << std::endl; + std::cout << "3. 输出位置之间有间隔,不是连续的密集映射" << std::endl; + + return 0; +} \ No newline at end of file diff --git a/tile_mapping_example.cpp b/tile_mapping_example.cpp new file mode 100644 index 0000000000..7059452693 --- /dev/null +++ b/tile_mapping_example.cpp @@ -0,0 +1,181 @@ +// Example computation of the input-tile to output-tile mapping (including dilation) +#include +#include + +struct TileRange +{ + int h_min, h_max, w_min, w_max; + int size_h() const { return h_max - h_min + 1; } + int size_w() const { return w_max - w_min + 1; } +}; + +// Compute the effective filter size +int effective_filter_size(int filter_size, int dilation) +{ + return (filter_size - 1) * dilation + 1; +} + +// Compute the output tile range affected by an input tile (including dilation) +TileRange calculate_output_tile_with_dilation(int input_h_start, + int input_w_start, + int input_tile_size, + int filter_h, + int filter_w, + int dilation_h, + int dilation_w, + int pad_h, + int pad_w, + int stride_h, + int stride_w) +{ + TileRange output_range; + + // Compute the effective filter size + int eff_filter_h = effective_filter_size(filter_h, dilation_h); + int eff_filter_w = effective_filter_size(filter_w, dilation_w); + + // Input tile range + int input_h_end = input_h_start + input_tile_size - 1; + int input_w_end = input_w_start + input_tile_size - 1; + + // Compute the output range - using round-up integer division + auto ceil_div = [](int a, int b) { return (a + b - 1) / b; }; + + // Minimum output position: when the filter is at the bottom-right corner + output_range.h_min = std::max(0, ceil_div(input_h_start + pad_h - eff_filter_h + 1, stride_h)); + output_range.w_min = std::max(0, ceil_div(input_w_start + pad_w - eff_filter_w + 1, stride_w)); + + // Maximum output position: when the filter is at the top-left corner + output_range.h_max = (input_h_end + pad_h) / stride_h; + output_range.w_max = (input_w_end + pad_w) / stride_w; + + return output_range; +} + +// Verification function: check whether an input position affects an output position (including dilation) +bool input_affects_output_with_dilation(int input_h, + int input_w, + int output_h, + int output_w, + int filter_h, + int filter_w, + int dilation_h, + int dilation_w, + int 
pad_h, + int pad_w, + int stride_h, + int stride_w) +{ + // Compute the positions the filter samples on the input + for(int kh = 0; kh < filter_h; kh++) + { + for(int kw = 0; kw < filter_w; kw++) + { + int sample_h = output_h * stride_h - pad_h + kh * dilation_h; + int sample_w = output_w * stride_w - pad_w + kw * dilation_w; + + if(sample_h == input_h && sample_w == input_w) + { + return true; + } + } + } + return false; +} + +int main() +{ + // Convolution parameters + int filter_h = 3, filter_w = 3; + int pad_h = 1, pad_w = 1; + int stride_h = 1, stride_w = 1; + int input_tile_size = 4; + + // Test different dilation values + int dilations[] = {1, 2, 3}; + + for(int dilation : dilations) + { + std::cout << "=== 4x4 Input Tile Mapping with Dilation=" << dilation << " ===" << std::endl; + std::cout << "Filter: " << filter_h << "x" << filter_w << std::endl; + std::cout << "Effective Filter: " << effective_filter_size(filter_h, dilation) << "x" + << effective_filter_size(filter_w, dilation) << std::endl; + std::cout << "Padding: " << pad_h << "x" << pad_w << std::endl; + std::cout << "Stride: " << stride_h << "x" << stride_w << std::endl << std::endl; + + // Test several different input tile positions + int test_positions[][2] = {{0, 0}, {2, 2}, {4, 4}}; + + for(auto& pos : test_positions) + { + int h_start = pos[0], w_start = pos[1]; + + TileRange output_tile = calculate_output_tile_with_dilation(h_start, + w_start, + input_tile_size, + filter_h, + filter_w, + dilation, + dilation, + pad_h, + pad_w, + stride_h, + stride_w); + + std::cout << "Input Tile [" << h_start << ":" << h_start + input_tile_size - 1 << ", " + << w_start << ":" << w_start + input_tile_size - 1 << "]" << std::endl; + std::cout << " -> Output Tile [" << output_tile.h_min << ":" << output_tile.h_max + << ", " << output_tile.w_min << ":" << output_tile.w_max << "]" << std::endl; + std::cout << " -> Output Size: " << output_tile.size_h() << "x" << output_tile.size_w() + << std::endl; + + // Verify a few key points + std::cout << " -> Detailed mapping for output positions:" << std::endl; + for(int h = 
output_tile.h_min; h <= std::min(output_tile.h_max, output_tile.h_min + 2); + h++) + { + for(int w = output_tile.w_min; + w <= std::min(output_tile.w_max, output_tile.w_min + 2); + w++) + { + std::cout << " Output(" << h << "," << w << ") samples from input: "; + + // Show all input positions sampled by this output position + for(int kh = 0; kh < filter_h; kh++) + { + for(int kw = 0; kw < filter_w; kw++) + { + int sample_h = h * stride_h - pad_h + kh * dilation; + int sample_w = w * stride_w - pad_w + kw * dilation; + + // Check whether the sample lies within the current tile range + if(sample_h >= h_start && sample_h < h_start + input_tile_size && + sample_w >= w_start && sample_w < w_start + input_tile_size) + { + std::cout << "(" << sample_h << "," << sample_w << ") "; + } + } + } + std::cout << std::endl; + } + } + std::cout << std::endl; + } + + // Show the effect of dilation. NOTE(review): by brace count this section appears to sit inside the dilation loop, so it prints once per dilation value - confirm intended + std::cout << "=== Dilation Effect Analysis ===" << std::endl; + std::cout << "For Input Tile [0:3, 0:3] with different dilations:" << std::endl; + + for(int d = 1; d <= 3; d++) + { + TileRange range = calculate_output_tile_with_dilation( + 0, 0, 4, filter_h, filter_w, d, d, pad_h, pad_w, stride_h, stride_w); + std::cout << " Dilation " << d << ": Output [" << range.h_min << ":" << range.h_max + << ", " << range.w_min << ":" << range.w_max << "] (size: " << range.size_h() + << "x" << range.size_w() << ")" << std::endl; + } + std::cout << std::endl; + } + + return 0; +} \ No newline at end of file