mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-21 21:39:15 +00:00
[CK_TILE] Add pooling in tile_engine (#4469)
## Motivation <!-- Explain the purpose of this PR and the goals it aims to achieve. --> Add pooling in ck tile engine ## Technical Details <!-- Explain the changes along with any relevant GitHub links. --> ## Test Plan <!-- Explain any relevant testing done to verify this PR. --> ## Test Result <!-- Briefly summarize test outcomes. --> ## Submission Checklist - [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. --------- Co-authored-by: Adam Osewski <19374865+aosewski@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,165 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Comprehensive pooling coverage testing - multiple block sizes (64-512), warp configurations, thread tile sizes, and all trait combinations (max/avg reduction, index output, NaN propagation, 2D/3D pooling). More than 200 kernels."
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes_2d": [
|
||||
{
|
||||
"_comment": "Basic: small tensor, 2x2 window, stride 2, no padding",
|
||||
"N": 1, "H": 8, "W": 8, "C": 32,
|
||||
"Y": 2, "X": 2,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
},
|
||||
{
|
||||
"_comment": "Padded 3x3: moderate tensor with symmetric padding, stride 1 (overlapping)",
|
||||
"N": 1, "H": 16, "W": 16, "C": 64,
|
||||
"Y": 3, "X": 3,
|
||||
"stride_h": 1, "stride_w": 1,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 1, "pad_h_right": 1,
|
||||
"pad_w_left": 1, "pad_w_right": 1
|
||||
},
|
||||
{
|
||||
"_comment": "Large channels: stress-test the C dimension",
|
||||
"N": 1, "H": 16, "W": 16, "C": 256,
|
||||
"Y": 2, "X": 2,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
},
|
||||
{
|
||||
"_comment": "Large batch: multi-batch correctness",
|
||||
"N": 4, "H": 16, "W": 16, "C": 32,
|
||||
"Y": 2, "X": 2,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
},
|
||||
{
|
||||
"_comment": "Non-square spatial: rectangular H != W",
|
||||
"N": 2, "H": 32, "W": 16, "C": 64,
|
||||
"Y": 3, "X": 3,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 1, "pad_h_right": 1,
|
||||
"pad_w_left": 1, "pad_w_right": 1
|
||||
},
|
||||
{
|
||||
"_comment": "Large window 5x5: bigger receptive field",
|
||||
"N": 1, "H": 32, "W": 32, "C": 32,
|
||||
"Y": 5, "X": 5,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 2, "pad_h_right": 2,
|
||||
"pad_w_left": 2, "pad_w_right": 2
|
||||
},
|
||||
{
|
||||
"_comment": "Large window 7x7: stride 1 with padding 3, so the 14x14 spatial size is preserved (same-size pooling, not global pooling)",
|
||||
"N": 1, "H": 14, "W": 14, "C": 128,
|
||||
"Y": 7, "X": 7,
|
||||
"stride_h": 1, "stride_w": 1,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 3, "pad_h_right": 3,
|
||||
"pad_w_left": 3, "pad_w_right": 3
|
||||
},
|
||||
{
|
||||
"_comment": "Dilated: dilation_h=2, dilation_w=2 with 3x3 window",
|
||||
"N": 1, "H": 32, "W": 32, "C": 64,
|
||||
"Y": 3, "X": 3,
|
||||
"stride_h": 1, "stride_w": 1,
|
||||
"dilation_h": 2, "dilation_w": 2,
|
||||
"pad_h_left": 2, "pad_h_right": 2,
|
||||
"pad_w_left": 2, "pad_w_right": 2
|
||||
},
|
||||
{
|
||||
"_comment": "Asymmetric padding: different left/right padding",
|
||||
"N": 2, "H": 16, "W": 16, "C": 32,
|
||||
"Y": 3, "X": 3,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 0, "pad_h_right": 1,
|
||||
"pad_w_left": 0, "pad_w_right": 1
|
||||
},
|
||||
{
|
||||
"_comment": "Large spatial: bigger feature maps",
|
||||
"N": 1, "H": 64, "W": 64, "C": 64,
|
||||
"Y": 2, "X": 2,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
},
|
||||
{
|
||||
"_comment": "Non-square window: Y != X (3x2), with one-sided H padding (pad_h_left=1, pad_h_right=0)",
|
||||
"N": 1, "H": 32, "W": 32, "C": 32,
|
||||
"Y": 3, "X": 2,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 1, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
},
|
||||
{
|
||||
"_comment": "Stride-1 overlap: overlapping 2x2 windows",
|
||||
"N": 2, "H": 16, "W": 16, "C": 64,
|
||||
"Y": 2, "X": 2,
|
||||
"stride_h": 1, "stride_w": 1,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
}
|
||||
],
|
||||
"problem_sizes_3d": [
|
||||
{
|
||||
"_comment": "Basic 3D: small volume, 2x2x2 window",
|
||||
"N": 1, "D": 4, "H": 4, "W": 4, "C": 32,
|
||||
"Z": 2, "Y": 2, "X": 2,
|
||||
"stride_d": 2, "stride_h": 2, "stride_w": 2,
|
||||
"dilation_d": 1, "dilation_h": 1, "dilation_w": 1,
|
||||
"pad_d_left": 0, "pad_d_right": 0,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
},
|
||||
{
|
||||
"_comment": "Padded 3D: with symmetric padding",
|
||||
"N": 1, "D": 8, "H": 8, "W": 8, "C": 32,
|
||||
"Z": 3, "Y": 3, "X": 3,
|
||||
"stride_d": 2, "stride_h": 2, "stride_w": 2,
|
||||
"dilation_d": 1, "dilation_h": 1, "dilation_w": 1,
|
||||
"pad_d_left": 1, "pad_d_right": 1,
|
||||
"pad_h_left": 1, "pad_h_right": 1,
|
||||
"pad_w_left": 1, "pad_w_right": 1
|
||||
},
|
||||
{
|
||||
"_comment": "Multi-batch 3D: larger batch and channels",
|
||||
"N": 2, "D": 8, "H": 8, "W": 8, "C": 64,
|
||||
"Z": 2, "Y": 2, "X": 2,
|
||||
"stride_d": 2, "stride_h": 2, "stride_w": 2,
|
||||
"dilation_d": 1, "dilation_h": 1, "dilation_w": 1,
|
||||
"pad_d_left": 0, "pad_d_right": 0,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"block_m": {"values": [64, 128, 256, 512]},
|
||||
"block_n": {"values": [1]},
|
||||
"warp_m": {"values": [1, 2, 4]},
|
||||
"warp_n": {"values": [1]},
|
||||
"warp_tile_m": {"values": [64, 128, 256]},
|
||||
"warp_tile_n": {"values": [1]},
|
||||
"thread_tile_m": {"values": [1, 2, 4]},
|
||||
"thread_tile_n": {"values": [1]}
|
||||
},
|
||||
"trait_config": {
|
||||
"reduce_op": {"values": ["max", "avg"]},
|
||||
"output_index": {"values": [true, false]},
|
||||
"propagate_nan": {"values": [true, false]},
|
||||
"pooling_dim": {"values": ["2d", "3d"]}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Basic pooling functionality validation with moderate problem sizes"
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes_2d": [
|
||||
{
|
||||
"N": 1, "H": 8, "W": 8, "C": 32,
|
||||
"Y": 2, "X": 2,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
},
|
||||
{
|
||||
"N": 2, "H": 16, "W": 16, "C": 32,
|
||||
"Y": 3, "X": 3,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 1, "pad_h_right": 1,
|
||||
"pad_w_left": 1, "pad_w_right": 1
|
||||
},
|
||||
{
|
||||
"N": 1, "H": 32, "W": 32, "C": 64,
|
||||
"Y": 2, "X": 2,
|
||||
"stride_h": 2, "stride_w": 2,
|
||||
"dilation_h": 1, "dilation_w": 1,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
}
|
||||
],
|
||||
"problem_sizes_3d": [
|
||||
{
|
||||
"N": 1, "D": 4, "H": 4, "W": 4, "C": 32,
|
||||
"Z": 2, "Y": 2, "X": 2,
|
||||
"stride_d": 2, "stride_h": 2, "stride_w": 2,
|
||||
"dilation_d": 1, "dilation_h": 1, "dilation_w": 1,
|
||||
"pad_d_left": 0, "pad_d_right": 0,
|
||||
"pad_h_left": 0, "pad_h_right": 0,
|
||||
"pad_w_left": 0, "pad_w_right": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"block_m": {"values": [128]},
|
||||
"block_n": {"values": [1]},
|
||||
"warp_m": {"values": [1]},
|
||||
"warp_n": {"values": [1]},
|
||||
"warp_tile_m": {"values": [128]},
|
||||
"warp_tile_n": {"values": [1]},
|
||||
"thread_tile_m": {"values": [2]},
|
||||
"thread_tile_n": {"values": [1]}
|
||||
},
|
||||
"trait_config": {
|
||||
"reduce_op": {"values": ["max"]},
|
||||
"output_index": {"values": [true]},
|
||||
"propagate_nan": {"values": [false]},
|
||||
"pooling_dim": {"values": ["2d"]}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user