ckTileEngine pooling

This commit is contained in:
Aleksander Dudek
2026-02-10 12:50:42 +00:00
parent 3f04d27b68
commit 2c2125f73e
15 changed files with 2300 additions and 0 deletions

View File

@@ -0,0 +1,298 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
# ============================================================================
# Pooling Tile Engine Unit Tests
#
# This CMake file creates unit tests for tile_engine generated pooling kernels.
# Each kernel configuration gets its own test executable.
# ============================================================================
# Locate tile_engine pooling scripts directory
set(TILE_ENGINE_POOLING_DIR "${PROJECT_SOURCE_DIR}/tile_engine/ops/pooling")
if(NOT EXISTS ${TILE_ENGINE_POOLING_DIR})
message(WARNING "Tile engine pooling directory not found: ${TILE_ENGINE_POOLING_DIR}")
return()
endif()
# ============================================================================
# create_individual_pool_test_target
#
# Creates a single test executable for a specific pooling kernel configuration.
#
# Parameters:
# datatype - Data type (fp16, fp32, bf16)
# config_name - Configuration file name without .json extension
# trait - Kernel trait combination string
# tile_config - Tile configuration parameters
# config_json - Full path to JSON configuration file
# ============================================================================
function(create_individual_pool_test_target datatype config_name trait tile_config config_json)
set(target_name "test_pooling_tile_engine_${datatype}_${config_name}_${trait}_${tile_config}")
set(working_path "${CMAKE_CURRENT_BINARY_DIR}/${datatype}/${config_name}")
# Generated header path (already created during cmake configuration)
set(test_header "${working_path}/pooling_single_pool_${datatype}_${trait}_${tile_config}.hpp")
set(test_params_header "${working_path}/test_params.hpp")
# Verify header exists
if(NOT EXISTS ${test_header})
message(WARNING "Generated header not found: ${test_header}")
return()
endif()
# Verify test parameters header exists
if(NOT EXISTS ${test_params_header})
message(WARNING "Test parameters header not found: ${test_params_header}")
return()
endif()
# Create GTest executable for this kernel configuration
add_gtest_executable(${target_name}
${CMAKE_CURRENT_SOURCE_DIR}/test_pooling_simple.cpp
)
# Configure GPU architectures for HIP compilation
set_property(TARGET ${target_name} PROPERTY HIP_ARCHITECTURES ${POOLING_TEST_GPU_TARGETS})
# Define preprocessor macros for generated header location and test parameters
target_compile_definitions(${target_name} PRIVATE
POOLING_SINGLE_INSTANCE_HPP="${test_header}"
POOLING_TEST_PARAMS_HPP="${test_params_header}"
)
# Include directories for headers and dependencies
target_include_directories(${target_name} PRIVATE
${PROJECT_SOURCE_DIR}/include
${PROJECT_BINARY_DIR}/include
${PROJECT_SOURCE_DIR} # Root directory for tile_engine access
${GTEST_INCLUDE_DIRS}
)
# Compiler options matching tile_engine requirements
target_compile_options(${target_name} PRIVATE
-Wno-undefined-func-template
-Wno-float-equal
--offload-compress
-include ${test_header}
)
# Add FP8 format definitions for proper data type interpretation
if(CK_USE_OCP_FP8)
target_compile_options(${target_name} PRIVATE -DCK_TILE_USE_OCP_FP8)
endif()
message(STATUS " Created test target: ${target_name}")
endfunction()
# ============================================================================
# build_pool_test_targets
#
# Builds all test targets for a specific datatype/config combination.
# Uses tile_engine's two-step process: list kernels, then generate tests.
#
# Parameters:
# datatype - Data type (fp16, fp32, bf16)
# config_name - Configuration file name without .json extension
# ============================================================================
function(build_pool_test_targets datatype config_name)
set(working_path "${CMAKE_CURRENT_BINARY_DIR}/${datatype}/${config_name}")
# Locate and validate configuration file
set(config_filename "${config_name}.json")
set(json_blob "${CMAKE_CURRENT_SOURCE_DIR}/configs/${config_filename}")
if(NOT EXISTS ${json_blob})
message(WARNING "Test config file not found: ${json_blob}")
return()
endif()
# Prepare build directory for this configuration
file(MAKE_DIRECTORY ${working_path})
# STEP 1: Discovery phase - list all valid kernel configurations
execute_process(
COMMAND ${Python3_EXECUTABLE} -u ${TILE_ENGINE_POOLING_DIR}/pooling_instance_builder.py
--working_path ${working_path}
--datatype ${datatype}
--config_json ${json_blob}
--list_kernels
WORKING_DIRECTORY ${TILE_ENGINE_POOLING_DIR}
RESULT_VARIABLE ret
OUTPUT_VARIABLE list_output
ERROR_VARIABLE list_error
)
if(NOT ret EQUAL 0)
message(WARNING "Failed to list pooling kernels for ${datatype}_${config_name}: ${list_error}")
return()
endif()
# Verify kernel list file was generated
if(NOT EXISTS ${working_path}/pool_kernel_list.txt)
message(STATUS "No pooling kernels found for ${datatype}_${config_name}")
return()
endif()
message(STATUS "Building pooling tests for ${datatype}_${config_name}")
# STEP 2a: Determine pooling dimension from config
# Read the trait config to find pooling_dim
file(READ ${json_blob} config_content)
string(FIND "${config_content}" "\"3d\"" found_3d)
if(found_3d GREATER -1)
set(pooling_dim "3d")
else()
set(pooling_dim "2d")
endif()
# STEP 2b: Extract test parameters from config
set(test_params_file "${working_path}/test_params.hpp")
execute_process(
COMMAND ${Python3_EXECUTABLE} -u ${CMAKE_CURRENT_SOURCE_DIR}/extract_test_params.py
--config_file ${json_blob}
--output_file ${test_params_file}
--pooling_dim ${pooling_dim}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE extract_ret
OUTPUT_VARIABLE extract_output
ERROR_VARIABLE extract_error
)
if(NOT extract_ret EQUAL 0)
message(WARNING "Failed to extract test parameters for pooling ${datatype}: ${extract_error}")
return()
endif()
# STEP 2c: Header generation phase - generate headers using --gen_single
message(STATUS " Generating pooling headers using --gen_single...")
file(STRINGS ${working_path}/pool_kernel_list.txt kernel_lines)
set(gen_count 0)
foreach(line IN LISTS kernel_lines)
# Parse kernel specification format: kernel_name|tile_config|trait_combo
string(REPLACE "|" ";" parts "${line}")
list(LENGTH parts parts_len)
if(parts_len EQUAL 3)
list(GET parts 0 kernel_name)
list(GET parts 1 tile_config)
list(GET parts 2 trait_combo)
# Generate header using --gen_single
execute_process(
COMMAND ${Python3_EXECUTABLE} -u ${TILE_ENGINE_POOLING_DIR}/pooling_instance_builder.py
--working_path ${working_path}
--datatype ${datatype}
--config_json ${json_blob}
--gen_single
--kernel_name "${kernel_name}"
--tile_config "${tile_config}"
--trait_combo "${trait_combo}"
WORKING_DIRECTORY ${TILE_ENGINE_POOLING_DIR}
RESULT_VARIABLE gen_ret
OUTPUT_VARIABLE gen_output
ERROR_VARIABLE gen_error
)
if(NOT gen_ret EQUAL 0)
message(WARNING "Failed to generate pooling header for ${kernel_name}: ${gen_error}")
else()
math(EXPR gen_count "${gen_count} + 1")
endif()
endif()
endforeach()
message(STATUS " Generated ${gen_count} pooling headers for ${datatype}")
# STEP 3: Target creation phase - create test targets
message(STATUS " Creating pooling test targets...")
file(STRINGS ${working_path}/pool_kernel_list.txt kernel_lines)
set(test_count 0)
foreach(line IN LISTS kernel_lines)
string(REPLACE "|" ";" parts "${line}")
list(LENGTH parts parts_len)
if(parts_len EQUAL 3)
list(GET parts 0 kernel_name)
list(GET parts 1 tile_config)
list(GET parts 2 trait_combo)
create_individual_pool_test_target("${datatype}" "${config_name}" "${trait_combo}" "${tile_config}" "${json_blob}")
math(EXPR test_count "${test_count} + 1")
endif()
endforeach()
message(STATUS " Created ${test_count} pooling test targets for ${datatype}")
endfunction()
# ============================================================================
# MAIN EXECUTION - Test Target Generation
# ============================================================================
message(STATUS "=== Starting Pooling Tile Engine Test Configuration ===")
message(STATUS "SUPPORTED_GPU_TARGETS: ${SUPPORTED_GPU_TARGETS}")
# GPU architecture filtering - only build tests for supported architectures
set(POOLING_TEST_GPU_TARGETS "")
set(DESIRED_TARGETS "gfx90a;gfx942")
foreach(target IN LISTS SUPPORTED_GPU_TARGETS)
if(target IN_LIST DESIRED_TARGETS)
list(APPEND POOLING_TEST_GPU_TARGETS ${target})
message(STATUS " Adding GPU target for pooling tests: ${target}")
endif()
endforeach()
# Early exit if no compatible GPU architectures are available
if(NOT POOLING_TEST_GPU_TARGETS)
message(WARNING "Skipping Pooling Tile Engine tests: No supported GPU targets (gfx90a, gfx942) found in SUPPORTED_GPU_TARGETS: ${SUPPORTED_GPU_TARGETS}")
return()
endif()
message(STATUS "Building Pooling tile engine tests for GPU targets: ${POOLING_TEST_GPU_TARGETS}")
# Enable parallel compilation optimizations
set_property(GLOBAL PROPERTY JOB_POOLS
compile_heavy=4
compile_normal=16
)
# Enable compiler cache if available and explicitly requested
option(ENABLE_CCACHE_TESTS "Enable ccache for test compilation" OFF)
if(ENABLE_CCACHE_TESTS)
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM})
message(STATUS "Using ccache for faster test compilation")
else()
message(WARNING "ccache requested but not found")
endif()
else()
message(STATUS "ccache disabled for tests (use -DENABLE_CCACHE_TESTS=ON to enable)")
endif()
# ============================================================================
# Test Configuration Matrix
# ============================================================================
set(TEST_DATATYPES "fp16;fp32")
# ============================================================================
# Test Target Generation
# ============================================================================
# SIMPLE TEST: Basic functionality validation
set(SIMPLE_TEST_CONFIG "simple_test_config")
set(SIMPLE_TEST_CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/configs/${SIMPLE_TEST_CONFIG}.json")
if(EXISTS ${SIMPLE_TEST_CONFIG_FILE})
message(STATUS "Processing pooling simple test config: ${SIMPLE_TEST_CONFIG}")
foreach(datatype IN LISTS TEST_DATATYPES)
build_pool_test_targets("${datatype}" "${SIMPLE_TEST_CONFIG}")
endforeach()
else()
message(WARNING "Pooling simple test config file not found: ${SIMPLE_TEST_CONFIG_FILE}")
endif()
message(STATUS "Pooling tile engine tests configured:")
message(STATUS " - Simple test: fp16/fp32")

View File

@@ -0,0 +1,88 @@
# Pooling Tile Engine Tests
Unit tests for pooling kernels generated by the tile_engine pooling codegen system.
## Overview
These tests validate pooling kernels that are generated at CMake configuration time
by `pooling_instance_builder.py`. Each kernel configuration (tile shape + traits)
gets its own GTest executable that verifies correctness against a CPU reference
implementation.
## Architecture
```
test/ck_tile/pooling_tile_engine/
├── CMakeLists.txt # Build infrastructure
├── configs/
│ └── simple_test_config.json # Test configuration with problem sizes
├── extract_test_params.py # Extracts problem sizes to C++ header
├── test_pooling_simple.cpp # GTest driver (parameterized)
└── README.md # This file
```
### Build Flow
1. **CMake configuration**: `CMakeLists.txt` invokes `pooling_instance_builder.py --list_kernels`
to discover valid kernel configurations from the JSON config.
2. **Parameter extraction**: `extract_test_params.py` generates `test_params.hpp` with
problem sizes from the JSON config.
3. **Header generation**: For each kernel, `pooling_instance_builder.py --gen_single`
generates a C++ header defining `SelectedKernel` with the specific tile configuration.
4. **Compilation**: Each kernel gets a separate test executable compiled with the
generated header via `-include`.
5. **Execution**: GTest runs each problem size as a separate test case, comparing
device results against the CPU reference.
## Configuration
### `simple_test_config.json`
Defines:
- **tile_config**: Block/warp/thread tile dimensions for PoolShape
- **trait_config**: Reduce op (max/avg), output_index, propagate_nan, pooling_dim (2d/3d)
- **test_params**: Problem sizes (N, H, W, C, window, stride, dilation, padding)
### Supported configurations
- **Data types**: fp16, fp32
- **Reduce operations**: max (with index output)
- **Pooling dimensions**: 2D (NHWC), 3D (NDHWC)
- **GPU targets**: gfx90a, gfx942
## Building
```bash
# From the build directory:
cmake --build . --target test_pooling_tile_engine_fp16_simple_test_config_max_true_false_2d_128x1_1x1_128x1_2x1
# Or build all pooling tests:
cmake --build . --target tests
```
## Running
```bash
# Run a specific test:
./test_pooling_tile_engine_fp16_simple_test_config_max_true_false_2d_128x1_1x1_128x1_2x1
# Run with GTest filters:
./test_pooling_tile_engine_fp16_simple_test_config_max_true_false_2d_128x1_1x1_128x1_2x1 --gtest_filter="*BasicFunctionality*"
```
## Relationship to tile_engine
The tile_engine pooling op lives at `tile_engine/ops/pooling/` and provides:
- `pooling_instance_builder.py` - Codegen for kernel headers
- `pooling_validation_utils.py` - Configuration validation
- `pooling_common.hpp` - Shared trait definitions
- `pooling_benchmark.hpp` - Problem/metric definitions
- `pooling_profiler.hpp` - Benchmark profiling
- `pooling_benchmark_single.cpp` - Single-kernel benchmark entry point
The underlying ck_tile pooling kernel lives at `include/ck_tile/ops/pooling/` and provides:
- `PoolKernel` - GPU kernel implementation
- `PoolProblem` - Problem parameterization
- `PoolShape` - Tile shape specification
- `PoolDefaultPolicy` - Tile distribution and reduction policies

View File

@@ -0,0 +1,60 @@
{
"problem": {
"description": "Basic pooling functionality validation with moderate problem sizes"
},
"test_params": {
"problem_sizes_2d": [
{
"N": 1, "H": 8, "W": 8, "C": 32,
"Y": 2, "X": 2,
"stride_h": 2, "stride_w": 2,
"dilation_h": 1, "dilation_w": 1,
"pad_h_left": 0, "pad_h_right": 0,
"pad_w_left": 0, "pad_w_right": 0
},
{
"N": 2, "H": 16, "W": 16, "C": 32,
"Y": 3, "X": 3,
"stride_h": 2, "stride_w": 2,
"dilation_h": 1, "dilation_w": 1,
"pad_h_left": 1, "pad_h_right": 1,
"pad_w_left": 1, "pad_w_right": 1
},
{
"N": 1, "H": 32, "W": 32, "C": 64,
"Y": 2, "X": 2,
"stride_h": 2, "stride_w": 2,
"dilation_h": 1, "dilation_w": 1,
"pad_h_left": 0, "pad_h_right": 0,
"pad_w_left": 0, "pad_w_right": 0
}
],
"problem_sizes_3d": [
{
"N": 1, "D": 4, "H": 4, "W": 4, "C": 32,
"Z": 2, "Y": 2, "X": 2,
"stride_d": 2, "stride_h": 2, "stride_w": 2,
"dilation_d": 1, "dilation_h": 1, "dilation_w": 1,
"pad_d_left": 0, "pad_d_right": 0,
"pad_h_left": 0, "pad_h_right": 0,
"pad_w_left": 0, "pad_w_right": 0
}
]
},
"tile_config": {
"block_m": {"values": [128]},
"block_n": {"values": [1]},
"warp_m": {"values": [1]},
"warp_n": {"values": [1]},
"warp_tile_m": {"values": [128]},
"warp_tile_n": {"values": [1]},
"thread_tile_m": {"values": [2]},
"thread_tile_n": {"values": [1]}
},
"trait_config": {
"reduce_op": {"values": ["max"]},
"output_index": {"values": [true]},
"propagate_nan": {"values": [false]},
"pooling_dim": {"values": ["2d"]}
}
}

View File

@@ -0,0 +1,139 @@
#!/usr/bin/env python3
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
"""
Extract pooling test parameters from config JSON and write to C++ header.
Generates test_params.hpp with problem sizes for parameterized GTest.
"""
import json
import argparse
import os
from pathlib import Path
def extract_test_params(config_file, output_file, pooling_dim="2d"):
"""Extract test parameters from config JSON and write to output file"""
with open(config_file, "r") as f:
config = json.load(f)
# Extract test parameters based on pooling dimension
test_params = []
if pooling_dim == "2d":
if "test_params" in config and "problem_sizes_2d" in config["test_params"]:
test_params = config["test_params"]["problem_sizes_2d"]
else:
# Default 2D test parameters
test_params = [
{
"N": 1, "H": 8, "W": 8, "C": 32,
"Y": 2, "X": 2,
"stride_h": 2, "stride_w": 2,
"dilation_h": 1, "dilation_w": 1,
"pad_h_left": 0, "pad_h_right": 0,
"pad_w_left": 0, "pad_w_right": 0,
},
{
"N": 2, "H": 16, "W": 16, "C": 32,
"Y": 3, "X": 3,
"stride_h": 2, "stride_w": 2,
"dilation_h": 1, "dilation_w": 1,
"pad_h_left": 1, "pad_h_right": 1,
"pad_w_left": 1, "pad_w_right": 1,
},
]
else: # 3d
if "test_params" in config and "problem_sizes_3d" in config["test_params"]:
test_params = config["test_params"]["problem_sizes_3d"]
else:
# Default 3D test parameters
test_params = [
{
"N": 1, "D": 4, "H": 4, "W": 4, "C": 32,
"Z": 2, "Y": 2, "X": 2,
"stride_d": 2, "stride_h": 2, "stride_w": 2,
"dilation_d": 1, "dilation_h": 1, "dilation_w": 1,
"pad_d_left": 0, "pad_d_right": 0,
"pad_h_left": 0, "pad_h_right": 0,
"pad_w_left": 0, "pad_w_right": 0,
},
]
# Write to output file in C++ format
output_dir = Path(output_file).parent
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_file, "w") as f:
f.write("// Generated test parameters for pooling tile_engine tests\n")
f.write("// This file is auto-generated during CMake configuration\n\n")
if pooling_dim == "2d":
f.write(
"static const std::vector<PoolTestParams2D> CONFIG_TEST_PARAMS = {\n"
)
for i, params in enumerate(test_params):
comma = "," if i < len(test_params) - 1 else ""
f.write(
f" {{"
f"{params['N']}, {params['H']}, {params['W']}, {params['C']}, "
f"{params['Y']}, {params['X']}, "
f"{params['stride_h']}, {params['stride_w']}, "
f"{params['dilation_h']}, {params['dilation_w']}, "
f"{params['pad_h_left']}, {params['pad_h_right']}, "
f"{params['pad_w_left']}, {params['pad_w_right']}"
f"}}{comma}\n"
)
f.write("};\n")
else: # 3d
f.write(
"static const std::vector<PoolTestParams3D> CONFIG_TEST_PARAMS = {\n"
)
for i, params in enumerate(test_params):
comma = "," if i < len(test_params) - 1 else ""
f.write(
f" {{"
f"{params['N']}, {params['D']}, {params['H']}, {params['W']}, {params['C']}, "
f"{params['Z']}, {params['Y']}, {params['X']}, "
f"{params['stride_d']}, {params['stride_h']}, {params['stride_w']}, "
f"{params['dilation_d']}, {params['dilation_h']}, {params['dilation_w']}, "
f"{params['pad_d_left']}, {params['pad_d_right']}, "
f"{params['pad_h_left']}, {params['pad_h_right']}, "
f"{params['pad_w_left']}, {params['pad_w_right']}"
f"}}{comma}\n"
)
f.write("};\n")
print(
f"Extracted {len(test_params)} {pooling_dim} test parameters from {config_file} -> {output_file}"
)
def main():
parser = argparse.ArgumentParser(
description="Extract pooling test parameters from config JSON"
)
parser.add_argument("--config_file", required=True, help="Input config JSON file")
parser.add_argument(
"--output_file", required=True, help="Output test parameters file"
)
parser.add_argument(
"--pooling_dim",
default="2d",
choices=["2d", "3d"],
help="Pooling dimension (2d or 3d)",
)
args = parser.parse_args()
if not os.path.exists(args.config_file):
print(f"Error: Config file not found: {args.config_file}")
return 1
extract_test_params(args.config_file, args.output_file, args.pooling_dim)
return 0
if __name__ == "__main__":
exit(main())

View File

@@ -0,0 +1,240 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
/**
* @file test_pooling_simple.cpp
* @brief Unit tests for pooling kernels generated by pooling_instance_builder
*
* This test includes kernels generated during CMake configuration by
* pooling_instance_builder.py and tests them with problem sizes extracted
* from the corresponding JSON configuration files.
*/
#include <gtest/gtest.h>
#include <iostream>
#include <string>
#include <vector>
#include "ck_tile/core.hpp"
#include "ck_tile/host.hpp"
#include "ck_tile/ops/pooling.hpp"
#include "ck_tile/host/reference/reference_pool.hpp"
#include "tile_engine/ops/pooling/pooling_common.hpp"
// The kernel header is included via compile command line with -include flag
// It defines: SelectedKernel, KERNEL_NAME, InDataType, OutDataType,
// ComputeDataType, IndexDataType, ReduceOpType,
// TensorShape, WindowShape, POOLING_DIM
// ============================================================================
// Test parameter structures
// ============================================================================
/// @brief Test parameters for 2D pooling
struct PoolTestParams2D
{
int N, H, W, C; // Input dimensions (NHWC)
int Y, X; // Window size
int stride_h, stride_w; // Strides
int dilation_h, dilation_w; // Dilations
int pad_h_left, pad_h_right; // Height padding
int pad_w_left, pad_w_right; // Width padding
};
/// @brief Test parameters for 3D pooling
struct PoolTestParams3D
{
int N, D, H, W, C; // Input dimensions (NDHWC)
int Z, Y, X; // Window size
int stride_d, stride_h, stride_w; // Strides
int dilation_d, dilation_h, dilation_w; // Dilations
int pad_d_left, pad_d_right; // Depth padding
int pad_h_left, pad_h_right; // Height padding
int pad_w_left, pad_w_right; // Width padding
};
// Include config-specific test parameters (after parameter structs are defined)
#ifdef POOLING_TEST_PARAMS_HPP
#include POOLING_TEST_PARAMS_HPP
#endif
// ============================================================================
// 2D Pooling Tests
// ============================================================================
class PoolingTileEngineTest2D : public ::testing::TestWithParam<PoolTestParams2D>
{
protected:
void SetUp() override
{
auto params = GetParam();
N_ = params.N;
H_ = params.H;
W_ = params.W;
C_ = params.C;
Y_ = params.Y;
X_ = params.X;
stride_h_ = params.stride_h;
stride_w_ = params.stride_w;
dilation_h_ = params.dilation_h;
dilation_w_ = params.dilation_w;
pad_h_left_ = params.pad_h_left;
pad_h_right_ = params.pad_h_right;
pad_w_left_ = params.pad_w_left;
pad_w_right_ = params.pad_w_right;
// Calculate output dimensions
ck_tile::index_t Ys = (Y_ - 1) * dilation_h_ + 1;
ck_tile::index_t Xs = (X_ - 1) * dilation_w_ + 1;
Ho_ = (H_ + pad_h_left_ + pad_h_right_ - Ys) / stride_h_ + 1;
Wo_ = (W_ + pad_w_left_ + pad_w_right_ - Xs) / stride_w_ + 1;
}
int N_, H_, W_, C_;
int Y_, X_;
int stride_h_, stride_w_;
int dilation_h_, dilation_w_;
int pad_h_left_, pad_h_right_;
int pad_w_left_, pad_w_right_;
int Ho_, Wo_;
};
TEST_P(PoolingTileEngineTest2D, BasicFunctionality)
{
// Create host tensors
ck_tile::HostTensor<InDataType> h_in({N_, H_, W_, C_});
ck_tile::HostTensor<OutDataType> h_out({N_, Ho_, Wo_, C_});
ck_tile::HostTensor<OutDataType> h_out_ref({N_, Ho_, Wo_, C_});
ck_tile::HostTensor<IndexDataType> h_out_index({N_, Ho_, Wo_, C_});
ck_tile::HostTensor<IndexDataType> h_out_ref_index({N_, Ho_, Wo_, C_});
// Initialize input with random data
ck_tile::FillUniformDistribution<InDataType>{-5.f, 5.f}(h_in);
h_out.SetZero();
h_out_ref.SetZero();
// Device memory
ck_tile::DeviceMem d_in(h_in.get_element_space_size_in_bytes());
ck_tile::DeviceMem d_out(h_out.get_element_space_size_in_bytes());
ck_tile::DeviceMem d_out_index(h_out_index.get_element_space_size_in_bytes());
d_in.ToDevice(h_in.data());
d_out.SetZero();
d_out_index.SetZero();
// Build shapes and strides (NHWC layout)
const auto input_shape = ck_tile::make_tuple(N_, H_, W_, C_);
const auto output_shape = ck_tile::make_tuple(N_, Ho_, Wo_, C_);
const auto input_strides = ck_tile::make_tuple(H_ * W_ * C_, W_ * C_, C_, 1);
const auto output_strides = ck_tile::make_tuple(Ho_ * Wo_ * C_, Wo_ * C_, C_, 1);
const auto window_lengths = ck_tile::make_tuple(Y_, X_);
const auto window_strides = ck_tile::make_tuple(stride_h_, stride_w_);
const auto window_dilations = ck_tile::make_tuple(dilation_h_, dilation_w_);
const auto input_left_pads = ck_tile::make_tuple(pad_h_left_, pad_w_left_);
const auto input_right_pads = ck_tile::make_tuple(pad_h_right_, pad_w_right_);
// Build host args for the generated kernel
auto host_args =
ck_tile::PoolHostArgs<decltype(input_shape), decltype(window_lengths)>{
d_in.GetDeviceBuffer(),
d_out.GetDeviceBuffer(),
d_out_index.GetDeviceBuffer(),
input_shape,
output_shape,
input_strides,
output_strides,
window_lengths,
window_strides,
window_dilations,
input_left_pads,
input_right_pads};
// Stream config: no timing overhead for fastest execution
ck_tile::stream_config stream_config{nullptr, false, 0, 0, 1, false, false, 1};
// Launch generated kernel
try
{
SelectedKernel::launch(host_args, stream_config);
}
catch(const std::exception& e)
{
std::string error_msg(e.what());
if(error_msg.find("Arguments not supported") != std::string::npos)
{
GTEST_SKIP() << "Configuration not supported: " << e.what();
}
else
{
FAIL() << "Kernel launch failed: " << e.what();
}
}
// Copy results back
d_out.FromDevice(h_out.data());
d_out_index.FromDevice(h_out_index.data());
// Compute reference on host
auto kernel_args_ref =
ck_tile::PoolKernelArgs<decltype(input_shape), decltype(window_lengths)>{
h_in.data(),
h_out_ref.data(),
h_out_ref_index.data(),
input_shape,
output_shape,
input_strides,
output_strides,
window_lengths,
window_strides,
window_dilations,
input_left_pads,
input_right_pads};
ck_tile::reference_pool2d<InDataType,
ComputeDataType,
OutDataType,
IndexDataType,
ReduceOpType,
decltype(input_shape),
decltype(window_lengths),
SelectedKernel::kOutputIndex>(
h_in, h_out_ref, h_out_ref_index, kernel_args_ref, ReduceOpType{});
// Verify value results
bool pass_value =
ck_tile::check_err(h_out, h_out_ref, "Error: Incorrect values!", 1e-5, 1e-5);
EXPECT_TRUE(pass_value) << "Pooling value verification failed for " << KERNEL_NAME;
// Verify index results if output_index is enabled
if constexpr(SelectedKernel::kOutputIndex)
{
bool pass_index = ck_tile::check_err(
h_out_index, h_out_ref_index, "Error: Incorrect indices!", 0, 0);
EXPECT_TRUE(pass_index) << "Pooling index verification failed for " << KERNEL_NAME;
}
}
TEST_P(PoolingTileEngineTest2D, KernelInfo)
{
EXPECT_TRUE(strlen(KERNEL_NAME) > 0) << "Kernel name should not be empty";
std::cout << "Testing kernel: " << KERNEL_NAME << std::endl;
std::cout << "Problem size: N=" << N_ << " H=" << H_ << " W=" << W_ << " C=" << C_
<< " Window=" << Y_ << "x" << X_ << " Output=" << Ho_ << "x" << Wo_
<< std::endl;
}
// Instantiate test suite with config-specific test parameters
// CONFIG_TEST_PARAMS is defined in the auto-generated test_params.hpp file
INSTANTIATE_TEST_SUITE_P(
PoolingVerification,
PoolingTileEngineTest2D,
::testing::ValuesIn(CONFIG_TEST_PARAMS),
[](const ::testing::TestParamInfo<PoolTestParams2D>& param_info) {
return "N" + std::to_string(param_info.param.N) + "_H" +
std::to_string(param_info.param.H) + "_W" +
std::to_string(param_info.param.W) + "_C" +
std::to_string(param_info.param.C) + "_Y" +
std::to_string(param_info.param.Y) + "_X" +
std::to_string(param_info.param.X);
});