mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 12:41:26 +00:00
Ck tile engine gemm unit tests exapand test coverage (#3025)
* initial commit for testing datatypes, layouts and traits * correct warp tile size for small datatype config to make a validate instance for fp16, bf16, fp8 * add tile size coverage test * Cover more tests, parallel instance generation, documentation * update cmakelist to run more tests * initial codes to support add test params in json file * add congurable problem sizes for different tests * modify README.md * clean test_gemm_simple code * correct padding coverage test * Add comprehensive and quick tile size config files * remove fp64 from datatypes * update documents. manage selecting tile_size config (quick or Comprehensive) * correct padding test problem sizes * update comprehensive test and correct documents * Skip GEMM tests with unsupported arguments instead of failing * change gen_single instead of gen_indivisual because of an issue. add splitk tests to tile_size_quick_config * clean CMakeList, remod py file * Refactor test configs: Rename tile_size to coverage, remove separate traits config, clean cmakefile, readme * update fp32, fp8 to test all layouts, clean documents and comments * limit fp32 test layouts to rcr because of compilation error on some gpus * remove fp32 because of the removing from gemm_instance_builder, make quick test smaller, updating comments * Fix fp8/bf8 test failures on gfx950 by adding OCP FP8 format support * Reduce quick_coverage test count from ~250 to ~144 for faster CI
This commit is contained in:
@@ -32,43 +32,35 @@ function(create_individual_gemm_test_target datatype layout config_name trait ti
|
||||
set(target_name "test_gemm_tile_engine_${datatype}_${layout}_${config_name}_${trait}_${tile_config}")
|
||||
set(working_path "${CMAKE_CURRENT_BINARY_DIR}/${datatype}/${layout}/${config_name}")
|
||||
|
||||
# Generated header path for this specific kernel configuration
|
||||
# Generated header path (already created during cmake configuration)
|
||||
set(test_header "${working_path}/gemm_single_${datatype}_${layout}_${trait}_${tile_config}.hpp")
|
||||
set(test_params_header "${working_path}/test_params.hpp")
|
||||
|
||||
# Generate kernel header using tile_engine's Python script
|
||||
add_custom_command(
|
||||
OUTPUT ${test_header}
|
||||
COMMAND ${Python3_EXECUTABLE} ${TILE_ENGINE_GEMM_DIR}/gemm_instance_builder.py
|
||||
--working_path ${working_path}
|
||||
--gpu_target "${GEMM_TEST_GPU_TARGETS}"
|
||||
--datatype ${datatype}
|
||||
--layout ${layout}
|
||||
--config_json ${config_json}
|
||||
--gen_single
|
||||
--kernel_name "test_gemm_${datatype}_${layout}_${trait}_${tile_config}"
|
||||
--tile_config "${tile_config}"
|
||||
--trait_combo "${trait}"
|
||||
DEPENDS ${TILE_ENGINE_GEMM_DIR}/gemm_instance_builder.py ${config_json}
|
||||
COMMENT "Generating test header ${test_header}"
|
||||
VERBATIM
|
||||
)
|
||||
# Verify header exists (should have been generated during cmake configuration)
|
||||
if(NOT EXISTS ${test_header})
|
||||
message(WARNING "Generated header not found: ${test_header}")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Verify test parameters header exists
|
||||
if(NOT EXISTS ${test_params_header})
|
||||
message(WARNING "Test parameters header not found: ${test_params_header}")
|
||||
return()
|
||||
endif()
|
||||
|
||||
|
||||
# Create GTest executable for this kernel configuration
|
||||
add_gtest_executable(${target_name}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_gemm_simple.cpp
|
||||
)
|
||||
|
||||
# Ensure header is generated before compilation
|
||||
set(header_target "${target_name}_header")
|
||||
add_custom_target(${header_target} DEPENDS ${test_header})
|
||||
add_dependencies(${target_name} ${header_target})
|
||||
|
||||
# Configure GPU architectures for HIP compilation
|
||||
set_property(TARGET ${target_name} PROPERTY HIP_ARCHITECTURES ${GEMM_TEST_GPU_TARGETS})
|
||||
|
||||
# Define preprocessor macros for generated header location
|
||||
# Define preprocessor macros for generated header location and test parameters
|
||||
target_compile_definitions(${target_name} PRIVATE
|
||||
GEMM_SINGLE_INSTANCE_HPP="${test_header}"
|
||||
GEMM_TEST_PARAMS_HPP="${test_params_header}"
|
||||
)
|
||||
|
||||
# Include directories for headers and dependencies
|
||||
@@ -87,6 +79,11 @@ function(create_individual_gemm_test_target datatype layout config_name trait ti
|
||||
-include ${test_header} # Auto-include generated header
|
||||
)
|
||||
|
||||
# Add FP8 format definitions for proper data type interpretation
|
||||
if(CK_USE_OCP_FP8)
|
||||
target_compile_options(${target_name} PRIVATE -DCK_TILE_USE_OCP_FP8)
|
||||
endif()
|
||||
|
||||
message(STATUS " Created test target: ${target_name}")
|
||||
endfunction()
|
||||
|
||||
@@ -107,7 +104,6 @@ function(build_gemm_test_targets datatype layout config_name)
|
||||
# Locate and validate configuration file
|
||||
set(config_filename "${config_name}.json")
|
||||
set(json_blob "${CMAKE_CURRENT_SOURCE_DIR}/configs/${config_filename}")
|
||||
message(STATUS " Using test config: ${config_filename}")
|
||||
|
||||
if(NOT EXISTS ${json_blob})
|
||||
message(WARNING "Test config file not found: ${json_blob}")
|
||||
@@ -118,7 +114,6 @@ function(build_gemm_test_targets datatype layout config_name)
|
||||
file(MAKE_DIRECTORY ${working_path})
|
||||
|
||||
# STEP 1: Discovery phase - list all valid kernel configurations
|
||||
message(STATUS " Listing kernel configurations for ${datatype}_${layout}...")
|
||||
execute_process(
|
||||
COMMAND ${Python3_EXECUTABLE} -u ${TILE_ENGINE_GEMM_DIR}/gemm_instance_builder.py
|
||||
--working_path ${working_path}
|
||||
@@ -134,32 +129,90 @@ function(build_gemm_test_targets datatype layout config_name)
|
||||
)
|
||||
|
||||
if(NOT ret EQUAL 0)
|
||||
message(WARNING "Failed to list kernels for ${datatype}_${layout}: ${list_error}")
|
||||
message(WARNING "Failed to list kernels for ${datatype}_${layout}_${config_name}: ${list_error}")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Validate kernel discovery results
|
||||
if(EXISTS ${working_path}/gemm_kernel_count.txt)
|
||||
file(READ ${working_path}/gemm_kernel_count.txt kernel_count)
|
||||
string(STRIP "${kernel_count}" kernel_count)
|
||||
message(STATUS " Found ${kernel_count} test configurations for ${datatype}_${layout}")
|
||||
else()
|
||||
message(WARNING "Kernel count file not found for ${datatype}_${layout}")
|
||||
# Verify kernel list file was generated
|
||||
if(NOT EXISTS ${working_path}/gemm_kernel_list.txt)
|
||||
message(STATUS "No kernels found for ${datatype}_${layout}_${config_name} (validation filtered out all combinations)")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# STEP 2: Generation phase - create test targets for each discovered kernel
|
||||
if(EXISTS ${working_path}/gemm_kernel_list.txt)
|
||||
file(STRINGS ${working_path}/gemm_kernel_list.txt kernel_lines)
|
||||
set(test_count 0)
|
||||
foreach(line IN LISTS kernel_lines)
|
||||
# Parse kernel specification format: kernel_name|tile_config|trait_combo
|
||||
string(REPLACE "|" ";" parts "${line}")
|
||||
list(LENGTH parts parts_len)
|
||||
if(parts_len EQUAL 3)
|
||||
list(GET parts 0 kernel_name)
|
||||
list(GET parts 1 tile_config)
|
||||
list(GET parts 2 trait_combo)
|
||||
message(STATUS "Building tests for ${datatype}_${layout}_${config_name}")
|
||||
|
||||
# STEP 2a: Extract test parameters from config
|
||||
set(test_params_file "${working_path}/test_params.hpp")
|
||||
execute_process(
|
||||
COMMAND ${Python3_EXECUTABLE} -u ${CMAKE_CURRENT_SOURCE_DIR}/extract_test_params.py
|
||||
--config_file ${json_blob}
|
||||
--output_file ${test_params_file}
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
RESULT_VARIABLE extract_ret
|
||||
OUTPUT_VARIABLE extract_output
|
||||
ERROR_VARIABLE extract_error
|
||||
)
|
||||
|
||||
if(NOT extract_ret EQUAL 0)
|
||||
message(WARNING "Failed to extract test parameters for ${datatype}_${layout}: ${extract_error}")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# STEP 2b: Header generation phase - generate headers using --gen_single
|
||||
message(STATUS " Generating headers using --gen_single...")
|
||||
|
||||
file(STRINGS ${working_path}/gemm_kernel_list.txt kernel_lines)
|
||||
set(gen_count 0)
|
||||
|
||||
foreach(line IN LISTS kernel_lines)
|
||||
# Parse kernel specification format: kernel_name|tile_config|trait_combo
|
||||
string(REPLACE "|" ";" parts "${line}")
|
||||
list(LENGTH parts parts_len)
|
||||
if(parts_len EQUAL 3)
|
||||
list(GET parts 0 kernel_name)
|
||||
list(GET parts 1 tile_config)
|
||||
list(GET parts 2 trait_combo)
|
||||
|
||||
# Generate header using --gen_single
|
||||
execute_process(
|
||||
COMMAND ${Python3_EXECUTABLE} -u ${TILE_ENGINE_GEMM_DIR}/gemm_instance_builder.py
|
||||
--working_path ${working_path}
|
||||
--gpu_target "${GEMM_TEST_GPU_TARGETS}"
|
||||
--datatype ${datatype}
|
||||
--layout ${layout}
|
||||
--config_json ${json_blob}
|
||||
--gen_single
|
||||
--kernel_name "${kernel_name}"
|
||||
--tile_config "${tile_config}"
|
||||
--trait_combo "${trait_combo}"
|
||||
WORKING_DIRECTORY ${TILE_ENGINE_GEMM_DIR}
|
||||
RESULT_VARIABLE gen_ret
|
||||
OUTPUT_VARIABLE gen_output
|
||||
ERROR_VARIABLE gen_error
|
||||
)
|
||||
|
||||
if(NOT gen_ret EQUAL 0)
|
||||
message(WARNING "Failed to generate header for ${kernel_name}: ${gen_error}")
|
||||
else()
|
||||
math(EXPR gen_count "${gen_count} + 1")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
message(STATUS " Generated ${gen_count} headers for ${datatype}_${layout}")
|
||||
|
||||
# STEP 3: Target creation phase - create test targets
|
||||
message(STATUS " Creating test targets...")
|
||||
file(STRINGS ${working_path}/gemm_kernel_list.txt kernel_lines)
|
||||
set(test_count 0)
|
||||
foreach(line IN LISTS kernel_lines)
|
||||
# Parse kernel specification format: kernel_name|tile_config|trait_combo
|
||||
string(REPLACE "|" ";" parts "${line}")
|
||||
list(LENGTH parts parts_len)
|
||||
if(parts_len EQUAL 3)
|
||||
list(GET parts 0 kernel_name)
|
||||
list(GET parts 1 tile_config)
|
||||
list(GET parts 2 trait_combo)
|
||||
|
||||
# Generate test target for this kernel configuration
|
||||
create_individual_gemm_test_target("${datatype}" "${layout}" "${config_name}" "${trait_combo}" "${tile_config}" "${json_blob}")
|
||||
@@ -167,12 +220,7 @@ function(build_gemm_test_targets datatype layout config_name)
|
||||
endif()
|
||||
endforeach()
|
||||
message(STATUS " Created ${test_count} test targets for ${datatype}_${layout}")
|
||||
else()
|
||||
message(WARNING "Kernel list file not found for ${datatype}_${layout}")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# ============================================================================
|
||||
endfunction()# ============================================================================
|
||||
# MAIN EXECUTION - Test Target Generation
|
||||
# ============================================================================
|
||||
|
||||
@@ -198,42 +246,100 @@ endif()
|
||||
|
||||
message(STATUS "Building GEMM tile engine tests for GPU targets: ${GEMM_TEST_GPU_TARGETS}")
|
||||
|
||||
# ============================================================================
|
||||
# Test Configuration Matrix
|
||||
# ============================================================================
|
||||
# Enable parallel compilation optimizations
|
||||
# Set up job pools for better parallel compilation control
|
||||
set_property(GLOBAL PROPERTY JOB_POOLS
|
||||
compile_heavy=4 # Limit heavy compilations to prevent OOM
|
||||
compile_normal=16 # Allow more parallel normal compilations
|
||||
)
|
||||
|
||||
# Available test configurations (minimal set for fast CI/testing)
|
||||
set(TEST_CONFIGS
|
||||
"simple_test_config"
|
||||
# "medium_tiles_config" # Uncomment for broader testing
|
||||
)
|
||||
|
||||
# Data types for testing (core precision types)
|
||||
set(TEST_DATATYPES "fp16" "bf16")
|
||||
# Extended data type options:
|
||||
# set(TEST_DATATYPES "fp16" "bf16" "fp32" "fp64" "int8")
|
||||
|
||||
# Matrix layouts for testing (row-column-row is most common)
|
||||
set(TEST_LAYOUTS "rcr")
|
||||
# Extended layout options:
|
||||
# set(TEST_LAYOUTS "rcr" "rrr" "ccr" "crr")
|
||||
# Enable compiler cache if available and explicitly requested
|
||||
# Disabled by default due to permission issues in CI environments
|
||||
option(ENABLE_CCACHE_TESTS "Enable ccache for test compilation" OFF)
|
||||
if(ENABLE_CCACHE_TESTS)
|
||||
find_program(CCACHE_PROGRAM ccache)
|
||||
if(CCACHE_PROGRAM)
|
||||
set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM})
|
||||
message(STATUS "Using ccache for faster test compilation")
|
||||
else()
|
||||
message(WARNING "ccache requested but not found")
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "ccache disabled for tests (use -DENABLE_CCACHE_TESTS=ON to enable)")
|
||||
endif()
|
||||
|
||||
# ============================================================================
|
||||
# Test Target Generation Loop
|
||||
# Test Configuration Matrix - Clean Focused Design
|
||||
# ============================================================================
|
||||
|
||||
foreach(datatype IN LISTS TEST_DATATYPES)
|
||||
foreach(layout IN LISTS TEST_LAYOUTS)
|
||||
foreach(config IN LISTS TEST_CONFIGS)
|
||||
set(CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/configs/${config}.json")
|
||||
if(EXISTS ${CONFIG_FILE})
|
||||
message(STATUS "Building tests for ${datatype}_${layout}_${config}")
|
||||
build_gemm_test_targets("${datatype}" "${layout}" "${config}")
|
||||
else()
|
||||
message(WARNING "Config file not found: ${CONFIG_FILE}")
|
||||
endif()
|
||||
# All supported data types and layouts for comprehensive testing
|
||||
# Note: fp64 not included (no MFMA hardware support)
|
||||
set(TEST_DATATYPES "fp16;fp8;bf16;fp32")
|
||||
set(TEST_LAYOUTS "rcr;rrr;ccr;crr")
|
||||
|
||||
# ============================================================================
|
||||
# Test Target Generation - Datatype-Specific Categories
|
||||
# ============================================================================
|
||||
|
||||
# 1. SMALL DATATYPES: Test optimized config for small data types (fp8, fp16, bf16)
|
||||
# These data types can use larger warp tiles due to smaller memory footprint
|
||||
set(SMALL_DATATYPE_CONFIG "small_datatype_config")
|
||||
set(SMALL_DATATYPE_CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/configs/${SMALL_DATATYPE_CONFIG}.json")
|
||||
set(SMALL_DATATYPES "fp8;fp16;bf16")
|
||||
|
||||
if(EXISTS ${SMALL_DATATYPE_CONFIG_FILE})
|
||||
message(STATUS "Processing small datatype config: ${SMALL_DATATYPE_CONFIG} (fp8, fp16, bf16)")
|
||||
foreach(datatype IN LISTS SMALL_DATATYPES)
|
||||
# fp8, fp16, bf16: testing all layouts (rcr, rrr, ccr, crr)
|
||||
foreach(layout IN LISTS TEST_LAYOUTS)
|
||||
build_gemm_test_targets("${datatype}" "${layout}" "${SMALL_DATATYPE_CONFIG}")
|
||||
endforeach()
|
||||
endforeach()
|
||||
endforeach()
|
||||
else()
|
||||
message(WARNING "Small datatype config file not found: ${SMALL_DATATYPE_CONFIG_FILE}")
|
||||
endif()
|
||||
|
||||
message(STATUS "GEMM tile engine tests configured for ${TEST_DATATYPES} with ${TEST_LAYOUTS} layouts using ${TEST_CONFIGS} configurations")
|
||||
# 2. PADDING COVERAGE: Test padding combinations with fixed fp16/rcr configuration
|
||||
# This focuses on padding behavior (pad_m, pad_n, pad_k)
|
||||
set(PADDING_CONFIG "padding_coverage_config")
|
||||
set(PADDING_CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/configs/${PADDING_CONFIG}.json")
|
||||
|
||||
if(EXISTS ${PADDING_CONFIG_FILE})
|
||||
message(STATUS "Processing padding config: ${PADDING_CONFIG} (fp16/rcr only)")
|
||||
build_gemm_test_targets("fp16" "rcr" "${PADDING_CONFIG}")
|
||||
else()
|
||||
message(WARNING "Padding config file not found: ${PADDING_CONFIG_FILE}")
|
||||
endif()
|
||||
|
||||
# 3. COVERAGE LEVEL: Quick or comprehensive testing
|
||||
# Quick: ~144 kernels with multiple tile sizes and trait combinations
|
||||
# Comprehensive: Several thousand kernels with extensive tile sizes, warp configurations, and all trait combinations
|
||||
set(COVERAGE_LEVEL "quick" CACHE STRING "Coverage level: quick or comprehensive")
|
||||
set_property(CACHE COVERAGE_LEVEL PROPERTY STRINGS "quick" "comprehensive")
|
||||
|
||||
if(COVERAGE_LEVEL STREQUAL "quick")
|
||||
set(COVERAGE_CONFIG "quick_coverage_config")
|
||||
set(COVERAGE_DESC "Quick - approximately 144 kernels with trait combinations")
|
||||
elseif(COVERAGE_LEVEL STREQUAL "comprehensive")
|
||||
set(COVERAGE_CONFIG "comprehensive_coverage_config")
|
||||
set(COVERAGE_DESC "Comprehensive - several thousand kernels with extensive tile and trait coverage")
|
||||
else()
|
||||
message(FATAL_ERROR "Invalid COVERAGE_LEVEL: ${COVERAGE_LEVEL}. Must be 'quick' or 'comprehensive'")
|
||||
endif()
|
||||
|
||||
set(COVERAGE_CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/configs/${COVERAGE_CONFIG}.json")
|
||||
|
||||
if(EXISTS ${COVERAGE_CONFIG_FILE})
|
||||
message(STATUS "Processing coverage config: ${COVERAGE_LEVEL} - ${COVERAGE_DESC}")
|
||||
build_gemm_test_targets("fp16" "rcr" "${COVERAGE_CONFIG}")
|
||||
else()
|
||||
message(WARNING "Coverage config file not found: ${COVERAGE_CONFIG_FILE}")
|
||||
endif()
|
||||
# ============================================================================
|
||||
|
||||
|
||||
message(STATUS "GEMM tile engine tests configured with datatype-specific design:")
|
||||
message(STATUS " - Small datatypes: fp8/fp16/bf16 (all layouts)")
|
||||
message(STATUS " - Padding coverage with fp16/rcr")
|
||||
message(STATUS " - Coverage level: ${COVERAGE_LEVEL} (~144 kernels quick, several thousand comprehensive)")
|
||||
message(STATUS " Use -DCOVERAGE_LEVEL=comprehensive for extensive testing")
|
||||
|
||||
@@ -17,11 +17,69 @@ JSON Config → tile_engine Python scripts → Generated Headers → Test Execut
|
||||
```
|
||||
|
||||
- **`--list_kernels`**: Get available kernel configurations from JSON
|
||||
- **`--gen_individual`**: Generate all kernel headers in parallel during CMake configuration
|
||||
- **`--gen_single`**: Generate individual kernel header for each configuration
|
||||
- **Same verification**: Uses tile_engine's adaptive error thresholds and reference calculations
|
||||
- **Same patterns**: Follows tile_engine's tensor initialization, stride calculation, and kernel launching
|
||||
|
||||
### Config-Specific Test Parameters
|
||||
|
||||
Each test configuration can specify optimized problem sizes in its JSON file:
|
||||
- **`test_params.problem_sizes`**: Array of `{m, n, k, split_k}` configurations
|
||||
- **CMake extraction**: `extract_test_params.py` generates config-specific test parameter files
|
||||
- **Build integration**: Each test target uses parameters appropriate for its kernel configuration
|
||||
- **Optimized testing**: Different configs test different problem sizes that showcase their strengths
|
||||
|
||||
|
||||
The key idea: **Unit tests that use tile_engine's exact kernel generation and verification methodology** instead of creating separate test infrastructure.
|
||||
|
||||
## Test Configurations
|
||||
|
||||
### 1. **Simple Test** (`simple_test_config.json`)
|
||||
- **Purpose**: Basic functionality validation
|
||||
- **Config**: 128x128x64, warp 2x2x1, warp_tile 16x16x16
|
||||
- **Traits**: compv3 + compv4 pipelines
|
||||
- **Coverage**: ~2 kernels per datatype/layout
|
||||
|
||||
### 2. **Small Datatype** (`small_datatype_config.json`)
|
||||
- **Purpose**: Optimized for fp8/fp16/bf16 data types
|
||||
- **Config**: 128x128x32, warp 2x2x1, warp_tile 32x32x16
|
||||
- **Traits**: compv3 pipeline only
|
||||
- **Coverage**: All 4 layouts (rcr, rrr, ccr, crr) for fp8, fp16, bf16
|
||||
|
||||
### 3. **Padding Coverage** (`padding_coverage_config.json`)
|
||||
- **Purpose**: Test padding behavior with all padding flags enabled
|
||||
- **Config**: Fixed 64x64x32, warp 2x2x1, warp_tile 32x32x16
|
||||
- **Padding**: All enabled (pad_m=true, pad_n=true, pad_k=true)
|
||||
- **Problem sizes**: Vector-aligned but not tile-aligned (104×104×56, 200×152×80, 152×200×64)
|
||||
- **Coverage**: 1 kernel configuration testing padding with irregular sizes
|
||||
|
||||
### 4. **Coverage Testing** (Quick or Comprehensive)
|
||||
- **Purpose**: Comprehensive testing across tile sizes, warp configurations, and trait combinations
|
||||
- **Quick** (`quick_coverage_config.json`): Approximately 144 kernels
|
||||
- tile_m/n: [32, 64, 256], tile_k: [16, 32]
|
||||
- warp config: 2×2×1, warp_tile 16×16×16
|
||||
- Traits: 3 pipelines × 2 epilogues × 2 schedulers (persistent=false only)
|
||||
- Focused set testing trait combinations with multiple tile sizes
|
||||
- **Comprehensive** (`comprehensive_coverage_config.json`): Several thousand kernels
|
||||
- tile_m/n: [16-256 step 16]
|
||||
- tile_k: [16, 32, 64]
|
||||
- warp_m/n: [1, 2, 4], warp_tile_m/n: [16, 32], warp_tile_k: [16, 32]
|
||||
- Traits: 3 pipelines × 2 epilogues × 2 schedulers × 2 persistent
|
||||
- Extensive coverage across all tile sizes, warp configurations, and trait combinations
|
||||
- Exact count varies based on validation filtering
|
||||
- **Note**: Use CMake option `-DCOVERAGE_LEVEL=comprehensive` to enable comprehensive testing (default is quick)
|
||||
|
||||
## Data Type Support
|
||||
- ✅ **fp8, fp16, bf16**: Fully supported - all layouts (rcr, rrr, ccr, crr)
|
||||
- ❌ **fp64**: Not supported (hardware MFMA limitation)
|
||||
- ⏳ **fp32, bf8, pk-int4-t**: Not yet supported by gemm_instance_builder (will be added later)
|
||||
|
||||
## Test Result Behavior
|
||||
|
||||
Tests automatically handle unsupported configurations through runtime validation:
|
||||
- **PASSED**: Kernel executed correctly with results within error thresholds ✅
|
||||
- **SKIPPED**: Kernel validation returned "Arguments not supported" (expected for certain problem sizes/configurations) ⚠️
|
||||
- **FAILED**: Actual error or incorrect computation results ❌
|
||||
|
||||
When a kernel's `IsSupportedArgument()` check fails (e.g., due to vector alignment requirements, dimension constraints, or padding limitations), the test is automatically skipped rather than failed. This allows comprehensive testing across various problem sizes while gracefully handling configurations that don't meet specific kernel requirements.
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Comprehensive coverage testing - extensive tile size coverage (16-256, step 16) with multiple warp configurations and all trait combinations. Several thousand kernels."
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 512, "n": 512, "k": 256, "split_k": 1},
|
||||
{"m": 1024, "n": 512, "k": 512, "split_k": 1},
|
||||
{"m": 512, "n": 1024, "k": 512, "split_k": 1},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 1},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 2},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 4}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256]},
|
||||
"tile_n": {"values": [16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256]},
|
||||
"tile_k": {"values": [16, 32, 64]},
|
||||
"warp_m": {"values": [1, 2, 4]},
|
||||
"warp_n": {"values": [1, 2, 4]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [16, 32]},
|
||||
"warp_tile_n": {"values": [16, 32]},
|
||||
"warp_tile_k": {"values": [8, 16, 32, 64, 128]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["mem", "compv3", "compv4"]},
|
||||
"epilogue": {"values": ["default", "cshuffle"]},
|
||||
"scheduler": {"values": ["intrawave", "interwave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [true, false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Configuration optimized for large data types (fp32) with smaller warp tiles due to memory constraints"
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 512, "n": 512, "k": 128, "split_k": 1},
|
||||
{"m": 512, "n": 256, "k": 192, "split_k": 1},
|
||||
{"m": 256, "n": 384, "k": 192, "split_k": 1}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [256]},
|
||||
"tile_n": {"values": [128]},
|
||||
"tile_k": {"values": [32]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [16]},
|
||||
"warp_tile_n": {"values": [16]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["compv3"]},
|
||||
"epilogue": {"values": ["default"]},
|
||||
"scheduler": {"values": ["intrawave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Padding coverage testing - fixed config with fp16/rcr, varying only padding combinations"
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 104, "n": 104, "k": 56, "split_k": 1},
|
||||
{"m": 200, "n": 152, "k": 80, "split_k": 1},
|
||||
{"m": 152, "n": 200, "k": 64, "split_k": 1}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [64]},
|
||||
"tile_n": {"values": [64]},
|
||||
"tile_k": {"values": [32]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [32]},
|
||||
"warp_tile_n": {"values": [32]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["compv3"]},
|
||||
"epilogue": {"values": ["default"]},
|
||||
"scheduler": {"values": ["intrawave"]},
|
||||
"pad_m": {"values": [true]},
|
||||
"pad_n": {"values": [true]},
|
||||
"pad_k": {"values": [true]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Quick coverage testing - tests multiple tile sizes with all trait combinations (pipelines, epilogues, schedulers). Approximately 144 kernels."
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 512, "n": 1024, "k": 512, "split_k": 1},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 2},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 4}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [32, 64, 256]},
|
||||
"tile_n": {"values": [32, 64, 256]},
|
||||
"tile_k": {"values": [16, 32]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [16]},
|
||||
"warp_tile_n": {"values": [16]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["mem", "compv3", "compv4"]},
|
||||
"epilogue": {"values": ["default", "cshuffle"]},
|
||||
"scheduler": {"values": ["intrawave", "interwave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
@@ -1,88 +1,33 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Basic functionality validation with moderate problem sizes"
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 256, "n": 256, "k": 128, "split_k": 1},
|
||||
{"m": 512, "n": 256, "k": 256, "split_k": 1},
|
||||
{"m": 256, "n": 512, "k": 256, "split_k": 1}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {
|
||||
"values": [
|
||||
128
|
||||
]
|
||||
},
|
||||
"tile_n": {
|
||||
"values": [
|
||||
128
|
||||
]
|
||||
},
|
||||
"tile_k": {
|
||||
"values": [
|
||||
64
|
||||
]
|
||||
},
|
||||
"warp_m": {
|
||||
"values": [
|
||||
2
|
||||
]
|
||||
},
|
||||
"warp_n": {
|
||||
"values": [
|
||||
2
|
||||
]
|
||||
},
|
||||
"warp_k": {
|
||||
"values": [
|
||||
1
|
||||
]
|
||||
},
|
||||
"warp_tile_m": {
|
||||
"values": [
|
||||
16
|
||||
]
|
||||
},
|
||||
"warp_tile_n": {
|
||||
"values": [
|
||||
16
|
||||
]
|
||||
},
|
||||
"warp_tile_k": {
|
||||
"values": [
|
||||
16
|
||||
]
|
||||
}
|
||||
"tile_m": {"values": [128]},
|
||||
"tile_n": {"values": [128]},
|
||||
"tile_k": {"values": [64]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [16]},
|
||||
"warp_tile_n": {"values": [16]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {
|
||||
"values": [
|
||||
"compv3",
|
||||
"compv4"
|
||||
]
|
||||
},
|
||||
"scheduler": {
|
||||
"values": [
|
||||
"intrawave"
|
||||
]
|
||||
},
|
||||
"epilogue": {
|
||||
"values": [
|
||||
"default"
|
||||
]
|
||||
},
|
||||
"pad_m": {
|
||||
"values": [
|
||||
false
|
||||
]
|
||||
},
|
||||
"pad_n": {
|
||||
"values": [
|
||||
false
|
||||
]
|
||||
},
|
||||
"pad_k": {
|
||||
"values": [
|
||||
false
|
||||
]
|
||||
},
|
||||
"persistent": {
|
||||
"values": [
|
||||
false
|
||||
]
|
||||
}
|
||||
"pipeline": {"values": ["compv3", "compv4"]},
|
||||
"epilogue": {"values": ["default"]},
|
||||
"scheduler": {"values": ["intrawave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"problem": {
|
||||
"description": "Configuration optimized for small data types (fp8, fp16, bf16) with larger warp tiles"
|
||||
},
|
||||
"test_params": {
|
||||
"problem_sizes": [
|
||||
{"m": 512, "n": 512, "k": 256, "split_k": 1},
|
||||
{"m": 1024, "n": 512, "k": 512, "split_k": 1},
|
||||
{"m": 512, "n": 1024, "k": 512, "split_k": 1},
|
||||
{"m": 1024, "n": 1024, "k": 256, "split_k": 1}
|
||||
]
|
||||
},
|
||||
"tile_config": {
|
||||
"tile_m": {"values": [128]},
|
||||
"tile_n": {"values": [128]},
|
||||
"tile_k": {"values": [32]},
|
||||
"warp_m": {"values": [2]},
|
||||
"warp_n": {"values": [2]},
|
||||
"warp_k": {"values": [1]},
|
||||
"warp_tile_m": {"values": [32]},
|
||||
"warp_tile_n": {"values": [32]},
|
||||
"warp_tile_k": {"values": [16]}
|
||||
},
|
||||
"trait_config": {
|
||||
"pipeline": {"values": ["compv3"]},
|
||||
"epilogue": {"values": ["default"]},
|
||||
"scheduler": {"values": ["intrawave"]},
|
||||
"pad_m": {"values": [false]},
|
||||
"pad_n": {"values": [false]},
|
||||
"pad_k": {"values": [false]},
|
||||
"persistent": {"values": [false]}
|
||||
},
|
||||
"k_block_per_cu": 1,
|
||||
"permute_n": false
|
||||
}
|
||||
71
test/ck_tile/gemm_tile_engine/extract_test_params.py
Normal file
71
test/ck_tile/gemm_tile_engine/extract_test_params.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def extract_test_params(config_file, output_file):
|
||||
"""Extract test parameters from config JSON and write to output file"""
|
||||
|
||||
# Read config file
|
||||
with open(config_file, "r") as f:
|
||||
config = json.load(f)
|
||||
|
||||
# Extract test parameters
|
||||
test_params = []
|
||||
if "test_params" in config and "problem_sizes" in config["test_params"]:
|
||||
test_params = config["test_params"]["problem_sizes"]
|
||||
else:
|
||||
# Default test parameters if none specified
|
||||
test_params = [
|
||||
{"m": 256, "n": 256, "k": 128, "split_k": 1},
|
||||
{"m": 256, "n": 256, "k": 1024, "split_k": 1},
|
||||
{"m": 256, "n": 512, "k": 512, "split_k": 1},
|
||||
{"m": 512, "n": 256, "k": 512, "split_k": 1},
|
||||
]
|
||||
|
||||
# Write to output file in C++ format
|
||||
output_dir = Path(output_file).parent
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(output_file, "w") as f:
|
||||
f.write("// Generated test parameters for this configuration\n")
|
||||
f.write("// This file is auto-generated during CMake configuration\n\n")
|
||||
f.write("static const std::vector<GemmTestParams> CONFIG_TEST_PARAMS = {\n")
|
||||
|
||||
for i, params in enumerate(test_params):
|
||||
comma = "," if i < len(test_params) - 1 else ""
|
||||
f.write(
|
||||
f" {{{params['m']}, {params['n']}, {params['k']}, {params['split_k']}}}{comma}\n"
|
||||
)
|
||||
|
||||
f.write("};\n")
|
||||
|
||||
print(
|
||||
f"Extracted {len(test_params)} test parameters from {config_file} -> {output_file}"
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Extract test parameters from config JSON"
|
||||
)
|
||||
parser.add_argument("--config_file", required=True, help="Input config JSON file")
|
||||
parser.add_argument(
|
||||
"--output_file", required=True, help="Output test parameters file"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.exists(args.config_file):
|
||||
print(f"Error: Config file not found: {args.config_file}")
|
||||
return 1
|
||||
|
||||
extract_test_params(args.config_file, args.output_file)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
@@ -1,8 +1,14 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
// Unit tests for tile_engine generated GEMM kernels
|
||||
// Tests kernel correctness using tile_engine's verification methodology
|
||||
/**
|
||||
* @file test_gemm_simple.cpp
|
||||
* @brief Unit tests for GEMM kernels generated by gemm_instance_builder
|
||||
*
|
||||
* This test includes kernels generated during CMake configuration by
|
||||
* gemm_instance_builder.py and tests them with problem sizes extracted
|
||||
* from the corresponding JSON configuration files.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <iostream>
|
||||
@@ -68,6 +74,11 @@ struct GemmTestParams
|
||||
int m, n, k, split_k;
|
||||
};
|
||||
|
||||
// Include config-specific test parameters (after GemmTestParams struct is defined)
|
||||
#ifdef GEMM_TEST_PARAMS_HPP
|
||||
#include GEMM_TEST_PARAMS_HPP
|
||||
#endif
|
||||
|
||||
class GemmTileEngineTest : public ::testing::TestWithParam<GemmTestParams>
|
||||
{
|
||||
protected:
|
||||
@@ -185,7 +196,16 @@ TEST_P(GemmTileEngineTest, BasicFunctionality)
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
FAIL() << "Kernel launch failed: " << e.what();
|
||||
std::string error_msg(e.what());
|
||||
// If arguments not supported, skip the test (configuration validation failure, not a bug)
|
||||
if(error_msg.find("Arguments not supported") != std::string::npos)
|
||||
{
|
||||
GTEST_SKIP() << "Configuration not supported: " << e.what();
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "Kernel launch failed: " << e.what();
|
||||
}
|
||||
}
|
||||
|
||||
// Copy result back from device
|
||||
@@ -208,13 +228,11 @@ TEST_P(GemmTileEngineTest, KernelInfo)
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
// Define test parameters for GEMM verification
|
||||
// Use config-specific test parameters (included via compile flags)
|
||||
// CONFIG_TEST_PARAMS is defined in the auto-generated test_params.hpp file
|
||||
INSTANTIATE_TEST_SUITE_P(GemmVerification,
|
||||
GemmTileEngineTest,
|
||||
::testing::Values(GemmTestParams{256, 256, 128, 1},
|
||||
GemmTestParams{256, 256, 1024, 1},
|
||||
GemmTestParams{256, 512, 512, 1},
|
||||
GemmTestParams{512, 256, 512, 1}),
|
||||
::testing::ValuesIn(CONFIG_TEST_PARAMS),
|
||||
[](const ::testing::TestParamInfo<GemmTestParams>& param_info) {
|
||||
return std::to_string(param_info.param.m) + "x" +
|
||||
std::to_string(param_info.param.n) + "x" +
|
||||
|
||||
Reference in New Issue
Block a user