Files
composable_kernel/experimental/grouped_convolution_tile_instances/CMakeLists.txt
Ville Pietilä ec2dbfbfde [rocm-libraries] ROCm/rocm-libraries#5516 (commit ff3afda)
[CK_TILE, CK_BUILDER] Add bwd data to CK Tile profiler
 (#5516)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Motivation

We want close the performance gap between old CK and CK Tile for bwd
data convolutions. To achieve this, we need tow things

- Configurations for the old CK kernel instances such that we can map
them into CK Tile instances.
- Support in CK profiler to run the CK Tile instance with the same API
as for old CK instances.

## Technical Details

Extracted kernel configurations from old CK. The codegen python script
for CK Tile convs is extended to support also bwd data. The generated
instances are added to the CMake build (target
`device_grouped_conv_bwd_data_tile_instances`).
A new profiler op (`grouped_conv_bwd_data_tile`) has been added to the
CK Profiler. The API is same as for old CK's profiler op
`grouped_conv_bwd_data`.
2026-03-25 14:36:11 +00:00

42 lines
2.2 KiB
CMake

# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
if(GPU_TARGETS MATCHES "gfx9")
# Generate instances using python script if instance directories don't exist
set(INSTANCES_DIR ${CMAKE_CURRENT_BINARY_DIR})
if(NOT EXISTS ${INSTANCES_DIR}/forward OR
NOT EXISTS ${INSTANCES_DIR}/backward_weight OR
NOT EXISTS ${INSTANCES_DIR}/backward_data)
find_package(Python3 COMPONENTS Interpreter Development)
execute_process(
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_instances.py --mode=tests --instances_dir=${CMAKE_CURRENT_BINARY_DIR}
RESULT_VARIABLE ret
OUTPUT_VARIABLE output
ERROR_VARIABLE error
)
if(NOT ret EQUAL 0)
message(FATAL_ERROR "Failed to generate instances. Return code: ${ret}\nOutput: ${output}\nError: ${error}")
endif()
endif()
# Find cpp files and create lib for instances
file(GLOB_RECURSE GROUPED_CONV_FWD_TILE "${CMAKE_CURRENT_BINARY_DIR}/forward/*.cpp")
add_instance_library(device_grouped_conv_fwd_tile_instances ${GROUPED_CONV_FWD_TILE})
target_include_directories(device_grouped_conv_fwd_tile_instances PRIVATE
"${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
target_compile_options(device_grouped_conv_fwd_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
file(GLOB_RECURSE GROUPED_CONV_BWD_WEIGHT_TILE "${CMAKE_CURRENT_BINARY_DIR}/backward_weight/*.cpp")
add_instance_library(device_grouped_conv_bwd_weight_tile_instances ${GROUPED_CONV_BWD_WEIGHT_TILE})
target_include_directories(device_grouped_conv_bwd_weight_tile_instances PRIVATE
"${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
target_compile_options(device_grouped_conv_bwd_weight_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
file(GLOB_RECURSE GROUPED_CONV_BWD_DATA_TILE "${CMAKE_CURRENT_BINARY_DIR}/backward_data/*.cpp")
add_instance_library(device_grouped_conv_bwd_data_tile_instances ${GROUPED_CONV_BWD_DATA_TILE})
target_include_directories(device_grouped_conv_bwd_data_tile_instances PRIVATE
"${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
target_compile_options(device_grouped_conv_bwd_data_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
endif()