mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 04:31:25 +00:00
[CK_TILE, CK_BUILDER] Add bwd data to CK Tile profiler (#5516) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation We want to close the performance gap between old CK and CK Tile for bwd data convolutions. To achieve this, we need two things - Configurations for the old CK kernel instances such that we can map them into CK Tile instances. - Support in CK profiler to run the CK Tile instance with the same API as for old CK instances. ## Technical Details Extracted kernel configurations from old CK. The codegen python script for CK Tile convs is extended to also support bwd data. The generated instances are added to the CMake build (target `device_grouped_conv_bwd_data_tile_instances`). A new profiler op (`grouped_conv_bwd_data_tile`) has been added to the CK Profiler. The API is the same as for old CK's profiler op `grouped_conv_bwd_data`.
42 lines
2.2 KiB
CMake
42 lines
2.2 KiB
CMake
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
# Configure the CK Tile grouped-convolution instance libraries (forward,
# backward weight, backward data). Everything below is skipped unless
# GPU_TARGETS matches "gfx9".
if(GPU_TARGETS MATCHES "gfx9")
  # Instance sources live in the binary directory, one sub-directory per
  # convolution direction. They are generated at configure time by
  # generate_instances.py, but only when at least one of the three
  # sub-directories is missing.
  set(INSTANCES_DIR "${CMAKE_CURRENT_BINARY_DIR}")
  if(NOT EXISTS "${INSTANCES_DIR}/forward" OR
     NOT EXISTS "${INSTANCES_DIR}/backward_weight" OR
     NOT EXISTS "${INSTANCES_DIR}/backward_data")
    # The interpreter is mandatory because it runs the generator below; fail
    # right here with a clear message instead of a confusing execute_process
    # error. Development stays optional: it is not used in this file, and
    # requiring it would break hosts that lack the Python headers.
    find_package(Python3 REQUIRED COMPONENTS Interpreter OPTIONAL_COMPONENTS Development)
    execute_process(
      COMMAND "${Python3_EXECUTABLE}"
              "${CMAKE_CURRENT_SOURCE_DIR}/generate_instances.py"
              --mode=tests
              "--instances_dir=${INSTANCES_DIR}"
      RESULT_VARIABLE ret
      OUTPUT_VARIABLE output
      ERROR_VARIABLE error
    )

    # Abort configuration and surface the captured stdout/stderr of the script.
    if(NOT ret EQUAL 0)
      message(FATAL_ERROR "Failed to generate instances. Return code: ${ret}\nOutput: ${output}\nError: ${error}")
    endif()
  endif()

  # Collect the generated .cpp files per direction and create one instance
  # library for each. Every library gets the builder test utilities on its
  # private include path and is compiled with
  # -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0.

  # Forward convolution instances.
  file(GLOB_RECURSE GROUPED_CONV_FWD_TILE "${INSTANCES_DIR}/forward/*.cpp")
  add_instance_library(device_grouped_conv_fwd_tile_instances ${GROUPED_CONV_FWD_TILE})
  target_include_directories(device_grouped_conv_fwd_tile_instances PRIVATE
    "${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
  target_compile_options(device_grouped_conv_fwd_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)

  # Backward-weight convolution instances.
  file(GLOB_RECURSE GROUPED_CONV_BWD_WEIGHT_TILE "${INSTANCES_DIR}/backward_weight/*.cpp")
  add_instance_library(device_grouped_conv_bwd_weight_tile_instances ${GROUPED_CONV_BWD_WEIGHT_TILE})
  target_include_directories(device_grouped_conv_bwd_weight_tile_instances PRIVATE
    "${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
  target_compile_options(device_grouped_conv_bwd_weight_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)

  # Backward-data convolution instances.
  file(GLOB_RECURSE GROUPED_CONV_BWD_DATA_TILE "${INSTANCES_DIR}/backward_data/*.cpp")
  add_instance_library(device_grouped_conv_bwd_data_tile_instances ${GROUPED_CONV_BWD_DATA_TILE})
  target_include_directories(device_grouped_conv_bwd_data_tile_instances PRIVATE
    "${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
  target_compile_options(device_grouped_conv_bwd_data_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
endif()
|