mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 20:09:25 +00:00
* WIP POC of dispatcher
* Dispatcher python workflow setup.
* Dispatcher cleanup and updates.
Further dispatcher cleanup and updates.
Build fixes
Improvements and python to CK example
Improvements to readme
* Fixes to python paths
* Cleaning up code
* Improving dispatcher support for different arch
Fixing typos
* Fix formatting errors
* Cleaning up examples
* Improving codegeneration
* Improving and fixing C++ examples
* Adding conv functionality (fwd,bwd,bwdw) and examples.
* Fixes based on feedback.
* Further fixes based on feedback.
* Adding stress test for autogeneration and autocorrection, and fixing preshuffle bug.
* Another round of improvements based on feedback.
* Trimming out unnecessary code.
* Fixing the multi-D implementation.
* Using gpu verification for gemms and fixing convolutions tflops calculation.
* Fix counter usage issue and arch filtering per ops.
* Adding changelog and other fixes.
* Improve examples and resolve critical bugs.
* Reduce build time for python examples.
* Fixing minor bug.
* Fix compilation error.
* Improve installation instructions for dispatcher.
* Add docker based installation instructions for dispatcher.
* Fixing arch-based filtering to match tile engine.
* Remove dead code and fix arch filtering.
* Minor bugfix.
* Updates after rebase.
* Trimming code.
* Fix copyright headers.
* Consolidate examples, cut down code.
* Minor fixes.
* Improving python examples.
* Update readmes.
* Remove conv functionality.
* Cleanup following conv removable.
[ROCm/composable_kernel commit: 9e049a32a1]
126 lines
4.0 KiB
CMake
126 lines
4.0 KiB
CMake
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
# CK Tile GEMM Unified Code Generator
|
|
|
|
cmake_minimum_required(VERSION 3.16)
|
|
|
|
# Find Python
|
|
find_package(Python3 COMPONENTS Interpreter REQUIRED)
|
|
|
|
# Configuration
|
|
set(CODEGEN_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/unified_gemm_codegen.py")
|
|
set(CODEGEN_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/default_config.json")
|
|
set(CODEGEN_OUTPUT_DIR "${CMAKE_BINARY_DIR}/generated/tile_gemm")
|
|
|
|
# Configurable options
|
|
set(CK_TILE_GEMM_DATATYPE "fp16" CACHE STRING "GEMM data type (fp16, bf16, fp32, fp8, bf8, int8)")
|
|
set(CK_TILE_GEMM_LAYOUT "rcr" CACHE STRING "GEMM layout (rcr, rrr, crr, ccr)")
|
|
set(CK_TILE_GEMM_VARIANTS "standard" CACHE STRING "GEMM variants (standard, preshuffle, multi_d)")
|
|
set(CK_TILE_GEMM_GPU_TARGET "gfx942" CACHE STRING "Target GPU architecture")
|
|
set(CK_TILE_GEMM_PARALLEL ON CACHE BOOL "Enable parallel generation")
|
|
|
|
# Custom target to run code generation
|
|
add_custom_target(generate_tile_gemm_kernels
|
|
COMMAND ${Python3_EXECUTABLE} ${CODEGEN_SCRIPT}
|
|
--output-dir ${CODEGEN_OUTPUT_DIR}
|
|
--datatype ${CK_TILE_GEMM_DATATYPE}
|
|
--layout ${CK_TILE_GEMM_LAYOUT}
|
|
--gpu-target ${CK_TILE_GEMM_GPU_TARGET}
|
|
--config ${CODEGEN_CONFIG}
|
|
--variants ${CK_TILE_GEMM_VARIANTS}
|
|
$<$<NOT:$<BOOL:${CK_TILE_GEMM_PARALLEL}>>:--no-parallel>
|
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
COMMENT "Generating CK Tile GEMM kernels and dispatcher wrappers..."
|
|
VERBATIM
|
|
)
|
|
|
|
# Create output directory
|
|
file(MAKE_DIRECTORY ${CODEGEN_OUTPUT_DIR})
|
|
|
|
# Add generated headers to include path
|
|
include_directories(${CODEGEN_OUTPUT_DIR})
|
|
|
|
# Installation
|
|
install(FILES
|
|
${CODEGEN_SCRIPT}
|
|
${CODEGEN_CONFIG}
|
|
README.md
|
|
DESTINATION share/ck_tile/codegen
|
|
)
|
|
|
|
# Helper function for projects to generate kernels
|
|
function(ck_tile_generate_gemm_kernels)
|
|
set(options PARALLEL)
|
|
set(oneValueArgs OUTPUT_DIR DATATYPE LAYOUT GPU_TARGET CONFIG)
|
|
set(multiValueArgs VARIANTS)
|
|
cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
|
|
|
|
# Set defaults
|
|
if(NOT ARG_OUTPUT_DIR)
|
|
set(ARG_OUTPUT_DIR "${CMAKE_BINARY_DIR}/generated/tile_gemm")
|
|
endif()
|
|
if(NOT ARG_DATATYPE)
|
|
set(ARG_DATATYPE "fp16")
|
|
endif()
|
|
if(NOT ARG_LAYOUT)
|
|
set(ARG_LAYOUT "rcr")
|
|
endif()
|
|
if(NOT ARG_GPU_TARGET)
|
|
set(ARG_GPU_TARGET "gfx942")
|
|
endif()
|
|
if(NOT ARG_CONFIG)
|
|
set(ARG_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/default_config.json")
|
|
endif()
|
|
if(NOT ARG_VARIANTS)
|
|
set(ARG_VARIANTS "standard")
|
|
endif()
|
|
|
|
# Build command
|
|
set(CMD ${Python3_EXECUTABLE} ${CODEGEN_SCRIPT}
|
|
--output-dir ${ARG_OUTPUT_DIR}
|
|
--datatype ${ARG_DATATYPE}
|
|
--layout ${ARG_LAYOUT}
|
|
--gpu-target ${ARG_GPU_TARGET}
|
|
--config ${ARG_CONFIG}
|
|
--variants ${ARG_VARIANTS}
|
|
)
|
|
|
|
if(NOT ARG_PARALLEL)
|
|
list(APPEND CMD --no-parallel)
|
|
endif()
|
|
|
|
# Execute
|
|
execute_process(
|
|
COMMAND ${CMD}
|
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
RESULT_VARIABLE RESULT
|
|
OUTPUT_VARIABLE OUTPUT
|
|
ERROR_VARIABLE ERROR
|
|
)
|
|
|
|
if(NOT RESULT EQUAL 0)
|
|
message(FATAL_ERROR "Failed to generate GEMM kernels:\n${ERROR}")
|
|
else()
|
|
message(STATUS "Generated GEMM kernels: ${OUTPUT}")
|
|
endif()
|
|
endfunction()
|
|
|
|
# Example usage documentation
|
|
message(STATUS "CK Tile GEMM Code Generator configured")
|
|
message(STATUS " Script: ${CODEGEN_SCRIPT}")
|
|
message(STATUS " Config: ${CODEGEN_CONFIG}")
|
|
message(STATUS " Output: ${CODEGEN_OUTPUT_DIR}")
|
|
message(STATUS "")
|
|
message(STATUS "To generate kernels:")
|
|
message(STATUS " cmake --build . --target generate_tile_gemm_kernels")
|
|
message(STATUS "")
|
|
message(STATUS "Or use CMake function:")
|
|
message(STATUS " ck_tile_generate_gemm_kernels(")
|
|
message(STATUS " OUTPUT_DIR ./generated")
|
|
message(STATUS " DATATYPE fp16")
|
|
message(STATUS " LAYOUT rcr")
|
|
message(STATUS " VARIANTS standard preshuffle multi_d")
|
|
message(STATUS " PARALLEL")
|
|
message(STATUS " )")
|