mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
This reverts commit 3a0cb27966.
This commit is contained in:
@@ -1,116 +0,0 @@
|
||||
# Function to generate templated instantiation functions and caller function.
|
||||
|
||||
# In order to reduce build times, we split the instantiation of template functions into multiple files.
|
||||
# Developers can use ck::util::generate_sharded_instantiations to generate the instantiation functions,
|
||||
# which can be placed the TEMPLATE_FILE (typically a .in file).
|
||||
|
||||
# This CMake function generates the instantiation functions and a caller function that calls all the instantiation
|
||||
# functions. The ck::util::generate_sharded_instantiations function allows us to generate an arbitrary number of
|
||||
# shards (NUM_SHARDS). This function loops over the shards, generates an instantiation function for each shard,
|
||||
# and generates a caller function that calls all the instantiation functions.
|
||||
|
||||
# The explicit instatiation pattern requires the use of `extern template` to avoid implicit instantiation
|
||||
# of the template functions in the caller function, and that code is automatically generated by this function.
|
||||
|
||||
# In addition to the user-supplied template, this CMake function uses two generic templates:
|
||||
#
|
||||
# 1. `instantiate_shard.in`: This is the template for the instantiation functions.
|
||||
# 2. `call_shard.in`: This is the template for the caller function that calls all the instantiation functions.
|
||||
|
||||
# This function takes the following arguments:
|
||||
#
|
||||
# - INSTANCES_NAME: The name of the instances (the calling function will be named `add_${INSTANCE_NAMES}`).
|
||||
# - TEMPLATE_FILE: The path to the template file that contains the templated instantiation function definitions.
|
||||
# - NUM_SHARDS: The number of shards to generate.
|
||||
# - OUTPUT_DIR: The build directory where the generated source files will be placed.
|
||||
# - SRC_LIST: The list of source files to which the generated source files will be added.
|
||||
|
||||
|
||||
function(generate_sharded_instantiations)
|
||||
cmake_parse_arguments(
|
||||
GEN_SHARDED
|
||||
# No boolean arguments
|
||||
""
|
||||
# Single-value arguments
|
||||
"INSTANCES_NAME;TEMPLATE_FILE;NUM_SHARDS;OUTPUT_DIR;SRC_LIST"
|
||||
# No multi-value arguments.
|
||||
""
|
||||
${ARGN}
|
||||
)
|
||||
if (NOT GEN_SHARDED_INSTANCES_NAME)
|
||||
message(FATAL_ERROR "INSTANCES_NAME is required for generate_sharded_instantiations")
|
||||
endif()
|
||||
if (NOT GEN_SHARDED_TEMPLATE_FILE)
|
||||
message(FATAL_ERROR "TEMPLATE_FILE is required for generate_sharded_instantiations")
|
||||
endif()
|
||||
if (NOT GEN_SHARDED_NUM_SHARDS)
|
||||
message(FATAL_ERROR "NUM_SHARDS is required for generate_sharded_instantiations")
|
||||
endif()
|
||||
if(NOT GEN_SHARDED_OUTPUT_DIR)
|
||||
message(FATAL_ERROR "OUTPUT_DIR is required for generate_sharded_instantiations")
|
||||
endif()
|
||||
if (NOT GEN_SHARDED_SRC_LIST)
|
||||
message(FATAL_ERROR "SRC_LIST is required for generate_sharded_instantiations")
|
||||
endif()
|
||||
|
||||
file(MAKE_DIRECTORY ${GEN_SHARDED_OUTPUT_DIR})
|
||||
|
||||
|
||||
set(GENERATED_SOURCE_FILES "")
|
||||
set(EXTERN_TEMPLATE_STATEMENTS "")
|
||||
set(CALL_STATEMENTS "")
|
||||
message(STATUS "Generating sharded instantiations for target: ${GEN_SHARDED_INSTANCES_NAME}")
|
||||
|
||||
set(INSTANCES "${GEN_SHARDED_INSTANCES_NAME}")
|
||||
|
||||
# Generate the inc file with the template function defintions.
|
||||
# This include file will hold the template function definitions and a using alias for all the shard
|
||||
# instantiation functions.
|
||||
configure_file(
|
||||
"${GEN_SHARDED_TEMPLATE_FILE}"
|
||||
"${GEN_SHARDED_OUTPUT_DIR}/${INSTANCES}.inc"
|
||||
@ONLY
|
||||
)
|
||||
|
||||
# Generate the sharded instantiation functions.
|
||||
# This is where the build parallelization happens.
|
||||
# Each of these source files will contain a single instantiation function for a shard,
|
||||
# which will be called sequentially by the caller function.
|
||||
set(INC_DIR "${GEN_SHARDED_INC_DIR}")
|
||||
math(EXPR LAST_SHARD_ID "${GEN_SHARDED_NUM_SHARDS} - 1")
|
||||
foreach(SHARD_ID RANGE 0 ${LAST_SHARD_ID})
|
||||
set(NUM_SHARDS "${GEN_SHARDED_NUM_SHARDS}")
|
||||
set(SHARD_FUNCTION_PATH "${GEN_SHARDED_OUTPUT_DIR}/${INSTANCES}_shard_${SHARD_ID}.cpp")
|
||||
set(SHARD_FUNCTION_TEMPLATE "${PROJECT_SOURCE_DIR}/cmake/instantiate_shard.in")
|
||||
configure_file(
|
||||
"${SHARD_FUNCTION_TEMPLATE}"
|
||||
"${SHARD_FUNCTION_PATH}"
|
||||
@ONLY
|
||||
)
|
||||
list(APPEND GENERATED_SOURCE_FILES "${SHARD_FUNCTION_PATH}")
|
||||
set(SHARDED_FUNCTION_NAME "add_${INSTANCES}_shard<${NUM_SHARDS}, ${SHARD_ID}>")
|
||||
list(APPEND EXTERN_TEMPLATE_STATEMENTS "extern template void\n${SHARDED_FUNCTION_NAME}(\n ${INSTANCES}& instances)")
|
||||
list(APPEND CALL_STATEMENTS " ${SHARDED_FUNCTION_NAME}(instances)")
|
||||
endforeach()
|
||||
|
||||
# Join the include statements, the extern template declarations, and the call statements each
|
||||
# into a single string for variable substitution in the caller function.
|
||||
string(REPLACE ";" ";\n" INCLUDE_STATEMENTS "${INCLUDE_STATEMENTS}")
|
||||
string(REPLACE ";" ";\n" CALL_STATEMENTS "${CALL_STATEMENTS}")
|
||||
string(REPLACE ";" ";\n" EXTERN_TEMPLATE_STATEMENTS "${EXTERN_TEMPLATE_STATEMENTS}")
|
||||
|
||||
# Generate the caller function.
|
||||
set(CALLER_FUNCTION_PATH "${GEN_SHARDED_OUTPUT_DIR}/${INSTANCES}.cpp")
|
||||
set(FUNCTION_TEMPLATE "${PROJECT_SOURCE_DIR}/cmake/call_shard.in")
|
||||
configure_file(
|
||||
"${FUNCTION_TEMPLATE}"
|
||||
"${CALLER_FUNCTION_PATH}"
|
||||
@ONLY
|
||||
)
|
||||
list(APPEND GENERATED_SOURCE_FILES "${CALLER_FUNCTION_PATH}")
|
||||
|
||||
# Add the generated source files to the list of source files.
|
||||
# This allows the generated source files to be included in the build.
|
||||
list(APPEND ${GEN_SHARDED_SRC_LIST} ${GENERATED_SOURCE_FILES})
|
||||
set(${GEN_SHARDED_SRC_LIST} "${${GEN_SHARDED_SRC_LIST}}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
@@ -1,15 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "@INSTANCES@.inc"
|
||||
|
||||
namespace ck::tensor_operation::device::instance {
|
||||
|
||||
@EXTERN_TEMPLATE_STATEMENTS@;
|
||||
|
||||
void add_@INSTANCES@(
|
||||
@INSTANCES@& instances) {
|
||||
@CALL_STATEMENTS@;
|
||||
}
|
||||
|
||||
} // namespace ck::tensor_operation::device::instance
|
||||
@@ -1,9 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "@INSTANCES@.inc"
|
||||
|
||||
namespace ck::tensor_operation::device::instance {
|
||||
template void add_@INSTANCES@_shard<@NUM_SHARDS@, @SHARD_ID@>(
|
||||
@INSTANCES@& instances);
|
||||
} // namespace ck::tensor_operation::device::instance
|
||||
Reference in New Issue
Block a user