mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 12:59:49 +00:00
[rocm-libraries] ROCm/rocm-libraries#7090 (commit 316fded)
[CK] Add rocm_ck directory structure with feature flag (#7090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Adds initial rocm_ck directory structure, #7119. - Establishes production `rocm_ck/` directory at `composablekernel/rocm_ck/`, peer to `tile_engine/` and `dispatcher/` - Adds `CK_ENABLE_ROCM_CK` option (default OFF) as a CK-internal feature flag — no superbuild or TheRock changes needed - Creates `rocm_ck` INTERFACE library, `ck_tile_headers` target, GTest integration with builder-style convenience targets (`smoke-rocm-ck`, `check-rocm-ck`) - Adds Jenkins `RUN_ROCM_CK_TESTS` parameter for CI, following the `RUN_BUILDER_TESTS` pattern - README explains the constexpr schema model: host-device separation via constexpr data rather than template parameters, enabling multi-arch distribution through kpack archives ## Test plan - [x] `cmake -DCK_ENABLE_ROCM_CK=ON` configures without errors - [x] `ninja check-rocm-ck` passes (4 host-only index type tests) - [x] Default build (`CK_ENABLE_ROCM_CK=OFF`) is unaffected — no rocm_ck targets present - [x] Jenkins `RUN_ROCM_CK_TESTS=true` enables the flag and runs `check-rocm-ck` 🤖 Generated with [Claude Code](https://claude.com/claude-code)
This commit is contained in:
committed by
assistant-librarian[bot]
parent
d931e8703d
commit
6cd06382b3
@@ -55,6 +55,7 @@ option(FORCE_DISABLE_WMMA "Skip compiling WMMA specific instances (even if suppo
|
||||
option(BUILD_CK_TILE_ENGINE "Build the tile_engine subdirectory" OFF)
|
||||
option(BUILD_CK_EXAMPLES "Build the example subdirectory" ON)
|
||||
option(BUILD_CK_TUTORIALS "Build the tutorial subdirectory" ON)
|
||||
option(CK_ENABLE_ROCM_CK "Build rocm_ck API" OFF)
|
||||
|
||||
if(CK_EXPERIMENTAL_BUILDER)
|
||||
add_definitions(-DCK_EXPERIMENTAL_BUILDER)
|
||||
@@ -771,6 +772,12 @@ if(NOT GPU_ARCHS AND USER_GPU_TARGETS AND NOT MIOPEN_REQ_LIBS_ONLY AND NOT HIPTE
|
||||
if(BUILD_CK_TILE_ENGINE)
|
||||
add_subdirectory(tile_engine)
|
||||
endif()
|
||||
if(CK_ENABLE_ROCM_CK)
|
||||
add_subdirectory(rocm_ck)
|
||||
if(TARGET check)
|
||||
add_dependencies(check build-smoke-rocm-ck)
|
||||
endif()
|
||||
endif()
|
||||
if(BUILD_TESTING)
|
||||
rocm_package_setup_component(tests
|
||||
LIBRARY_NAME composablekernel
|
||||
|
||||
13
Jenkinsfile
vendored
13
Jenkinsfile
vendored
@@ -721,6 +721,9 @@ def cmake_build(Map conf=[:]){
|
||||
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
|
||||
setup_args = " -D CK_EXPERIMENTAL_BUILDER=ON " + setup_args
|
||||
}
|
||||
if (params.RUN_ROCM_CK_TESTS) {
|
||||
setup_args = " -D CK_ENABLE_ROCM_CK=ON " + setup_args
|
||||
}
|
||||
setup_cmd = conf.get(
|
||||
"setup_cmd",
|
||||
"""${cmake_envs} cmake -G Ninja ${setup_args} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS=" -O3 " .. """
|
||||
@@ -837,6 +840,9 @@ def cmake_build(Map conf=[:]){
|
||||
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
|
||||
sh 'ninja check-builder'
|
||||
}
|
||||
if (params.RUN_ROCM_CK_TESTS) {
|
||||
sh 'ninja check-rocm-ck'
|
||||
}
|
||||
if(params.BUILD_PACKAGES){
|
||||
echo "Build ckProfiler packages"
|
||||
sh 'ninja -j64 package'
|
||||
@@ -876,6 +882,9 @@ def cmake_build(Map conf=[:]){
|
||||
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
|
||||
sh 'ninja check-builder'
|
||||
}
|
||||
if (params.RUN_ROCM_CK_TESTS) {
|
||||
sh 'ninja check-rocm-ck'
|
||||
}
|
||||
if(params.BUILD_PACKAGES){
|
||||
echo "Build ckProfiler packages"
|
||||
sh 'ninja -j64 package'
|
||||
@@ -1425,6 +1434,10 @@ pipeline {
|
||||
name: "RUN_BUILDER_TESTS",
|
||||
defaultValue: false,
|
||||
description: "Run CK_BUILDER tests (default: OFF)")
|
||||
booleanParam(
|
||||
name: "RUN_ROCM_CK_TESTS",
|
||||
defaultValue: true,
|
||||
description: "Run rocm_ck tests (default: ON)")
|
||||
booleanParam(
|
||||
name: "RUN_ALL_UNIT_TESTS",
|
||||
defaultValue: false,
|
||||
|
||||
29
rocm_ck/CMakeLists.txt
Normal file
29
rocm_ck/CMakeLists.txt
Normal file
@@ -0,0 +1,29 @@
|
||||
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
# rocm_ck — constexpr schema API over CK Tile device kernels.
|
||||
#
|
||||
# Build from CK root:
|
||||
# cmake -B build -S . -G Ninja -DCK_ENABLE_ROCM_CK=ON
|
||||
# ninja -C build smoke-rocm-ck
|
||||
|
||||
# rocm_ck — header-only INTERFACE library
|
||||
add_library(rocm_ck INTERFACE)
|
||||
target_include_directories(rocm_ck INTERFACE
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||
)
|
||||
# rocm_ck requires C++20, but the CK library is still migrating from c++17
|
||||
# We should remove this once the library fully migrates to c++20.
|
||||
target_compile_features(rocm_ck INTERFACE cxx_std_20)
|
||||
target_compile_options(rocm_ck INTERFACE -Wno-c++20-compat)
|
||||
|
||||
# CK Tile headers — required for device code compilation
|
||||
if(NOT TARGET ck_tile_headers)
|
||||
add_library(ck_tile_headers INTERFACE)
|
||||
target_include_directories(ck_tile_headers INTERFACE
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
|
||||
)
|
||||
endif()
|
||||
|
||||
enable_testing()
|
||||
add_subdirectory(tests)
|
||||
113
rocm_ck/README.md
Normal file
113
rocm_ck/README.md
Normal file
@@ -0,0 +1,113 @@
|
||||
# rocm_ck
|
||||
|
||||
A C++20 constexpr API for configuring and distributing
|
||||
[CK Tile](../include/ck_tile/) GPU kernels across multiple architectures.
|
||||
|
||||
> **Status**: Early development. The current code establishes the directory
|
||||
> structure, build integration, and CI pipeline. A single unit test verifies
|
||||
> that the build and test infrastructure works end-to-end in Jenkins.
|
||||
> The schema types, device bridge, and kernel tests described below are
|
||||
> under active development.
|
||||
|
||||
## Why rocm_ck exists
|
||||
|
||||
CK Tile kernels are C++ templates. A GEMM kernel's tile size, pipeline
|
||||
strategy, data types, and epilogue are all template parameters — fixed at
|
||||
compile time. This is excellent for performance (zero-overhead abstraction,
|
||||
full inlining), but it creates a problem for multi-architecture distribution:
|
||||
the host program must be compiled separately from device code, and the host
|
||||
compiler must never see CK Tile headers.
|
||||
|
||||
rocm_ck solves this by introducing a **host-device boundary** built on
|
||||
constexpr data rather than template parameters:
|
||||
|
||||
1. **On the host side**, kernel configurations are plain C++20 structs
|
||||
(`Signature`, `Algorithm`, `GemmSpec`). These are constexpr data —
|
||||
they describe *what* to compute and *how*, without instantiating any
|
||||
templates. Host code reasons about kernels using values, not types.
|
||||
|
||||
2. **On the device side**, a thin bridge layer lowers these constexpr
|
||||
descriptions into CK Tile template instantiations. Each `GemmSpec`
|
||||
maps to exactly one `ck_tile::GemmPipeline<...>` specialization.
|
||||
|
||||
3. **At the boundary**, pre-compiled kernels are packaged into
|
||||
[kpack archives](https://github.com/ROCm/TheRock/blob/main/docs/rfcs/RFC0008-Multi-Arch-Packaging.md) —
|
||||
self-describing, compressed, multi-architecture bundles. The host loads kernels at runtime
|
||||
by matching a `GemmSpec` against the kpack table of contents. No
|
||||
recompilation, no template instantiation on the host.
|
||||
|
||||
This separation is what makes CK Tile viable in
|
||||
[TheRock](https://github.com/ROCm/TheRock)'s multi-arch build system,
|
||||
where a single host binary must work with device code compiled for
|
||||
many GPU targets (e.g. gfx90a, gfx942, gfx1151).
|
||||
|
||||
## The constexpr schema model
|
||||
|
||||
Traditional GPU kernel libraries select kernels through template
|
||||
parameters or runtime enums. rocm_ck uses a third approach: **constexpr
|
||||
structs that are validated at compile time and lowered to templates on
|
||||
the device side.**
|
||||
|
||||
A kernel configuration has two axes:
|
||||
|
||||
- **Signature** — *what* the kernel computes: a directed graph of
|
||||
operators (`GemmOp`, `AddOp`, `ReluOp`, ...) connecting named tensor
|
||||
slots. Data types, layouts, and batch dimensions are part of the
|
||||
signature.
|
||||
|
||||
- **Algorithm** — *how* the kernel computes it: tile geometry, pipeline
|
||||
strategy, warp layout, padding, and scheduling. These are tuning
|
||||
parameters that don't change the mathematical result.
|
||||
|
||||
The `Signature` and `Algorithm` are plain aggregate structs with
|
||||
designated initializers — no constructors, no inheritance, no runtime
|
||||
polymorphism. Validation happens in `consteval` functions: invalid
|
||||
configurations (unsupported tile size, incompatible data types, missing
|
||||
tensor slots) fail at compile time with actionable error messages.
|
||||
|
||||
Here is a preview of the API direction (not yet implemented):
|
||||
|
||||
```cpp
|
||||
// Host side — pure constexpr, any C++20 compiler, no CK headers
|
||||
constexpr Signature sig = {
|
||||
.dtype = DataType::FP16,
|
||||
.ops = {
|
||||
GemmOp{.lhs = "A", .rhs = "B", .out = "C"},
|
||||
AddOp{.lhs = "C", .rhs = "bias", .out = "D"},
|
||||
ReluOp{.in = "D", .out = "E"},
|
||||
},
|
||||
};
|
||||
|
||||
// Device side — make_kernel lowers to a CK Tile template instantiation.
|
||||
// Compiled separately per architecture, packaged into .kpack archives.
|
||||
```
|
||||
|
||||
## Directory layout
|
||||
|
||||
```text
|
||||
rocm_ck/
|
||||
├── CMakeLists.txt # INTERFACE library, C++20, ck_tile_headers target
|
||||
├── include/rocm_ck/ # Public headers — host-safe, no CK/HIP deps
|
||||
├── src/ # (planned) Device bridge, kpack loading
|
||||
└── tests/
|
||||
├── CMakeLists.txt # Test tiers: ROCM_CK_SMOKE, ROCM_CK_KERNEL
|
||||
├── unit/ # Fast host-only tests (< 1s, no GPU)
|
||||
└── kernel/ # (planned) GPU kernel tests
|
||||
```
|
||||
|
||||
## Build
|
||||
|
||||
rocm_ck is a CK feature, gated by `CK_ENABLE_ROCM_CK`:
|
||||
|
||||
```bash
|
||||
cd composablekernel
|
||||
cmake -B build -S . -G Ninja \
|
||||
-DCK_ENABLE_ROCM_CK=ON \
|
||||
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++
|
||||
|
||||
ninja -C build smoke-rocm-ck # host-only smoke tests
|
||||
ninja -C build check-rocm-ck # all rocm_ck tests
|
||||
ctest --test-dir build -L ROCM_CK_SMOKE --output-on-failure
|
||||
```
|
||||
|
||||
Default CK builds (`CK_ENABLE_ROCM_CK=OFF`) are unaffected.
|
||||
17
rocm_ck/include/rocm_ck/index_t.hpp
Normal file
17
rocm_ck/include/rocm_ck/index_t.hpp
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Role: types — index_t, long_index_t. No runtime, no CK deps.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace rocm_ck {
|
||||
|
||||
// Matches ck_tile::index_t without pulling in CK Tile headers.
|
||||
using index_t = std::int32_t;
|
||||
|
||||
// batch_stride * nhead can exceed int32. Matches ck_tile::long_index_t.
|
||||
using long_index_t = std::int64_t;
|
||||
|
||||
} // namespace rocm_ck
|
||||
67
rocm_ck/tests/CMakeLists.txt
Normal file
67
rocm_ck/tests/CMakeLists.txt
Normal file
@@ -0,0 +1,67 @@
|
||||
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
# rocm_ck tests
|
||||
#
|
||||
# Test tiers:
|
||||
# ROCM_CK_SMOKE — Fast host-only tests (< 1s total). No GPU, no HIP.
|
||||
# ROCM_CK_KERNEL — GPU kernel tests. Require HIP and a GPU.
|
||||
#
|
||||
# Usage:
|
||||
# ninja smoke-rocm-ck # build + run smoke tests
|
||||
# ninja build-smoke-rocm-ck # build only (no run)
|
||||
# ninja check-rocm-ck # run all rocm_ck tests
|
||||
#
|
||||
# ctest -L ROCM_CK_SMOKE --output-on-failure
|
||||
|
||||
# Google Test (via CK's FetchContent wrapper)
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/gtest.cmake)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper function — reusable per-test setup
|
||||
# ---------------------------------------------------------------------------
|
||||
function(add_rocm_ck_test test_name)
|
||||
add_executable(${test_name} ${ARGN})
|
||||
target_link_libraries(${test_name} PRIVATE rocm_ck GTest::gtest_main)
|
||||
target_compile_options(${test_name} PRIVATE
|
||||
-Wno-global-constructors # GTest registration macros
|
||||
-Wno-undef # GTest internal headers
|
||||
)
|
||||
endfunction()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Smoke tests (fast, host-only, no GPU)
|
||||
# ---------------------------------------------------------------------------
|
||||
set(ROCM_CK_SMOKE_TESTS
|
||||
unit/unit_index_t.cpp
|
||||
)
|
||||
|
||||
set(ROCM_CK_SMOKE_TARGETS)
|
||||
foreach(test_source ${ROCM_CK_SMOKE_TESTS})
|
||||
get_filename_component(test_name ${test_source} NAME_WLE)
|
||||
set(target_name "rocm_ck_${test_name}")
|
||||
add_rocm_ck_test(${target_name} ${test_source})
|
||||
add_test(NAME ${target_name} COMMAND ${target_name})
|
||||
set_tests_properties(${target_name} PROPERTIES LABELS "ROCM_CK_SMOKE")
|
||||
list(APPEND ROCM_CK_SMOKE_TARGETS ${target_name})
|
||||
endforeach()
|
||||
|
||||
# rocm_ck_unit_index_t verifies rocm_ck index types match ck_tile
|
||||
target_include_directories(rocm_ck_unit_index_t PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Convenience targets
|
||||
# ---------------------------------------------------------------------------
|
||||
add_custom_target(build-smoke-rocm-ck DEPENDS ${ROCM_CK_SMOKE_TARGETS})
|
||||
|
||||
add_custom_target(smoke-rocm-ck
|
||||
COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -L "ROCM_CK_SMOKE"
|
||||
DEPENDS build-smoke-rocm-ck
|
||||
USES_TERMINAL
|
||||
COMMENT "Running rocm_ck smoke tests...")
|
||||
|
||||
add_custom_target(check-rocm-ck
|
||||
COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -L "ROCM_CK"
|
||||
DEPENDS build-smoke-rocm-ck
|
||||
USES_TERMINAL
|
||||
COMMENT "Running all rocm_ck tests...")
|
||||
33
rocm_ck/tests/unit/unit_index_t.cpp
Normal file
33
rocm_ck/tests/unit/unit_index_t.cpp
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <rocm_ck/index_t.hpp>
|
||||
|
||||
#include <ck_tile/core/numeric/integer.hpp>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using ::rocm_ck::index_t;
|
||||
using ::rocm_ck::long_index_t;
|
||||
|
||||
namespace {
|
||||
|
||||
TEST(IndexTypes, IndexTypeIs32Bit) { EXPECT_EQ(sizeof(index_t), 4); }
|
||||
|
||||
TEST(IndexTypes, LongIndexTypeIs64Bit) { EXPECT_EQ(sizeof(long_index_t), 8); }
|
||||
|
||||
TEST(IndexTypes, IndexTypeIsSigned) { EXPECT_TRUE(index_t(-1) < 0); }
|
||||
|
||||
TEST(IndexTypes, LongIndexTypeIsSigned) { EXPECT_TRUE(long_index_t(-1) < 0); }
|
||||
|
||||
TEST(IndexTypes, MatchesCkTileIndexType)
|
||||
{
|
||||
EXPECT_TRUE((std::is_same_v<index_t, ck_tile::index_t>));
|
||||
}
|
||||
|
||||
TEST(IndexTypes, MatchesCkTileLongIndexType)
|
||||
{
|
||||
EXPECT_TRUE((std::is_same_v<long_index_t, ck_tile::long_index_t>));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
Reference in New Issue
Block a user