[rocm-libraries] ROCm/rocm-libraries#7090 (commit 316fded)

[CK] Add rocm_ck directory structure with feature flag
 (#7090)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

Adds initial rocm_ck directory structure, #7119.

- Establishes production `rocm_ck/` directory at
`composablekernel/rocm_ck/`, peer to `tile_engine/` and `dispatcher/`
- Adds `CK_ENABLE_ROCM_CK` option (default OFF) as a CK-internal feature
flag — no superbuild or TheRock changes needed
- Creates `rocm_ck` INTERFACE library, `ck_tile_headers` target, GTest
integration with builder-style convenience targets (`smoke-rocm-ck`,
`check-rocm-ck`)
- Adds Jenkins `RUN_ROCM_CK_TESTS` parameter for CI, following the
`RUN_BUILDER_TESTS` pattern
- README explains the constexpr schema model: host-device separation via
constexpr data rather than template parameters, enabling multi-arch
distribution through kpack archives

## Test plan

- [x] `cmake -DCK_ENABLE_ROCM_CK=ON` configures without errors
- [x] `ninja check-rocm-ck` passes (4 host-only index type tests)
- [x] Default build (`CK_ENABLE_ROCM_CK=OFF`) is unaffected — no rocm_ck
targets present
- [x] Jenkins `RUN_ROCM_CK_TESTS=true` enables the flag and runs
`check-rocm-ck`

🤖 Generated with [Claude Code](https://claude.com/claude-code)
This commit is contained in:
John Shumway
2026-05-14 18:52:38 +00:00
committed by assistant-librarian[bot]
parent d931e8703d
commit 6cd06382b3
7 changed files with 279 additions and 0 deletions

View File

@@ -55,6 +55,7 @@ option(FORCE_DISABLE_WMMA "Skip compiling WMMA specific instances (even if suppo
option(BUILD_CK_TILE_ENGINE "Build the tile_engine subdirectory" OFF)
option(BUILD_CK_EXAMPLES "Build the example subdirectory" ON)
option(BUILD_CK_TUTORIALS "Build the tutorial subdirectory" ON)
option(CK_ENABLE_ROCM_CK "Build rocm_ck API" OFF)
if(CK_EXPERIMENTAL_BUILDER)
add_definitions(-DCK_EXPERIMENTAL_BUILDER)
@@ -771,6 +772,12 @@ if(NOT GPU_ARCHS AND USER_GPU_TARGETS AND NOT MIOPEN_REQ_LIBS_ONLY AND NOT HIPTE
if(BUILD_CK_TILE_ENGINE)
add_subdirectory(tile_engine)
endif()
if(CK_ENABLE_ROCM_CK)
add_subdirectory(rocm_ck)
if(TARGET check)
add_dependencies(check build-smoke-rocm-ck)
endif()
endif()
if(BUILD_TESTING)
rocm_package_setup_component(tests
LIBRARY_NAME composablekernel

13
Jenkinsfile vendored
View File

@@ -721,6 +721,9 @@ def cmake_build(Map conf=[:]){
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
setup_args = " -D CK_EXPERIMENTAL_BUILDER=ON " + setup_args
}
if (params.RUN_ROCM_CK_TESTS) {
setup_args = " -D CK_ENABLE_ROCM_CK=ON " + setup_args
}
setup_cmd = conf.get(
"setup_cmd",
"""${cmake_envs} cmake -G Ninja ${setup_args} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS=" -O3 " .. """
@@ -837,6 +840,9 @@ def cmake_build(Map conf=[:]){
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
sh 'ninja check-builder'
}
if (params.RUN_ROCM_CK_TESTS) {
sh 'ninja check-rocm-ck'
}
if(params.BUILD_PACKAGES){
echo "Build ckProfiler packages"
sh 'ninja -j64 package'
@@ -876,6 +882,9 @@ def cmake_build(Map conf=[:]){
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
sh 'ninja check-builder'
}
if (params.RUN_ROCM_CK_TESTS) {
sh 'ninja check-rocm-ck'
}
if(params.BUILD_PACKAGES){
echo "Build ckProfiler packages"
sh 'ninja -j64 package'
@@ -1425,6 +1434,10 @@ pipeline {
name: "RUN_BUILDER_TESTS",
defaultValue: false,
description: "Run CK_BUILDER tests (default: OFF)")
booleanParam(
name: "RUN_ROCM_CK_TESTS",
defaultValue: true,
description: "Run rocm_ck tests (default: ON)")
booleanParam(
name: "RUN_ALL_UNIT_TESTS",
defaultValue: false,

29
rocm_ck/CMakeLists.txt Normal file
View File

@@ -0,0 +1,29 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
#
# rocm_ck — constexpr schema API over CK Tile device kernels.
#
# Build from CK root:
# cmake -B build -S . -G Ninja -DCK_ENABLE_ROCM_CK=ON
# ninja -C build smoke-rocm-ck
# rocm_ck — header-only INTERFACE library
add_library(rocm_ck INTERFACE)
target_include_directories(rocm_ck INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
# rocm_ck requires C++20, but the CK library is still migrating from c++17
# We should remove this once the library fully migrates to c++20.
target_compile_features(rocm_ck INTERFACE cxx_std_20)
target_compile_options(rocm_ck INTERFACE -Wno-c++20-compat)
# CK Tile headers — required for device code compilation
if(NOT TARGET ck_tile_headers)
add_library(ck_tile_headers INTERFACE)
target_include_directories(ck_tile_headers INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
)
endif()
enable_testing()
add_subdirectory(tests)

113
rocm_ck/README.md Normal file
View File

@@ -0,0 +1,113 @@
# rocm_ck
A C++20 constexpr API for configuring and distributing
[CK Tile](../include/ck_tile/) GPU kernels across multiple architectures.
> **Status**: Early development. The current code establishes the directory
> structure, build integration, and CI pipeline. A single unit test verifies
> that the build and test infrastructure works end-to-end in Jenkins.
> The schema types, device bridge, and kernel tests described below are
> under active development.
## Why rocm_ck exists
CK Tile kernels are C++ templates. A GEMM kernel's tile size, pipeline
strategy, data types, and epilogue are all template parameters — fixed at
compile time. This is excellent for performance (zero-overhead abstraction,
full inlining), but it creates a problem for multi-architecture distribution:
the host program must be compiled separately from device code, and the host
compiler must never see CK Tile headers.
rocm_ck solves this by introducing a **host-device boundary** built on
constexpr data rather than template parameters:
1. **On the host side**, kernel configurations are plain C++20 structs
(`Signature`, `Algorithm`, `GemmSpec`). These are constexpr data —
they describe *what* to compute and *how*, without instantiating any
templates. Host code reasons about kernels using values, not types.
2. **On the device side**, a thin bridge layer lowers these constexpr
descriptions into CK Tile template instantiations. Each `GemmSpec`
maps to exactly one `ck_tile::GemmPipeline<...>` specialization.
3. **At the boundary**, pre-compiled kernels are packaged into
[kpack archives](https://github.com/ROCm/TheRock/blob/main/docs/rfcs/RFC0008-Multi-Arch-Packaging.md) —
self-describing, compressed, multi-architecture bundles. The host loads kernels at runtime
by matching a `GemmSpec` against the kpack table of contents. No
recompilation, no template instantiation on the host.
This separation is what makes CK Tile viable in
[TheRock](https://github.com/ROCm/TheRock)'s multi-arch build system,
where a single host binary must work with device code compiled for
many GPU targets (e.g. gfx90a, gfx942, gfx1151).
## The constexpr schema model
Traditional GPU kernel libraries select kernels through template
parameters or runtime enums. rocm_ck uses a third approach: **constexpr
structs that are validated at compile time and lowered to templates on
the device side.**
A kernel configuration has two axes:
- **Signature** — *what* the kernel computes: a directed graph of
operators (`GemmOp`, `AddOp`, `ReluOp`, ...) connecting named tensor
slots. Data types, layouts, and batch dimensions are part of the
signature.
- **Algorithm** — *how* the kernel computes it: tile geometry, pipeline
strategy, warp layout, padding, and scheduling. These are tuning
parameters that don't change the mathematical result.
The `Signature` and `Algorithm` are plain aggregate structs with
designated initializers — no constructors, no inheritance, no runtime
polymorphism. Validation happens in `consteval` functions: invalid
configurations (unsupported tile size, incompatible data types, missing
tensor slots) fail at compile time with actionable error messages.
Here is a preview of the API direction (not yet implemented):
```cpp
// Host side — pure constexpr, any C++20 compiler, no CK headers
constexpr Signature sig = {
.dtype = DataType::FP16,
.ops = {
GemmOp{.lhs = "A", .rhs = "B", .out = "C"},
AddOp{.lhs = "C", .rhs = "bias", .out = "D"},
ReluOp{.in = "D", .out = "E"},
},
};
// Device side — make_kernel lowers to a CK Tile template instantiation.
// Compiled separately per architecture, packaged into .kpack archives.
```
## Directory layout
```text
rocm_ck/
├── CMakeLists.txt # INTERFACE library, C++20, ck_tile_headers target
├── include/rocm_ck/ # Public headers — host-safe, no CK/HIP deps
├── src/ # (planned) Device bridge, kpack loading
└── tests/
├── CMakeLists.txt # Test tiers: ROCM_CK_SMOKE, ROCM_CK_KERNEL
├── unit/ # Fast host-only tests (< 1s, no GPU)
└── kernel/ # (planned) GPU kernel tests
```
## Build
rocm_ck is a CK feature, gated by `CK_ENABLE_ROCM_CK`:
```bash
cd composablekernel
cmake -B build -S . -G Ninja \
-DCK_ENABLE_ROCM_CK=ON \
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++
ninja -C build smoke-rocm-ck # host-only smoke tests
ninja -C build check-rocm-ck # all rocm_ck tests
ctest --test-dir build -L ROCM_CK_SMOKE --output-on-failure
```
Default CK builds (`CK_ENABLE_ROCM_CK=OFF`) are unaffected.

View File

@@ -0,0 +1,17 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
// Role: types — index_t, long_index_t. No runtime, no CK deps.
#pragma once
#include <cstdint>
namespace rocm_ck {
// Matches ck_tile::index_t without pulling in CK Tile headers.
using index_t = std::int32_t;
// batch_stride * nhead can exceed int32. Matches ck_tile::long_index_t.
using long_index_t = std::int64_t;
} // namespace rocm_ck

View File

@@ -0,0 +1,67 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
#
# rocm_ck tests
#
# Test tiers:
# ROCM_CK_SMOKE — Fast host-only tests (< 1s total). No GPU, no HIP.
# ROCM_CK_KERNEL — GPU kernel tests. Require HIP and a GPU.
#
# Usage:
# ninja smoke-rocm-ck # build + run smoke tests
# ninja build-smoke-rocm-ck # build only (no run)
# ninja check-rocm-ck # run all rocm_ck tests
#
# ctest -L ROCM_CK_SMOKE --output-on-failure
# Google Test (via CK's FetchContent wrapper)
include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/gtest.cmake)
# ---------------------------------------------------------------------------
# Helper function — reusable per-test setup
# ---------------------------------------------------------------------------
function(add_rocm_ck_test test_name)
add_executable(${test_name} ${ARGN})
target_link_libraries(${test_name} PRIVATE rocm_ck GTest::gtest_main)
target_compile_options(${test_name} PRIVATE
-Wno-global-constructors # GTest registration macros
-Wno-undef # GTest internal headers
)
endfunction()
# ---------------------------------------------------------------------------
# Smoke tests (fast, host-only, no GPU)
# ---------------------------------------------------------------------------
set(ROCM_CK_SMOKE_TESTS
unit/unit_index_t.cpp
)
set(ROCM_CK_SMOKE_TARGETS)
foreach(test_source ${ROCM_CK_SMOKE_TESTS})
get_filename_component(test_name ${test_source} NAME_WLE)
set(target_name "rocm_ck_${test_name}")
add_rocm_ck_test(${target_name} ${test_source})
add_test(NAME ${target_name} COMMAND ${target_name})
set_tests_properties(${target_name} PROPERTIES LABELS "ROCM_CK_SMOKE")
list(APPEND ROCM_CK_SMOKE_TARGETS ${target_name})
endforeach()
# rocm_ck_unit_index_t verifies rocm_ck index types match ck_tile
target_include_directories(rocm_ck_unit_index_t PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
# ---------------------------------------------------------------------------
# Convenience targets
# ---------------------------------------------------------------------------
add_custom_target(build-smoke-rocm-ck DEPENDS ${ROCM_CK_SMOKE_TARGETS})
add_custom_target(smoke-rocm-ck
COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -L "ROCM_CK_SMOKE"
DEPENDS build-smoke-rocm-ck
USES_TERMINAL
COMMENT "Running rocm_ck smoke tests...")
add_custom_target(check-rocm-ck
COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -L "ROCM_CK"
DEPENDS build-smoke-rocm-ck
USES_TERMINAL
COMMENT "Running all rocm_ck tests...")

View File

@@ -0,0 +1,33 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <rocm_ck/index_t.hpp>
#include <ck_tile/core/numeric/integer.hpp>
#include <gtest/gtest.h>
using ::rocm_ck::index_t;
using ::rocm_ck::long_index_t;
namespace {
TEST(IndexTypes, IndexTypeIs32Bit) { EXPECT_EQ(sizeof(index_t), 4); }
TEST(IndexTypes, LongIndexTypeIs64Bit) { EXPECT_EQ(sizeof(long_index_t), 8); }
TEST(IndexTypes, IndexTypeIsSigned) { EXPECT_TRUE(index_t(-1) < 0); }
TEST(IndexTypes, LongIndexTypeIsSigned) { EXPECT_TRUE(long_index_t(-1) < 0); }
TEST(IndexTypes, MatchesCkTileIndexType)
{
EXPECT_TRUE((std::is_same_v<index_t, ck_tile::index_t>));
}
TEST(IndexTypes, MatchesCkTileLongIndexType)
{
EXPECT_TRUE((std::is_same_v<long_index_t, ck_tile::long_index_t>));
}
} // namespace