mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 17:55:48 +00:00
Backup commit grouping all in-progress local work so nothing is lost:
- Modified CK-UA kernel + example sources (unified_attention.cpp, unified_attention_kernel.hpp) and CMake/build files.
- Updated dispatcher README and ctypes_utils.py.
- New unified_attention example notes: PARAMETERS.md, VARIABLES.md.
- New unified_attention instances for d128 fp16/bf16 (mask/nmask, gqa6).
- New 99_toy_tutorial/ collection: bank-conflict investigations (test_*.cpp, *.js, *.gdb, *.asm, *.md), tile distribution / row reduction / calling_gemm / thread_buffer tutorials.
- Slide decks and supporting assets (bank_conflict_slides.qmd/.html, tile_distribution_slides.qmd, assets/, *_files/, step1_reshape_only, xor_full_steps_simple).
- GDB helper script (break_on_ds_read.gdb).

Not intended for upstream review; pure WIP snapshot.
130 lines
4.0 KiB
JSON
130 lines
4.0 KiB
JSON
{
  "version": 3,
  "cmakeMinimumRequired": {
    "major": 3,
    "minor": 21,
    "patch": 0
  },
  "configurePresets": [
    {
      "name": "use-gfx908",
      "hidden": true,
      "cacheVariables": {
        "GPU_TARGETS": "gfx908"
      }
    },
    {
      "name": "use-gfx90a",
      "hidden": true,
      "cacheVariables": {
        "GPU_TARGETS": "gfx90a"
      }
    },
    {
      "name": "use-gfx942",
      "hidden": true,
      "cacheVariables": {
        "GPU_TARGETS": "gfx942"
      }
    },
    {
      "name": "use-gfx950",
      "hidden": true,
      "cacheVariables": {
        "GPU_TARGETS": "gfx950"
      }
    },
    {
      "name": "dev",
      "binaryDir": "${sourceDir}/build",
      "displayName": "CK Dev",
      "environment": {},
      "cacheVariables": {
        "CMAKE_PREFIX_PATH": "/opt/rocm/",
        "CMAKE_CXX_COMPILER": "/opt/rocm/llvm/bin/clang++",
        "CMAKE_HIP_COMPILER": "/opt/rocm/llvm/bin/clang++",
        "CMAKE_CXX_FLAGS": "-ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker -fbracket-depth=1024",
        "CMAKE_BUILD_TYPE": "Release",
        "BUILD_DEV": "ON",
        "CMAKE_VERBOSE_MAKEFILE": "ON",
        "USE_BITINT_EXTENSION_INT4": "OFF",
        "GPU_TARGETS": "gfx908;gfx90a;gfx942"
      }
    },
    {
      "name": "dev-gfx908",
      "displayName": "CK Dev - gfx908",
      "description": "Development build for AMD GPU gfx908",
      "inherits": [
        "use-gfx908",
        "dev"
      ]
    },
    {
      "name": "dev-gfx90a",
      "displayName": "CK Dev - gfx90a",
      "description": "Development build for AMD GPU gfx90a",
      "inherits": [
        "use-gfx90a",
        "dev"
      ]
    },
    {
      "name": "dev-gfx942",
      "displayName": "CK Dev - gfx942",
      "description": "Development build for AMD GPU gfx942",
      "inherits": [
        "use-gfx942",
        "dev"
      ]
    },
    {
      "name": "dev-gfx950",
      "displayName": "CK Dev - gfx950",
      "description": "Development build for AMD GPU gfx950",
      "inherits": [
        "use-gfx950",
        "dev"
      ]
    },
    {
      "name": "debug",
      "binaryDir": "${sourceDir}/build-debug",
      "displayName": "CK Debug",
      "description": "Debug build (no opt, full symbols, large code model)",
      "generator": "Ninja",
      "environment": {},
      "cacheVariables": {
        "CMAKE_PREFIX_PATH": "/opt/rocm",
        "CMAKE_CXX_COMPILER": "/opt/rocm/bin/hipcc",
        "CMAKE_BUILD_TYPE": "Debug",
        "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
        "CMAKE_MAKE_PROGRAM": "ninja",
        "CMAKE_CXX_FLAGS_DEBUG": "-O0 -g -ggdb3 -fno-inline -fno-omit-frame-pointer -mcmodel=large",
        "CMAKE_HIP_FLAGS_DEBUG": "-O0 -g -ggdb3 -mcmodel=large",
        "CMAKE_EXE_LINKER_FLAGS": "-mcmodel=large",
        "CK_DEBUG_UNOPTIMIZED": "ON",
        "BUILD_DEV": "ON",
        "USE_BITINT_EXTENSION_INT4": "OFF"
      }
    },
    {
      "name": "debug-gfx942",
      "displayName": "CK Debug - gfx942",
      "description": "Debug build for AMD GPU gfx942",
      "inherits": [
        "use-gfx942",
        "debug"
      ]
    },
    {
      "name": "debug-gfx950",
      "displayName": "CK Debug - gfx950",
      "description": "Debug build for AMD GPU gfx950",
      "inherits": [
        "use-gfx950",
        "debug"
      ]
    }
  ]
}