mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-30 19:57:40 +00:00
Introduces a new Python toolset in `script/analyze_build/` for analyzing Clang `-ftime-trace` JSON output to identify compilation bottlenecks and optimize C++ metaprogramming build times.

Key features:
- Fast parallel processing of trace JSON files using all CPU cores (> 100 files/sec)
- Simple, cache-free architecture for consistent performance
- Comprehensive analysis of template instantiations and event types
- Command-line tools and Jupyter notebook support
- Automatic orjson detection for JSON parsing speedup

Components:
- `trace_analysis/`: Core library (models, parser, transformer)
- `examples/`: CLI tools for single-file and directory analysis
- `notebooks/`: Comprehensive Jupyter notebook with analysis patterns
- Detailed README with usage examples and performance data

Also adds ruff configuration to `pyproject.toml` to ignore E402 (module level import not at top of file) for Jupyter notebooks, which commonly have imports after markdown cells.

This toolset addresses the critical problem of long build times in CK's C++17 metaprogramming codebase by treating `-ftime-trace` as a big data problem, using pandas and modern analysis tools to understand compilation patterns and measure improvement opportunities.
53 lines
1.8 KiB
TOML
53 lines
1.8 KiB
TOML
[build-system]
# Support for the PEP 621 `[project]` table landed in setuptools 61.0, so
# require at least that release instead of accepting any setuptools.
requires = ["setuptools>=61"]
build-backend = "setuptools.build_meta"

[project]
name = "rocm-composable-kernel"
# The version is not hard-coded here; it is resolved at build time through
# [tool.setuptools.dynamic].
dynamic = ["version"]
description = "Composable Kernel, performance-critical kernels for machine learning workloads"
readme = "README.md"
requires-python = ">=3.8"
license = { file = "LICENSE" }
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
# No runtime dependencies are declared for the package.
dependencies = []

[project.urls]
Homepage = "https://github.com/rocm/composable_kernel"
# Quoted because the label contains a space.
"Bug Tracker" = "https://github.com/rocm/composable_kernel/issues"

[tool.setuptools]
# Explicit package list; each entry is mapped to a source directory in
# [tool.setuptools.package-dir].
packages = [
    "ck4inductor",
    "ck4inductor.include",
    "ck4inductor.library",
    "ck4inductor.universal_gemm",
    "ck4inductor.batched_universal_gemm",
    "ck4inductor.grouped_conv_fwd",
    "ck4inductor.ck_tile_universal_gemm",
]

[tool.setuptools.package-dir]
# Keys are full package names. The dotted names must stay quoted: an unquoted
# dotted key would be parsed by TOML as a nested table, not a package name.
ck4inductor = "python/ck4inductor"
"ck4inductor.universal_gemm" = "python/ck4inductor/universal_gemm"
"ck4inductor.batched_universal_gemm" = "python/ck4inductor/batched_universal_gemm"
"ck4inductor.grouped_conv_fwd" = "python/ck4inductor/grouped_conv_fwd"
"ck4inductor.ck_tile_universal_gemm" = "python/ck4inductor/ck_tile_universal_gemm"
# The `include` and `library` directories are exposed as ck4inductor
# sub-packages so their headers can be shipped as package data.
"ck4inductor.include" = "include"
"ck4inductor.library" = "library"

[tool.setuptools.package-data]
# Glob patterns are matched relative to each package's own directory, as
# mapped in [tool.setuptools.package-dir].
"ck4inductor.include" = ["ck/**/*.hpp"]
"ck4inductor.library" = [
    "src/tensor_operation_instance/gpu/gemm_universal/**/*.hpp",
    "src/tensor_operation_instance/gpu/gemm_universal_batched/**/*.hpp",
    "include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/**/*.hpp",
]

[tool.setuptools.dynamic]
# Supplies the `version` field that [project] declares as dynamic, read from
# the `__version__` attribute of the ck4inductor package.
version = { attr = "ck4inductor.__version__" }

[tool.ruff.lint.per-file-ignores]
# E402 (module-level import not at top of file) is ignored for the
# build-analysis notebooks, where imports commonly follow markdown cells.
"script/analyze_build/notebooks/*.ipynb" = ["E402"]