From ef120d0d09de6886f141780f59010b4a48948fa0 Mon Sep 17 00:00:00 2001 From: Haicheng Wu <57973641+hwu36@users.noreply.github.com> Date: Tue, 12 May 2026 02:44:22 -0400 Subject: [PATCH] update to 4.5 (#3228) --- CHANGELOG.md | 1 + README.md | 5 ++--- include/cutlass/version.h | 4 ++-- python/cutlass_cppgen/__init__.py | 2 +- python/setup_cutlass.py | 2 +- python/setup_library.py | 2 +- python/setup_pycute.py | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 568379566..ee07879f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ * Use 64-bit adds for SM100 MMA descriptor offsets and reduce move instructions for improved code generation. * Add [example 95](https://github.com/NVIDIA/cutlass/tree/main/examples/95_blackwell_gemm_green_context) to support green context SM partition - Enables launching GEMM on stream with partial SM allocation. +* Add [Snake](https://github.com/NVIDIA/cutlass/blob/main/test/unit/epilogue/thread/activation.cu#L409) activation functor for EVT. * Fix some kernel issues: - Fix l2_capacity=0 handling in Blackwell SM100/SM120 kernel templates - Fix CUTLASS clang build issues diff --git a/README.md b/README.md index 43925736e..92950b9ae 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,7 @@ To get started quickly - please refer : * Use 64-bit adds for SM100 MMA descriptor offsets and reduce move instructions for improved code generation. * Add [example 95](https://github.com/NVIDIA/cutlass/tree/main/examples/95_blackwell_gemm_green_context) to support green context SM partition - Enables launching GEMM on stream with partial SM allocation. +* Add [Snake](https://github.com/NVIDIA/cutlass/blob/main/test/unit/epilogue/thread/activation.cu#L409) activation functor for EVT. * Fix some kernel issues: - Fix l2_capacity=0 handling in Blackwell SM100/SM120 kernel templates - Fix CUTLASS clang build issues @@ -97,9 +98,7 @@ To get started quickly - please refer : - Remove `PipelineStorage` shadowing in SM100 complex epilogue - Fix build issue in SM90 epilogue fusion visitor TMA warpspecialized * Fix some profiler issues: - - Add missing reference kernels for blockwise GEMM profiler -* Various improvements and fixes from the community and CUTLASS team. Thanks to everyone who submitted PRs! -* Optimal code generation with CUDA toolkit versions 13.2. + - Add missing reference kernels for blockwise GEMM profiler. Note: CUTLASS 4.x builds are known to be down on Windows platforms for all CUDA toolkits. CUTLASS team is working on a fix. diff --git a/include/cutlass/version.h b/include/cutlass/version.h index f388aa75e..5c30d8c6a 100644 --- a/include/cutlass/version.h +++ b/include/cutlass/version.h @@ -35,8 +35,8 @@ #include #define CUTLASS_MAJOR 4 -#define CUTLASS_MINOR 4 -#define CUTLASS_PATCH 2 +#define CUTLASS_MINOR 5 +#define CUTLASS_PATCH 0 #ifdef CUTLASS_VERSIONS_GENERATED #include "cutlass/version_extended.h" diff --git a/python/cutlass_cppgen/__init__.py b/python/cutlass_cppgen/__init__.py index 889b6f453..0cbf25180 100644 --- a/python/cutlass_cppgen/__init__.py +++ b/python/cutlass_cppgen/__init__.py @@ -133,7 +133,7 @@ def get_option_registry(): this._option_registry = OptionRegistry(device_cc()) return this._option_registry -this.__version__ = '4.4.2' +this.__version__ = '4.5.0' from cutlass_cppgen.backend import create_memory_pool from cutlass_cppgen.emit.pytorch import pytorch diff --git a/python/setup_cutlass.py b/python/setup_cutlass.py index 0d1abbb32..98d2e077c 100644 --- a/python/setup_cutlass.py +++ b/python/setup_cutlass.py @@ -51,7 +51,7 @@ setup_pycute.perform_setup() setup( name='cutlass_cppgen', - version='4.4.2', + version='4.5.0', description='CUTLASS Pythonic Interface', package_dir={'': '.'}, packages=[ diff --git a/python/setup_library.py b/python/setup_library.py index 84edebc8c..c88e3320c 100644 --- a/python/setup_library.py +++ b/python/setup_library.py @@ -36,7 +36,7 @@ from setuptools import setup def perform_setup(): setup( name='cutlass_library', - version='4.4.2', + version='4.5.0', description='CUTLASS library generation scripts', packages=['cutlass_library'] ) diff --git a/python/setup_pycute.py b/python/setup_pycute.py index c4ae36938..7892f866c 100644 --- a/python/setup_pycute.py +++ b/python/setup_pycute.py @@ -36,7 +36,7 @@ from setuptools import setup def perform_setup(): setup( name='pycute', - version='4.4.2', + version='4.5.0', description='Python implementation of CuTe', packages=['pycute'], )