From b18df7d206c53394bcdbd71dca917fed0842e684 Mon Sep 17 00:00:00 2001 From: Haicheng Wu Date: Mon, 15 Jun 2026 20:40:53 -0700 Subject: [PATCH] update to 4.4.3 --- CHANGELOG.md | 5 +++++ README.md | 4 ++-- include/cutlass/version.h | 2 +- python/cutlass_cppgen/__init__.py | 2 +- python/setup_cutlass.py | 2 +- python/setup_library.py | 2 +- python/setup_pycute.py | 2 +- 7 files changed, 12 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bc51a129..bb3d291e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ # CUTLASS 4.x +## [4.4.3](https://github.com/NVIDIA/cutlass/releases/tag/v4.4.3) (2026-06-15) + +### CUTLASS C++ +* Make [version.h](https://github.com/NVIDIA/cutlass/blob/release/4.4/include/cutlass/version.h) NVRTC JIT compilation compatible. + ## [4.4.2](https://github.com/NVIDIA/cutlass/releases/tag/v4.4.2) (2026-03-13) ### CuTe DSL diff --git a/README.md b/README.md index 122369cb5..439c594b9 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ ![ALT](./media/images/gemm-hierarchy-with-epilogue-no-labels.png "Complete CUDA GEMM decomposition") # Overview -# CUTLASS 4.4.2 +# CUTLASS 4.4.3 -_CUTLASS 4.4.2 - March 2026_ +_CUTLASS 4.4.3 - June 2026_ CUTLASS is a collection of abstractions for implementing high-performance matrix-matrix multiplication (GEMM) and related computations at all levels and scales within CUDA. It incorporates strategies for diff --git a/include/cutlass/version.h b/include/cutlass/version.h index 0780fae25..960431532 100644 --- a/include/cutlass/version.h +++ b/include/cutlass/version.h @@ -40,7 +40,7 @@ #define CUTLASS_MAJOR 4 #define CUTLASS_MINOR 4 -#define CUTLASS_PATCH 2 +#define CUTLASS_PATCH 3 #ifdef CUTLASS_VERSIONS_GENERATED #include "cutlass/version_extended.h" diff --git a/python/cutlass_cppgen/__init__.py b/python/cutlass_cppgen/__init__.py index 889b6f453..0364a47cd 100644 --- a/python/cutlass_cppgen/__init__.py +++ b/python/cutlass_cppgen/__init__.py @@ -133,7 +133,7 @@ def get_option_registry(): this._option_registry = OptionRegistry(device_cc()) return this._option_registry -this.__version__ = '4.4.2' +this.__version__ = '4.4.3' from cutlass_cppgen.backend import create_memory_pool from cutlass_cppgen.emit.pytorch import pytorch diff --git a/python/setup_cutlass.py b/python/setup_cutlass.py index 0d1abbb32..85fac9477 100644 --- a/python/setup_cutlass.py +++ b/python/setup_cutlass.py @@ -51,7 +51,7 @@ setup_pycute.perform_setup() setup( name='cutlass_cppgen', - version='4.4.2', + version='4.4.3', description='CUTLASS Pythonic Interface', package_dir={'': '.'}, packages=[ diff --git a/python/setup_library.py b/python/setup_library.py index 84edebc8c..0a8839bae 100644 --- a/python/setup_library.py +++ b/python/setup_library.py @@ -36,7 +36,7 @@ from setuptools import setup def perform_setup(): setup( name='cutlass_library', - version='4.4.2', + version='4.4.3', description='CUTLASS library generation scripts', packages=['cutlass_library'] ) diff --git a/python/setup_pycute.py b/python/setup_pycute.py index c4ae36938..c888bcc4f 100644 --- a/python/setup_pycute.py +++ b/python/setup_pycute.py @@ -36,7 +36,7 @@ from setuptools import setup def perform_setup(): setup( name='pycute', - version='4.4.2', + version='4.4.3', description='Python implementation of CuTe', packages=['pycute'], )