From 4b235cdaa427d6ebabab7f379ebcb4f460552da7 Mon Sep 17 00:00:00 2001 From: Jianwei Dong Date: Mon, 29 Dec 2025 12:42:06 +0800 Subject: [PATCH] fix cuda wheel build (#1766) --- .github/workflows/release-pypi.yml | 31 +++++++++++++++--------------- kt-kernel/README.md | 10 ++-------- kt-kernel/setup.py | 28 ++++++++++++++------------- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index b65db3d..e2d503e 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -282,36 +282,37 @@ jobs: run: | echo "## 🎉 kt-kernel v${{ steps.get_version.outputs.VERSION }} Published to PyPI" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY + echo "### Published Packages" >> $GITHUB_STEP_SUMMARY + echo "- **kt-kernel** (CPU-only)" >> $GITHUB_STEP_SUMMARY + echo "- **kt-kernel-cuda** (CUDA support)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Total wheels: $(ls -1 dist/*.whl | wc -l) (3 Python versions: 3.10, 3.11, 3.12)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY echo "### Installation" >> $GITHUB_STEP_SUMMARY echo '```bash' >> $GITHUB_STEP_SUMMARY + echo "# CPU version (AMX/AVX512/AVX2 multi-variant)" >> $GITHUB_STEP_SUMMARY echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY - echo '```' >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "### Published Wheels" >> $GITHUB_STEP_SUMMARY - echo "Total: $(ls -1 dist/*.whl | wc -l) wheels (3 Python versions: 3.10, 3.11, 3.12)" >> $GITHUB_STEP_SUMMARY + echo "# CUDA version (requires NVIDIA driver with CUDA 11.8+ or 12.x support)" >> $GITHUB_STEP_SUMMARY + echo "pip install kt-kernel-cuda==${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "### Features" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "**CPU wheels with multi-variant support:**" >> $GITHUB_STEP_SUMMARY + echo "**kt-kernel (CPU) - Multi-variant support:**" >> $GITHUB_STEP_SUMMARY echo "- ✅ AMX (Intel Sapphire Rapids+)" >> $GITHUB_STEP_SUMMARY echo "- ✅ AVX512 (Intel Skylake-X/Ice Lake/Cascade Lake)" >> $GITHUB_STEP_SUMMARY echo "- ✅ AVX2 (Maximum compatibility)" >> $GITHUB_STEP_SUMMARY echo "- 🔧 Runtime CPU detection: Automatically selects optimal variant" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "**CUDA wheels with multi-architecture support:**" >> $GITHUB_STEP_SUMMARY + echo "**kt-kernel-cuda (CUDA) - Multi-architecture support:**" >> $GITHUB_STEP_SUMMARY echo "- ✅ SM 80 (Ampere: A100, RTX 3000 series)" >> $GITHUB_STEP_SUMMARY echo "- ✅ SM 86 (Ampere: RTX 3060-3090)" >> $GITHUB_STEP_SUMMARY echo "- ✅ SM 89 (Ada Lovelace: RTX 4000 series)" >> $GITHUB_STEP_SUMMARY echo "- ✅ SM 90 (Hopper: H100)" >> $GITHUB_STEP_SUMMARY echo "- 🔧 Static CUDA runtime: Compatible with CUDA 11.8+ and 12.x drivers" >> $GITHUB_STEP_SUMMARY + echo "- 🔧 Includes multi-variant CPU code (AMX/AVX512/AVX2)" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "**Installation:**" >> $GITHUB_STEP_SUMMARY - echo '```bash' >> $GITHUB_STEP_SUMMARY - echo "# CPU version" >> $GITHUB_STEP_SUMMARY - echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}+cpu" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "# CUDA version (requires NVIDIA driver with CUDA 11.8+ or 12.x support)" >> $GITHUB_STEP_SUMMARY - echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}+cuda118" >> $GITHUB_STEP_SUMMARY - echo '```' >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "PyPI link: https://pypi.org/project/kt-kernel/#history" >> $GITHUB_STEP_SUMMARY + echo "### Links" >> $GITHUB_STEP_SUMMARY + echo "- CPU package: https://pypi.org/project/kt-kernel/${{ steps.get_version.outputs.VERSION }}/" >> $GITHUB_STEP_SUMMARY + echo "- CUDA package: https://pypi.org/project/kt-kernel-cuda/${{ steps.get_version.outputs.VERSION }}/" >> $GITHUB_STEP_SUMMARY diff --git a/kt-kernel/README.md b/kt-kernel/README.md index 2a39d31..b6d3ac9 100644 --- a/kt-kernel/README.md +++ b/kt-kernel/README.md @@ -48,13 +48,7 @@ High-performance kernel operations for KTransformers, featuring CPU-optimized Mo Install the latest CPU-only version: ```bash -pip install "kt-kernel==0.5.0+cpu" -``` - -Or let pip auto-select the latest CPU version: - -```bash -pip install kt-kernel # Defaults to CPU version +pip install kt-kernel ``` > **Note**: Check the [latest version on PyPI](https://pypi.org/project/kt-kernel/#history) @@ -75,7 +69,7 @@ pip install kt-kernel # Defaults to CPU version For NVIDIA GPU-accelerated inference: ```bash -pip install "kt-kernel==0.5.0+cuda118" +pip install kt-kernel-cuda ``` **Features:** diff --git a/kt-kernel/setup.py b/kt-kernel/setup.py index e598c2d..4669070 100644 --- a/kt-kernel/setup.py +++ b/kt-kernel/setup.py @@ -698,31 +698,33 @@ if _version_file.exists(): else: _base_version = "0.5.0" -# Auto-detect version suffix based on build type +# Determine package name and version based on build type +# PyPI doesn't allow local version identifiers (+suffix), so we use separate package names if "CPUINFER_VERSION" in os.environ: # User explicitly set version (e.g., for testing) VERSION = os.environ["CPUINFER_VERSION"] print(f"-- Explicit version: {VERSION}") else: - # Auto-detect suffix based on CUDA usage - cuda_enabled = _env_get_bool("CPUINFER_USE_CUDA", False) + VERSION = _base_version - if cuda_enabled: - # CUDA build: add +cuda118 suffix - # (CUDA 11.8 is the build toolkit version for compatibility with 11.8+ and 12.x) - VERSION = f"{_base_version}+cuda118" - print(f"-- CUDA wheel version: {VERSION}") - else: - # CPU-only build: add +cpu suffix - VERSION = f"{_base_version}+cpu" - print(f"-- CPU wheel version: {VERSION}") +# Determine package name based on CUDA usage +cuda_enabled = _env_get_bool("CPUINFER_USE_CUDA", False) +if cuda_enabled: + # CUDA build: use kt-kernel-cuda package name + # Compatible with CUDA 11.8+ and 12.x drivers + PACKAGE_NAME = "kt-kernel-cuda" + print(f"-- CUDA wheel: {PACKAGE_NAME} version {VERSION}") +else: + # CPU-only build: use kt-kernel package name + PACKAGE_NAME = "kt-kernel" + print(f"-- CPU wheel: {PACKAGE_NAME} version {VERSION}") ################################################################################ # Setup ################################################################################ setup( - name="kt-kernel", + name=PACKAGE_NAME, version=VERSION, description="KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)", author="kvcache-ai",