mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-29 02:31:20 +00:00
fix cuda wheel build (#1766)
This commit is contained in:
31
.github/workflows/release-pypi.yml
vendored
31
.github/workflows/release-pypi.yml
vendored
@@ -282,36 +282,37 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
echo "## 🎉 kt-kernel v${{ steps.get_version.outputs.VERSION }} Published to PyPI" >> $GITHUB_STEP_SUMMARY
|
echo "## 🎉 kt-kernel v${{ steps.get_version.outputs.VERSION }} Published to PyPI" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### Published Packages" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- **kt-kernel** (CPU-only)" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- **kt-kernel-cuda** (CUDA support)" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "Total wheels: $(ls -1 dist/*.whl | wc -l) (3 Python versions: 3.10, 3.11, 3.12)" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "### Installation" >> $GITHUB_STEP_SUMMARY
|
echo "### Installation" >> $GITHUB_STEP_SUMMARY
|
||||||
echo '```bash' >> $GITHUB_STEP_SUMMARY
|
echo '```bash' >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "# CPU version (AMX/AVX512/AVX2 multi-variant)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
||||||
echo '```' >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "### Published Wheels" >> $GITHUB_STEP_SUMMARY
|
echo "# CUDA version (requires NVIDIA driver with CUDA 11.8+ or 12.x support)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "Total: $(ls -1 dist/*.whl | wc -l) wheels (3 Python versions: 3.10, 3.11, 3.12)" >> $GITHUB_STEP_SUMMARY
|
echo "pip install kt-kernel-cuda==${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "### Features" >> $GITHUB_STEP_SUMMARY
|
echo "### Features" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "**CPU wheels with multi-variant support:**" >> $GITHUB_STEP_SUMMARY
|
echo "**kt-kernel (CPU) - Multi-variant support:**" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- ✅ AMX (Intel Sapphire Rapids+)" >> $GITHUB_STEP_SUMMARY
|
echo "- ✅ AMX (Intel Sapphire Rapids+)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- ✅ AVX512 (Intel Skylake-X/Ice Lake/Cascade Lake)" >> $GITHUB_STEP_SUMMARY
|
echo "- ✅ AVX512 (Intel Skylake-X/Ice Lake/Cascade Lake)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- ✅ AVX2 (Maximum compatibility)" >> $GITHUB_STEP_SUMMARY
|
echo "- ✅ AVX2 (Maximum compatibility)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- 🔧 Runtime CPU detection: Automatically selects optimal variant" >> $GITHUB_STEP_SUMMARY
|
echo "- 🔧 Runtime CPU detection: Automatically selects optimal variant" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "**CUDA wheels with multi-architecture support:**" >> $GITHUB_STEP_SUMMARY
|
echo "**kt-kernel-cuda (CUDA) - Multi-architecture support:**" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- ✅ SM 80 (Ampere: A100, RTX 3000 series)" >> $GITHUB_STEP_SUMMARY
|
echo "- ✅ SM 80 (Ampere: A100, RTX 3000 series)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- ✅ SM 86 (Ampere: RTX 3060-3090)" >> $GITHUB_STEP_SUMMARY
|
echo "- ✅ SM 86 (Ampere: RTX 3060-3090)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- ✅ SM 89 (Ada Lovelace: RTX 4000 series)" >> $GITHUB_STEP_SUMMARY
|
echo "- ✅ SM 89 (Ada Lovelace: RTX 4000 series)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- ✅ SM 90 (Hopper: H100)" >> $GITHUB_STEP_SUMMARY
|
echo "- ✅ SM 90 (Hopper: H100)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- 🔧 Static CUDA runtime: Compatible with CUDA 11.8+ and 12.x drivers" >> $GITHUB_STEP_SUMMARY
|
echo "- 🔧 Static CUDA runtime: Compatible with CUDA 11.8+ and 12.x drivers" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- 🔧 Includes multi-variant CPU code (AMX/AVX512/AVX2)" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "**Installation:**" >> $GITHUB_STEP_SUMMARY
|
echo "### Links" >> $GITHUB_STEP_SUMMARY
|
||||||
echo '```bash' >> $GITHUB_STEP_SUMMARY
|
echo "- CPU package: https://pypi.org/project/kt-kernel/${{ steps.get_version.outputs.VERSION }}/" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "# CPU version" >> $GITHUB_STEP_SUMMARY
|
echo "- CUDA package: https://pypi.org/project/kt-kernel-cuda/${{ steps.get_version.outputs.VERSION }}/" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}+cpu" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "# CUDA version (requires NVIDIA driver with CUDA 11.8+ or 12.x support)" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}+cuda118" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo '```' >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "PyPI link: https://pypi.org/project/kt-kernel/#history" >> $GITHUB_STEP_SUMMARY
|
|
||||||
|
|||||||
@@ -48,13 +48,7 @@ High-performance kernel operations for KTransformers, featuring CPU-optimized Mo
|
|||||||
Install the latest CPU-only version:
|
Install the latest CPU-only version:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install "kt-kernel==0.5.0+cpu"
|
pip install kt-kernel
|
||||||
```
|
|
||||||
|
|
||||||
Or let pip auto-select the latest CPU version:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install kt-kernel # Defaults to CPU version
|
|
||||||
```
|
```
|
||||||
|
|
||||||
> **Note**: Check the [latest version on PyPI](https://pypi.org/project/kt-kernel/#history)
|
> **Note**: Check the [latest version on PyPI](https://pypi.org/project/kt-kernel/#history)
|
||||||
@@ -75,7 +69,7 @@ pip install kt-kernel # Defaults to CPU version
|
|||||||
For NVIDIA GPU-accelerated inference:
|
For NVIDIA GPU-accelerated inference:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install "kt-kernel==0.5.0+cuda118"
|
pip install kt-kernel-cuda
|
||||||
```
|
```
|
||||||
|
|
||||||
**Features:**
|
**Features:**
|
||||||
|
|||||||
@@ -698,31 +698,33 @@ if _version_file.exists():
|
|||||||
else:
|
else:
|
||||||
_base_version = "0.5.0"
|
_base_version = "0.5.0"
|
||||||
|
|
||||||
# Auto-detect version suffix based on build type
|
# Determine package name and version based on build type
|
||||||
|
# PyPI doesn't allow local version identifiers (+suffix), so we use separate package names
|
||||||
if "CPUINFER_VERSION" in os.environ:
|
if "CPUINFER_VERSION" in os.environ:
|
||||||
# User explicitly set version (e.g., for testing)
|
# User explicitly set version (e.g., for testing)
|
||||||
VERSION = os.environ["CPUINFER_VERSION"]
|
VERSION = os.environ["CPUINFER_VERSION"]
|
||||||
print(f"-- Explicit version: {VERSION}")
|
print(f"-- Explicit version: {VERSION}")
|
||||||
else:
|
else:
|
||||||
# Auto-detect suffix based on CUDA usage
|
VERSION = _base_version
|
||||||
cuda_enabled = _env_get_bool("CPUINFER_USE_CUDA", False)
|
|
||||||
|
|
||||||
if cuda_enabled:
|
# Determine package name based on CUDA usage
|
||||||
# CUDA build: add +cuda118 suffix
|
cuda_enabled = _env_get_bool("CPUINFER_USE_CUDA", False)
|
||||||
# (CUDA 11.8 is the build toolkit version for compatibility with 11.8+ and 12.x)
|
if cuda_enabled:
|
||||||
VERSION = f"{_base_version}+cuda118"
|
# CUDA build: use kt-kernel-cuda package name
|
||||||
print(f"-- CUDA wheel version: {VERSION}")
|
# Compatible with CUDA 11.8+ and 12.x drivers
|
||||||
else:
|
PACKAGE_NAME = "kt-kernel-cuda"
|
||||||
# CPU-only build: add +cpu suffix
|
print(f"-- CUDA wheel: {PACKAGE_NAME} version {VERSION}")
|
||||||
VERSION = f"{_base_version}+cpu"
|
else:
|
||||||
print(f"-- CPU wheel version: {VERSION}")
|
# CPU-only build: use kt-kernel package name
|
||||||
|
PACKAGE_NAME = "kt-kernel"
|
||||||
|
print(f"-- CPU wheel: {PACKAGE_NAME} version {VERSION}")
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
# Setup
|
# Setup
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="kt-kernel",
|
name=PACKAGE_NAME,
|
||||||
version=VERSION,
|
version=VERSION,
|
||||||
description="KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)",
|
description="KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)",
|
||||||
author="kvcache-ai",
|
author="kvcache-ai",
|
||||||
|
|||||||
Reference in New Issue
Block a user