mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-03-14 18:37:23 +00:00
update PyPI Install and readme (#1731)
This commit is contained in:
@@ -50,59 +50,67 @@ High-performance kernel operations for KTransformers, featuring CPU-optimized Mo
|
||||
|
||||
### Option 1: Install from PyPI (Recommended for Most Users)
|
||||
|
||||
Coming soon...
|
||||
|
||||
Choose the version matching your CUDA installation:
|
||||
Install the latest stable version:
|
||||
|
||||
```bash
|
||||
# For CUDA 11.8
|
||||
pip install kt-kernel==0.4.2.cu118
|
||||
|
||||
# For CUDA 12.1
|
||||
pip install kt-kernel==0.4.2.cu121
|
||||
|
||||
# For CUDA 12.4
|
||||
pip install kt-kernel==0.4.2.cu124
|
||||
|
||||
# For CUDA 12.6
|
||||
pip install kt-kernel==0.4.2.cu126
|
||||
pip install kt-kernel
|
||||
```
|
||||
|
||||
> **Note**: Replace `0.4.2` with the [latest version](https://pypi.org/project/kt-kernel/#history) if available.
|
||||
Or install a specific version:
|
||||
|
||||
```bash
|
||||
pip install kt-kernel==0.4.3
|
||||
```
|
||||
|
||||
> **Note**: Check the [latest version on PyPI](https://pypi.org/project/kt-kernel/#history)
|
||||
|
||||
**Features:**
|
||||
- ✅ **Automatic CPU detection**: Detects your CPU and loads the optimal kernel variant
|
||||
- ✅ **Multi-variant wheel**: Includes AMX, AVX512, and AVX2 variants in a single package
|
||||
- ✅ **No compilation needed**: Pre-built wheels for Python 3.10, 3.11, 3.12
|
||||
- ✅ **Multiple CUDA versions**: Choose the version matching your environment
|
||||
- ✅ **Universal compatibility**: Works on any x86-64 Linux system with an AVX2-capable CPU (2013+)
|
||||
|
||||
**Requirements:**
|
||||
- CUDA 11.8+ or 12.x runtime (must match the package version you install)
|
||||
- PyTorch 2.0+ (install separately, must match CUDA version)
|
||||
- Linux x86-64
|
||||
- Python 3.10, 3.11, or 3.12
|
||||
- Linux x86-64 (manylinux_2_17 compatible)
|
||||
- CPU with AVX2 support (Intel Haswell 2013+, AMD Zen+)
|
||||
|
||||
**CPU Variants Included:**
|
||||
| Variant | CPU Support | Use Case |
|
||||
|---------|-------------|----------|
|
||||
| **AMX** | Intel Sapphire Rapids+ | Best performance on latest Intel CPUs |
|
||||
| **AVX512** | Intel Skylake-X/Ice Lake/Cascade Lake | AVX512-capable CPUs without AMX |
|
||||
| **AVX2** | Intel Haswell+, AMD Zen+ | Maximum compatibility |
|
||||
|
||||
**Check which variant is loaded:**
|
||||
The wheel includes 3 optimized variants; the best one for your CPU is **automatically selected at runtime**:
|
||||
|
||||
| Variant | CPU Support | Performance | Auto-Selected When |
|
||||
|---------|-------------|-------------|-------------------|
|
||||
| **AMX** | Intel Sapphire Rapids+ (2023+) | ⚡⚡⚡ Best | AMX instructions detected |
|
||||
| **AVX512** | Intel Skylake-X/Ice Lake/Cascade Lake (2017+), AMD Zen 4+ | ⚡⚡ Great | AVX512 instructions detected |
|
||||
| **AVX2** | Intel Haswell+ (2013+), AMD Zen+ | ⚡ Good | Fallback for maximum compatibility |
|
||||
|
||||
**Verify installation:**
|
||||
```python
|
||||
import kt_kernel
|
||||
|
||||
# Check which CPU variant was loaded
|
||||
print(f"CPU variant: {kt_kernel.__cpu_variant__}") # 'amx', 'avx512', or 'avx2'
|
||||
print(f"Version: {kt_kernel.__version__}")
|
||||
|
||||
# Test import
|
||||
from kt_kernel import KTMoEWrapper
|
||||
print("✓ kt-kernel installed successfully!")
|
||||
```
|
||||
|
||||
**Environment Variables:**
|
||||
```bash
|
||||
# Override automatic CPU detection
|
||||
export KT_KERNEL_CPU_VARIANT=avx2 # or 'avx512', 'amx'
|
||||
# Override automatic CPU detection (for testing or debugging)
|
||||
export KT_KERNEL_CPU_VARIANT=avx2 # Force AVX2 variant (options: 'avx2', 'avx512', 'amx')
|
||||
|
||||
# Enable debug output
|
||||
# Enable debug output to see detection process
|
||||
export KT_KERNEL_DEBUG=1
|
||||
python -c "import kt_kernel"
|
||||
# Output:
|
||||
# [kt-kernel] Detected AMX support via /proc/cpuinfo
|
||||
# [kt-kernel] Selected CPU variant: amx
|
||||
# [kt-kernel] Loading amx from: /path/to/_kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so
|
||||
# [kt-kernel] Successfully loaded AMX variant
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -49,15 +49,32 @@ kt_kernel_ext = _kt_kernel_ext
|
||||
# Import main API
|
||||
from .experts import KTMoEWrapper
|
||||
|
||||
# Read version from project root version.py
|
||||
import os
|
||||
_root_version_file = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'version.py')
|
||||
if os.path.exists(_root_version_file):
|
||||
_version_ns = {}
|
||||
with open(_root_version_file, 'r', encoding='utf-8') as f:
|
||||
exec(f.read(), _version_ns)
|
||||
__version__ = _version_ns.get('__version__', '0.4.2')
|
||||
else:
|
||||
__version__ = "0.4.2"
|
||||
# Resolve __version__ in order of preference:
#   1. metadata of the installed distribution,
#   2. the source-tree version.py three directories above this file,
#   3. a hard-coded fallback.
# The logic lives in private helpers so that temporary names (file handles,
# imported helper functions) do not end up in the package namespace.
_FALLBACK_VERSION = "0.4.3"


def _version_from_source_tree():
    """Return ``__version__`` as defined by the project-root ``version.py``.

    The file is looked up three directory levels above this module. When it
    is missing or cannot be read (including when the path is a directory),
    or when it does not define ``__version__``, ``_FALLBACK_VERSION`` is
    returned instead. Errors raised by the contents of ``version.py`` itself
    still propagate.
    """
    import os

    root_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    version_file = os.path.join(root_dir, 'version.py')
    try:
        # EAFP: open directly instead of exists()-then-open, so a vanished or
        # unreadable file degrades to the fallback rather than failing import.
        with open(version_file, 'r', encoding='utf-8') as handle:
            source = handle.read()
    except OSError:
        return _FALLBACK_VERSION

    namespace = {}
    # version.py is a trusted file from this project's own source tree.
    exec(source, namespace)
    return namespace.get('__version__', _FALLBACK_VERSION)


def _detect_version():
    """Return the package version string using the best available source."""
    try:
        from importlib.metadata import version, PackageNotFoundError
    except ImportError:
        # Python < 3.8: importlib.metadata is unavailable, try pkg_resources.
        try:
            from pkg_resources import get_distribution, DistributionNotFound
        except ImportError:
            return _FALLBACK_VERSION
        try:
            return get_distribution('kt-kernel').version
        except DistributionNotFound:
            return _FALLBACK_VERSION

    try:
        # Works whenever the package is installed (wheel or editable).
        return version('kt-kernel')
    except PackageNotFoundError:
        # Not installed: running straight from a source checkout.
        return _version_from_source_tree()


__version__ = _detect_version()
|
||||
|
||||
__all__ = ["KTMoEWrapper", "kt_kernel_ext", "__cpu_variant__", "__version__"]
|
||||
|
||||
@@ -239,6 +239,151 @@ class CMakeBuild(build_ext):
|
||||
return info
|
||||
|
||||
def build_extension(self, ext: CMakeExtension):
    """
    Entry point invoked to build ``ext``.

    Reads the CPUINFER_BUILD_ALL_VARIANTS flag (default: off) and dispatches:
    when set, all 3 variants (AMX, AVX512, AVX2) are built via
    build_multi_variants(); otherwise the original single-variant path,
    _build_single_variant(), is used.
    """
    build_all = _env_get_bool("CPUINFER_BUILD_ALL_VARIANTS", False)
    builder = self.build_multi_variants if build_all else self._build_single_variant
    builder(ext)
|
||||
|
||||
def build_multi_variants(self, ext: CMakeExtension):
    """
    Build all 3 CPU variants (AMX, AVX512, AVX2) in a single wheel.

    This creates 3 separate .so files:
    - _kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so
    - _kt_kernel_ext_avx512.cpython-311-x86_64-linux-gnu.so
    - _kt_kernel_ext_avx2.cpython-311-x86_64-linux-gnu.so

    Runtime CPU detection (in _cpu_detect.py) will automatically load the best one.

    Each variant is configured through the CPUINFER_CPU_INSTRUCT,
    CPUINFER_ENABLE_AMX and CPUINFER_ENABLE_AVX512 environment variables and
    built in its own temporary directory by _build_single_variant_impl().
    The caller's values of those variables are restored afterwards, even if
    one of the builds raises.

    Args:
        ext: The CMakeExtension to build.
    """
    print("=" * 70)
    print("Building kt-kernel with ALL CPU variants (AMX, AVX512, AVX2)")
    print("=" * 70)
    print()
    print("This will build three variants in a single wheel:")
    print("  - AMX variant (Intel Sapphire Rapids+)")
    print("  - AVX512 variant (Intel Skylake-X/Ice Lake+, AMD Zen 4+)")
    print("  - AVX2 variant (maximum compatibility, 2013+)")
    print()
    print("Runtime CPU detection will automatically select the best variant.")
    print()

    extdir = Path(self.get_ext_fullpath(ext.name)).parent.resolve()
    cfg = default_build_type()
    module_name = ext.name.split(".")[-1]

    # Variant configurations:
    # (name, CPUINFER_CPU_INSTRUCT, CPUINFER_ENABLE_AMX, CPUINFER_ENABLE_AVX512)
    # The AVX512 umbrella is switched off only for the AVX2 variant so that no
    # AVX512 code is compiled into it.
    variants = [
        ("amx", "AVX512", "ON", "ON"),      # AVX512 + AMX
        ("avx512", "AVX512", "OFF", "ON"),  # AVX512 only
        ("avx2", "AVX2", "OFF", "OFF"),     # AVX2 only
    ]

    # Save original env vars (None means "was not set").
    env_names = ("CPUINFER_CPU_INSTRUCT", "CPUINFER_ENABLE_AMX", "CPUINFER_ENABLE_AVX512")
    saved_env = {name: os.environ.get(name) for name in env_names}
    missing_variants = []

    try:
        for variant_name, cpu_instruct, enable_amx, enable_avx512 in variants:
            print("=" * 70)
            print(f"Building {variant_name.upper()} variant...")
            print("=" * 70)
            print()

            # Set environment variables for this variant
            os.environ["CPUINFER_CPU_INSTRUCT"] = cpu_instruct
            os.environ["CPUINFER_ENABLE_AMX"] = enable_amx
            os.environ["CPUINFER_ENABLE_AVX512"] = enable_avx512

            # Use separate build directory for each variant
            build_temp = Path(self.build_temp) / f"{ext.name}_{cfg}_{variant_name}"
            build_temp.mkdir(parents=True, exist_ok=True)

            # Build this variant
            self._build_single_variant_impl(ext, extdir, build_temp, cfg)

            # Rename the built .so file to include variant suffix
            # Original name: kt_kernel_ext.cpython-311-x86_64-linux-gnu.so
            # New name: _kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so
            # Sorted so the pick is deterministic if several files match.
            built_so_files = sorted(extdir.glob(f"{module_name}.*.so"))
            if not built_so_files:
                missing_variants.append(variant_name)
                print(f"⚠ Warning: Could not find built .so file for {variant_name} variant")
                print()
                continue

            original_so = built_so_files[0]
            # Strip only the leading module name (the glob guarantees it is a
            # prefix), e.g. "kt_kernel_ext.cpython-311-x86_64-linux-gnu.so"
            # -> ".cpython-311-x86_64-linux-gnu.so".
            suffix = original_so.name[len(module_name):]
            new_name = f"_kt_kernel_ext_{variant_name}{suffix}"

            # Path.replace overwrites any stale file from an earlier build.
            original_so.replace(extdir / new_name)
            print(f"✓ Built and renamed to: {new_name}")
            print()
    finally:
        # Restore original env vars, also when a variant build failed.
        for name, value in saved_env.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value

    print("=" * 70)
    if missing_variants:
        # Do not report success when a variant produced no .so file.
        print(f"⚠ Warning: No .so file found for variant(s): {', '.join(missing_variants)}")
    else:
        print("✓ All variants built successfully!")
    print("=" * 70)
    print()
    variant_files = sorted(extdir.glob("_kt_kernel_ext_*.so"))
    print(f"The wheel now contains {len(variant_files)} CPU variants:")
    for so_file in variant_files:
        print(f"  - {so_file.name}")
    print()
|
||||
|
||||
def _build_single_variant(self, ext: CMakeExtension):
    """Build a single variant; thin wrapper around _build_single_variant_impl().

    Resolves the output directory from the extension's full path, creates the
    per-build-type temporary directory under ``self.build_temp``, and hands
    both to the shared implementation.
    """
    output_dir = Path(self.get_ext_fullpath(ext.name)).parent.resolve()
    build_type = default_build_type()

    work_dir = Path(self.build_temp, f"{ext.name}_{build_type}")
    work_dir.mkdir(parents=True, exist_ok=True)

    self._build_single_variant_impl(ext, output_dir, work_dir, build_type)
|
||||
|
||||
def _build_single_variant_impl(self, ext: CMakeExtension, extdir: Path, build_temp: Path, cfg: str):
|
||||
"""
|
||||
Core build logic for a single variant.
|
||||
|
||||
This method contains the actual CMake configuration and build steps.
|
||||
It's called by both _build_single_variant() and build_multi_variants().
|
||||
|
||||
Args:
|
||||
ext: The CMakeExtension to build
|
||||
extdir: Directory where the .so file should be placed
|
||||
build_temp: Temporary build directory for CMake
|
||||
cfg: Build type (Release/Debug/etc.)
|
||||
"""
|
||||
# Auto-detect CUDA toolkit if user did not explicitly set CPUINFER_USE_CUDA
|
||||
def detect_cuda_toolkit() -> bool:
|
||||
# Respect CUDA_HOME
|
||||
@@ -287,11 +432,6 @@ class CMakeBuild(build_ext):
|
||||
os.environ["CPUINFER_USE_CUDA"] = "1" if auto_cuda else "0"
|
||||
print(f"-- CPUINFER_USE_CUDA not set; auto-detected CUDA toolkit: {'YES' if auto_cuda else 'NO'}")
|
||||
|
||||
extdir = Path(self.get_ext_fullpath(ext.name)).parent.resolve()
|
||||
cfg = default_build_type()
|
||||
build_temp = Path(self.build_temp) / f"{ext.name}_{cfg}"
|
||||
build_temp.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Base CMake args
|
||||
cmake_args = [
|
||||
f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}/",
|
||||
|
||||
Reference in New Issue
Block a user