mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-03-15 02:47:22 +00:00
* [feat]: simplify sglang installation with submodule, auto-sync CI, and version alignment
- Add kvcache-ai/sglang as git submodule at third_party/sglang (branch = main)
- Add top-level install.sh for one-click source installation (sglang + kt-kernel)
- Add sglang-kt as hard dependency in kt-kernel/pyproject.toml
- Add CI workflow to auto-sync sglang submodule daily and create PR
- Add CI workflow to build and publish sglang-kt to PyPI
- Integrate sglang-kt build into release-pypi.yml (version.py bump publishes both packages)
- Align sglang-kt version with ktransformers via SGLANG_KT_VERSION env var injection
- Update Dockerfile to use submodule and inject aligned version
- Update all 13 doc files, CLI hints, and i18n strings to reference new install methods
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* [build]: bump version to 0.5.2
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* [build]: rename PyPI package from kt-kernel to ktransformers
Users can now `pip install ktransformers` to get everything
(sglang-kt is auto-installed as a dependency).
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Revert "[build]: rename PyPI package from kt-kernel to ktransformers"
This reverts commit e0cbbf6364.
* [build]: add ktransformers meta-package for PyPI
`pip install ktransformers` now works as a single install command.
It pulls kt-kernel (which in turn pulls sglang-kt).
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* [fix]: show sglang-kt package version in kt version command
- Prioritize sglang-kt package version (aligned with ktransformers)
over sglang internal __version__
- Update display name from "sglang" to "sglang-kt"
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* [fix]: improve sglang-kt detection in kt doctor and kt version
Recognize sglang-kt package name as proof of kvcache-ai fork installation.
Previously both commands fell through to "PyPI (not recommended)" for
non-editable local source installs. Now version.py reuses the centralized
check_sglang_installation() logic.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* [build]: bump version to 0.5.2.post1
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
273 lines
10 KiB
YAML
---
# Release workflow: builds and publishes sglang-kt and kt-kernel to PyPI.
# Triggered by a version bump in version.py on main, or manually via
# workflow_dispatch (with an option to target TestPyPI for dry runs).
name: Release to PyPI

on:
  push:
    branches:
      - main
    paths:
      - "version.py"
  workflow_dispatch:
    inputs:
      test_pypi:
        description: 'Publish to TestPyPI instead of PyPI (for testing)'
        required: false
        default: 'false'
        type: choice
        options:
          - 'true'
          - 'false'

permissions:
  contents: read

jobs:
  # ── sglang-kt (must be on PyPI before users can pip install kt-kernel) ──
  build-and-publish-sglang-kt:
    name: Build & publish sglang-kt
    runs-on: [self-hosted, linux, x64]
    # Guard against running on forks or non-main refs (workflow_dispatch can
    # be invoked from any branch).
    if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
    environment: prod
    permissions:
      # NOTE(review): id-token: write enables OIDC trusted publishing, but the
      # publish steps below use a classic API token via twine — confirm which
      # mechanism is intended and drop the unused one.
      id-token: write
      contents: read

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # third_party/sglang is a git submodule; the build needs its sources.
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install build tools
        run: |
          python -m pip install --upgrade pip
          pip install build wheel setuptools twine

      - name: Build sglang-kt wheel
        working-directory: third_party/sglang/python
        run: |
          # Align the sglang-kt version with ktransformers by injecting the
          # repo-level version.py value via SGLANG_KT_VERSION.
          KT_VERSION=$(python3 -c "exec(open('${{ github.workspace }}/version.py').read()); print(__version__)")
          export SGLANG_KT_VERSION="$KT_VERSION"
          echo "Building sglang-kt v${KT_VERSION} wheel..."
          python -m build --wheel -v
          # Sanity check: the fork must produce a sglang_kt wheel, not upstream sglang.
          ls dist/ | grep -q "sglang_kt" || (echo "ERROR: Wheel name does not contain sglang_kt" && exit 1)

      - name: Publish sglang-kt to PyPI
        if: github.event.inputs.test_pypi != 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          # --skip-existing makes re-runs idempotent when the version is already live.
          python -m twine upload --skip-existing --verbose third_party/sglang/python/dist/*.whl

      - name: Publish sglang-kt to TestPyPI (if requested)
        if: github.event.inputs.test_pypi == 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: |
          python -m twine upload --repository testpypi --skip-existing --verbose third_party/sglang/python/dist/*.whl

  # ── kt-kernel ──
  build-kt-kernel:
    name: Build kt-kernel (Python ${{ matrix.python-version }})
    runs-on: [self-hosted, linux, x64, gpu]
    strategy:
      # Build every Python version even if one fails, so a single broken
      # interpreter does not hide results for the others.
      fail-fast: false
      matrix:
        python-version: ['3.11', '3.12']

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Verify CUDA availability
        run: |
          # Fail fast if the self-hosted runner lost its GPU or toolchain.
          nvidia-smi || (echo "ERROR: GPU not available" && exit 1)
          nvcc --version || (echo "ERROR: CUDA toolkit not found" && exit 1)

      - name: Install dependencies
        run: |
          # NOTE(review): assumes the runner user can apt-get without sudo
          # (root container or passwordless setup) — confirm on the runner image.
          apt-get update && apt-get install -y cmake libhwloc-dev pkg-config libnuma-dev
          python -m pip install --upgrade pip
          pip install build wheel setuptools torch --index-url https://download.pytorch.org/whl/cu118
      - name: Build kt-kernel wheel
        working-directory: kt-kernel
        env:
          # Bundle all CPU ISA variants (AMX/AVX512/AVX2) into one wheel;
          # the extension selects the best one at runtime.
          CPUINFER_BUILD_ALL_VARIANTS: '1'
          CPUINFER_USE_CUDA: '1'
          # Ampere (80/86), Ada (89), Hopper (90).
          CPUINFER_CUDA_ARCHS: '80;86;89;90'
          # Static cudart so the wheel does not depend on a system libcudart.so.
          CPUINFER_CUDA_STATIC_RUNTIME: '1'
          CPUINFER_BUILD_TYPE: 'Release'
          CPUINFER_PARALLEL: '4'
          CPUINFER_FORCE_REBUILD: '1'
          CUDA_HOME: '/usr/local/cuda-11.8'
        run: |
          echo "Building kt-kernel with:"
          echo " - CUDA support (SM 80, 86, 89, 90)"
          echo " - CPU multi-variant (AMX, AVX512, AVX2)"
          python -m build --wheel -v

      - name: Verify wheel
        working-directory: kt-kernel
        run: |
          echo "Generated wheel:"
          ls -lh dist/

          # Install and test
          pip install dist/*.whl
          python -c "import kt_kernel; print(f'✓ Version: {kt_kernel.__version__}')"
          python -c "import kt_kernel; print(f'✓ CPU variant: {kt_kernel.__cpu_variant__}')"

          # Verify CUDA support
          python -c "
          from kt_kernel import kt_kernel_ext
          cpu_infer = kt_kernel_ext.CPUInfer(4)
          methods = dir(cpu_infer)
          has_cuda = 'submit_with_cuda_stream' in methods
          print(f'✓ CUDA support: {has_cuda}')
          "

          # Verify CPU multi-variant support
          echo "Checking CPU variants in wheel..."
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_" || echo "Warning: No variant .so files found"
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_amx.cpython" && echo "✓ AMX variant found" || echo "Note: AMX variant missing"
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx512" && echo "✓ AVX512 variants found" || echo "Note: AVX512 variants missing"
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx2.cpython" && echo "✓ AVX2 variant found" || echo "Note: AVX2 variant missing"

          # Verify static linking (should NOT depend on libcudart.so)
          rm -rf /tmp/check
          unzip -q dist/*.whl -d /tmp/check
          if ldd /tmp/check/kt_kernel/*.so 2>/dev/null | grep -q "libcudart.so"; then
            echo "ERROR: Dynamic cudart found, should be statically linked"
            exit 1
          else
            echo "✓ CUDA runtime statically linked"
          fi

      - name: Repair wheel for manylinux
        working-directory: kt-kernel
        run: |
          pip install auditwheel patchelf
          mkdir -p wheelhouse
          for wheel in dist/*.whl; do
            # libcuda.so.1 is the driver library and must come from the host,
            # so it is excluded from bundling. If auditwheel refuses the wheel,
            # fall back to retagging the filename only.
            auditwheel repair "$wheel" --plat manylinux_2_17_x86_64 --exclude libcuda.so.1 -w wheelhouse/ || \
              cp "$wheel" wheelhouse/$(basename "$wheel" | sed 's/linux_x86_64/manylinux_2_17_x86_64/')
          done
          rm -f dist/*.whl && cp wheelhouse/*.whl dist/

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: kt-kernel-wheels-py${{ matrix.python-version }}
          path: kt-kernel/dist/*.whl
          retention-days: 7

  publish-pypi:
    name: Publish kt-kernel to PyPI
    # sglang-kt must already be on PyPI so `pip install kt-kernel` can
    # resolve its hard dependency.
    needs: [build-and-publish-sglang-kt, build-kt-kernel]
    runs-on: [self-hosted, linux, x64]
    if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
    environment: prod
    permissions:
      id-token: write # For trusted publishing (OIDC)
      contents: read

    steps:
      - name: Download all wheel artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts/

      - name: Organize wheels into dist/
        run: |
          mkdir -p dist/
          # Artifacts land in per-name subdirectories; flatten them into dist/.
          find artifacts/ -name "*.whl" -exec cp {} dist/ \;
          echo "Wheels to publish:"
          ls -lh dist/

      - name: Get version from wheel
        id: get_version
        run: |
          # Extract version from first wheel filename
          wheel_name=$(ls dist/*.whl | head -1 | xargs basename)
          # Wheel filenames follow PEP 427: {dist}-{version}-{python}-{abi}-{platform}.whl,
          # so the version is always the second dash-separated field. (The previous
          # sed pattern 's/kt_kernel-\([0-9.]*\)-.*/\1/' broke on PEP 440 suffixes
          # such as 0.5.2.post1 and left the whole filename in $version.)
          version=$(echo "$wheel_name" | cut -d- -f2)
          echo "VERSION=$version" >> "$GITHUB_OUTPUT"
          echo "Publishing version: $version"

      - name: Install twine
        run: |
          python -m pip install --upgrade pip
          pip install twine

      - name: Publish to TestPyPI (if requested)
        if: github.event.inputs.test_pypi == 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: |
          python -m twine upload \
            --repository testpypi \
            --skip-existing \
            --verbose \
            dist/*.whl

      - name: Publish to PyPI
        if: github.event.inputs.test_pypi != 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          python -m twine upload \
            --skip-existing \
            --verbose \
            dist/*.whl

      - name: Create release summary
        run: |
          echo "## 🎉 kt-kernel v${{ steps.get_version.outputs.VERSION }} Published to PyPI" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Installation" >> "$GITHUB_STEP_SUMMARY"
          echo '```bash' >> "$GITHUB_STEP_SUMMARY"
          echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}" >> "$GITHUB_STEP_SUMMARY"
          echo '```' >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Published Wheels" >> "$GITHUB_STEP_SUMMARY"
          # Keep this list in sync with build-kt-kernel's python-version matrix.
          echo "Total: $(ls -1 dist/*.whl | wc -l) wheels (Python 3.11, 3.12)" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Features" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**CPU Multi-Variant Support:**" >> "$GITHUB_STEP_SUMMARY"
          echo "- ✅ AMX (Intel Sapphire Rapids+, 2023)" >> "$GITHUB_STEP_SUMMARY"
          echo "- ✅ AVX512 Base/VNNI/VBMI/BF16 (Intel Skylake-X/Ice Lake/Cascade Lake, 2017+)" >> "$GITHUB_STEP_SUMMARY"
          echo "- ✅ AVX2 (Maximum compatibility, 2013+)" >> "$GITHUB_STEP_SUMMARY"
          echo "- 🔧 Runtime CPU detection: Automatically selects optimal variant" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**CUDA Support:**" >> "$GITHUB_STEP_SUMMARY"
          echo "- ✅ SM 80 (Ampere: A100, RTX 3000 series)" >> "$GITHUB_STEP_SUMMARY"
          echo "- ✅ SM 86 (Ampere: RTX 3060-3090)" >> "$GITHUB_STEP_SUMMARY"
          echo "- ✅ SM 89 (Ada Lovelace: RTX 4000 series)" >> "$GITHUB_STEP_SUMMARY"
          echo "- ✅ SM 90 (Hopper: H100)" >> "$GITHUB_STEP_SUMMARY"
          echo "- 🔧 Static CUDA runtime: Compatible with CUDA 11.8+ and 12.x drivers" >> "$GITHUB_STEP_SUMMARY"
          echo "- 🔧 Works on CPU-only systems (CUDA features disabled gracefully)" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**Requirements:**" >> "$GITHUB_STEP_SUMMARY"
          echo "- Python 3.11 or 3.12" >> "$GITHUB_STEP_SUMMARY"
          echo "- Linux x86-64 (manylinux_2_17 compatible)" >> "$GITHUB_STEP_SUMMARY"
          echo "- For CUDA features: NVIDIA driver with CUDA 11.8+ or 12.x support" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "PyPI link: https://pypi.org/project/kt-kernel/${{ steps.get_version.outputs.VERSION }}/" >> "$GITHUB_STEP_SUMMARY"