# Release workflow: builds and publishes sglang-kt and kt-kernel wheels to PyPI.
# Triggered by a push to main that touches version.py, or manually via
# workflow_dispatch (with an option to target TestPyPI for dry runs).
name: Release to PyPI

on:
  push:
    branches:
      - main
    paths:
      - "version.py"
  workflow_dispatch:
    inputs:
      test_pypi:
        description: 'Publish to TestPyPI instead of PyPI (for testing)'
        required: false
        default: 'false'
        type: choice
        options:
          - 'true'
          - 'false'

permissions:
  contents: read

jobs:
  # ── sglang-kt (must be on PyPI before users can pip install kt-kernel) ──
  build-and-publish-sglang-kt:
    name: Build & publish sglang-kt
    runs-on: [self-hosted, linux, x64]
    # Guard: only release from the canonical repo's main branch.
    if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
    environment: prod
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install build tools
        run: |
          python -m pip install --upgrade pip
          pip install build wheel setuptools twine

      - name: Build sglang-kt wheel
        working-directory: third_party/sglang/python
        run: |
          # Version is sourced from the repo-root version.py so sglang-kt and
          # kt-kernel always release in lockstep.
          KT_VERSION=$(python3 -c "exec(open('${{ github.workspace }}/version.py').read()); print(__version__)")
          export SGLANG_KT_VERSION="$KT_VERSION"
          echo "Building sglang-kt v${KT_VERSION} wheel..."
          python -m build --wheel -v
          ls dist/ | grep -q "sglang_kt" || (echo "ERROR: Wheel name does not contain sglang_kt" && exit 1)

      - name: Publish sglang-kt to PyPI
        if: github.event.inputs.test_pypi != 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          python -m twine upload --skip-existing --verbose third_party/sglang/python/dist/*.whl

      - name: Publish sglang-kt to TestPyPI (if requested)
        if: github.event.inputs.test_pypi == 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: |
          python -m twine upload --repository testpypi --skip-existing --verbose third_party/sglang/python/dist/*.whl

  # ── kt-kernel ──
  build-kt-kernel:
    name: Build kt-kernel (Python ${{ matrix.python-version }})
    runs-on: [self-hosted, linux, x64, gpu]
    strategy:
      fail-fast: false
      matrix:
        python-version: ['3.11', '3.12']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Verify CUDA availability
        run: |
          nvidia-smi || (echo "ERROR: GPU not available" && exit 1)
          nvcc --version || (echo "ERROR: CUDA toolkit not found" && exit 1)

      - name: Install dependencies
        run: |
          apt-get update && apt-get install -y cmake libhwloc-dev pkg-config libnuma-dev
          python -m pip install --upgrade pip
          pip install build wheel setuptools torch --index-url https://download.pytorch.org/whl/cu118

      - name: Build kt-kernel wheel
        working-directory: kt-kernel
        env:
          CPUINFER_BUILD_ALL_VARIANTS: '1'
          CPUINFER_USE_CUDA: '1'
          CPUINFER_CUDA_ARCHS: '80;86;89;90'
          CPUINFER_CUDA_STATIC_RUNTIME: '1'
          CPUINFER_BUILD_TYPE: 'Release'
          CPUINFER_PARALLEL: '4'
          CPUINFER_FORCE_REBUILD: '1'
          CUDA_HOME: '/usr/local/cuda-11.8'
        run: |
          echo "Building kt-kernel with:"
          echo "  - CUDA support (SM 80, 86, 89, 90)"
          echo "  - CPU multi-variant (AMX, AVX512, AVX2)"
          python -m build --wheel -v

      - name: Verify wheel
        working-directory: kt-kernel
        run: |
          echo "Generated wheel:"
          ls -lh dist/

          # Install and test
          pip install dist/*.whl
          python -c "import kt_kernel; print(f'✓ Version: {kt_kernel.__version__}')"
          python -c "import kt_kernel; print(f'✓ CPU variant: {kt_kernel.__cpu_variant__}')"

          # Verify CUDA support
          python -c "
          from kt_kernel import kt_kernel_ext
          cpu_infer = kt_kernel_ext.CPUInfer(4)
          methods = dir(cpu_infer)
          has_cuda = 'submit_with_cuda_stream' in methods
          print(f'✓ CUDA support: {has_cuda}')
          "

          # Verify CPU multi-variant support
          echo "Checking CPU variants in wheel..."
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_" || echo "Warning: No variant .so files found"
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_amx.cpython" && echo "✓ AMX variant found" || echo "Note: AMX variant missing"
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx512" && echo "✓ AVX512 variants found" || echo "Note: AVX512 variants missing"
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx2.cpython" && echo "✓ AVX2 variant found" || echo "Note: AVX2 variant missing"

          # Verify static linking (should NOT depend on libcudart.so)
          rm -rf /tmp/check
          unzip -q dist/*.whl -d /tmp/check
          if ldd /tmp/check/kt_kernel/*.so 2>/dev/null | grep -q "libcudart.so"; then
            echo "ERROR: Dynamic cudart found, should be statically linked"
            exit 1
          else
            echo "✓ CUDA runtime statically linked"
          fi

      - name: Repair wheel for manylinux
        working-directory: kt-kernel
        run: |
          pip install auditwheel patchelf
          mkdir -p wheelhouse
          # auditwheel may refuse to repair (e.g. non-manylinux deps); fall back
          # to renaming the platform tag so the upload still proceeds.
          for wheel in dist/*.whl; do
            auditwheel repair "$wheel" --plat manylinux_2_17_x86_64 --exclude libcuda.so.1 -w wheelhouse/ || \
            cp "$wheel" wheelhouse/$(basename "$wheel" | sed 's/linux_x86_64/manylinux_2_17_x86_64/')
          done
          rm -f dist/*.whl && cp wheelhouse/*.whl dist/

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: kt-kernel-wheels-py${{ matrix.python-version }}
          path: kt-kernel/dist/*.whl
          retention-days: 7

  publish-pypi:
    name: Publish kt-kernel to PyPI
    needs: [build-and-publish-sglang-kt, build-kt-kernel]
    runs-on: [self-hosted, linux, x64]
    if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
    environment: prod
    permissions:
      id-token: write  # For trusted publishing (OIDC)
      contents: read
    steps:
      - name: Download all wheel artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts/

      - name: Organize wheels into dist/
        run: |
          mkdir -p dist/
          find artifacts/ -name "*.whl" -exec cp {} dist/ \;
          echo "Wheels to publish:"
          ls -lh dist/

      - name: Get version from wheel
        id: get_version
        run: |
          # Extract version from first wheel filename
          wheel_name=$(ls dist/*.whl | head -1 | xargs basename)
          # Extract version (format: kt_kernel-X.Y.Z-...)
          version=$(echo "$wheel_name" | sed 's/kt_kernel-\([0-9.]*\)-.*/\1/')
          echo "VERSION=$version" >> $GITHUB_OUTPUT
          echo "Publishing version: $version"

      - name: Install twine
        run: |
          python -m pip install --upgrade pip
          pip install twine

      - name: Publish to TestPyPI (if requested)
        if: github.event.inputs.test_pypi == 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: |
          python -m twine upload \
            --repository testpypi \
            --skip-existing \
            --verbose \
            dist/*.whl

      - name: Publish to PyPI
        if: github.event.inputs.test_pypi != 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          python -m twine upload \
            --skip-existing \
            --verbose \
            dist/*.whl

      - name: Create release summary
        run: |
          echo "## 🎉 kt-kernel v${{ steps.get_version.outputs.VERSION }} Published to PyPI" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Installation" >> $GITHUB_STEP_SUMMARY
          echo '```bash' >> $GITHUB_STEP_SUMMARY
          echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
          echo '```' >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Published Wheels" >> $GITHUB_STEP_SUMMARY
          # Python versions listed here must match the build-kt-kernel matrix.
          echo "Total: $(ls -1 dist/*.whl | wc -l) wheels (Python 3.11, 3.12)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Features" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**CPU Multi-Variant Support:**" >> $GITHUB_STEP_SUMMARY
          echo "- ✅ AMX (Intel Sapphire Rapids+, 2023)" >> $GITHUB_STEP_SUMMARY
          echo "- ✅ AVX512 Base/VNNI/VBMI/BF16 (Intel Skylake-X/Ice Lake/Cascade Lake, 2017+)" >> $GITHUB_STEP_SUMMARY
          echo "- ✅ AVX2 (Maximum compatibility, 2013+)" >> $GITHUB_STEP_SUMMARY
          echo "- 🔧 Runtime CPU detection: Automatically selects optimal variant" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**CUDA Support:**" >> $GITHUB_STEP_SUMMARY
          echo "- ✅ SM 80 (Ampere: A100, RTX 3000 series)" >> $GITHUB_STEP_SUMMARY
          echo "- ✅ SM 86 (Ampere: RTX 3060-3090)" >> $GITHUB_STEP_SUMMARY
          echo "- ✅ SM 89 (Ada Lovelace: RTX 4000 series)" >> $GITHUB_STEP_SUMMARY
          echo "- ✅ SM 90 (Hopper: H100)" >> $GITHUB_STEP_SUMMARY
          echo "- 🔧 Static CUDA runtime: Compatible with CUDA 11.8+ and 12.x drivers" >> $GITHUB_STEP_SUMMARY
          echo "- 🔧 Works on CPU-only systems (CUDA features disabled gracefully)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**Requirements:**" >> $GITHUB_STEP_SUMMARY
          echo "- Python 3.11 or 3.12" >> $GITHUB_STEP_SUMMARY
          echo "- Linux x86-64 (manylinux_2_17 compatible)" >> $GITHUB_STEP_SUMMARY
          echo "- For CUDA features: NVIDIA driver with CUDA 11.8+ or 12.x support" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "PyPI link: https://pypi.org/project/kt-kernel/${{ steps.get_version.outputs.VERSION }}/" >> $GITHUB_STEP_SUMMARY