# ktransformers/.github/workflows/release-pypi.yml

# Builds the sglang-kt and kt-kernel wheels and uploads them to PyPI, or to
# TestPyPI when a manual run asks for it.
name: Release to PyPI

on:
  # Automatic release: a push to main that touches version.py.
  push:
    branches: [main]
    paths: ['version.py']
  # Manual release with an optional TestPyPI switch.
  workflow_dispatch:
    inputs:
      test_pypi:
        type: choice
        description: "Publish to TestPyPI instead of PyPI (for testing)"
        required: false
        default: "false"
        options: ["true", "false"]

# Workflow-wide default token scope; individual jobs may declare their own.
permissions:
  contents: read

jobs:
  # ── sglang-kt (must be on PyPI before users can pip install kt-kernel) ──
  build-and-publish-sglang-kt:
    name: Build & publish sglang-kt
    # Release only from the main branch of the upstream repository.
    if: ${{ github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main' }}
    runs-on:
      - self-hosted
      - linux
      - x64
    environment: prod
    permissions:
      contents: read
      # NOTE(review): the upload steps of this job authenticate twine with
      # API-token secrets; confirm whether id-token: write is still required.
      id-token: write

    steps:
      # Check out the repository with submodules fetched recursively.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: 'recursive'

      # Provision the interpreter used to build and upload the wheel.
      # setup-python v5 runs on the Node 20 runtime (as actions/checkout@v4
      # already does); v4 targets the deprecated Node 16 runtime.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      # Upgrade pip, then install the packaging and upload tooling.
      - name: Install build tools
        run: |
          python -m pip install --upgrade pip
          pip install \
            build \
            wheel \
            setuptools \
            twine

      # Build the sglang-kt wheel, exporting the version read from version.py.
      - name: Build sglang-kt wheel
        working-directory: third_party/sglang/python
        run: |
          # Read __version__ by executing the repository's top-level version.py.
          kt_version="$(python3 -c "exec(open('${{ github.workspace }}/version.py').read()); print(__version__)")"
          # Exported for the build; presumably consumed by the package's build
          # configuration — verify against third_party/sglang/python.
          export SGLANG_KT_VERSION="${kt_version}"
          echo "Building sglang-kt v${kt_version} wheel..."
          python -m build --wheel -v
          # Fail the step unless dist/ contains a file named like sglang_kt.
          if ! ls dist/ | grep -q "sglang_kt"; then
            echo "ERROR: Wheel name does not contain sglang_kt"
            exit 1
          fi

      # Upload to PyPI unless a manual run selected TestPyPI. Files already
      # present on the index are skipped rather than treated as errors.
      - name: Publish sglang-kt to PyPI
        if: ${{ github.event.inputs.test_pypi != 'true' }}
        env:
          TWINE_USERNAME: "__token__"
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: >-
          python -m twine upload
          --skip-existing
          --verbose
          third_party/sglang/python/dist/*.whl

      # Manual runs with test_pypi == 'true' upload to TestPyPI instead.
      - name: Publish sglang-kt to TestPyPI (if requested)
        if: ${{ github.event.inputs.test_pypi == 'true' }}
        env:
          TWINE_USERNAME: "__token__"
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: >-
          python -m twine upload
          --repository testpypi
          --skip-existing
          --verbose
          third_party/sglang/python/dist/*.whl

  # ── kt-kernel ──
  # Builds one kt-kernel wheel per Python version in the matrix.
  build-kt-kernel:
    name: Build kt-kernel (Python ${{ matrix.python-version }})
    runs-on: [self-hosted, linux, x64, gpu]
    # Same repository guard as the publishing jobs: keeps forks (which
    # presumably have no matching self-hosted GPU runner) from queuing this
    # job. The branch is deliberately not restricted, so a manual run on
    # another branch still produces wheel artifacts without publishing them.
    if: github.repository == 'kvcache-ai/ktransformers'
    strategy:
      # Let the remaining Python versions finish even if one build fails.
      fail-fast: false
      matrix:
        python-version: ['3.11', '3.12']

    steps:
      # Check out the repository with submodules fetched recursively.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: 'recursive'

      # Provision the interpreter for this matrix entry.
      # setup-python v5 runs on the Node 20 runtime (as actions/checkout@v4
      # already does); v4 targets the deprecated Node 16 runtime.
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Abort early when the runner has no usable GPU or CUDA compiler.
      - name: Verify CUDA availability
        run: |
          if ! nvidia-smi; then
            echo "ERROR: GPU not available"
            exit 1
          fi
          if ! nvcc --version; then
            echo "ERROR: CUDA toolkit not found"
            exit 1
          fi

      # Install the system libraries and Python packages needed for the build.
      - name: Install dependencies
        run: |
          apt-get update && apt-get install -y cmake libhwloc-dev pkg-config libnuma-dev
          python -m pip install --upgrade pip
          # --index-url replaces the default index for the whole command, so
          # the generic build tooling is installed from PyPI first and only
          # torch is resolved against the PyTorch CUDA 11.8 wheel index.
          pip install build wheel setuptools
          pip install torch --index-url https://download.pytorch.org/whl/cu118

      # Build the kt-kernel wheel; the CPUINFER_* variables and CUDA_HOME
      # below are passed to the build through the step environment.
      - name: Build kt-kernel wheel
        working-directory: kt-kernel
        env:
          CPUINFER_BUILD_ALL_VARIANTS: "1"
          CPUINFER_USE_CUDA: "1"
          CPUINFER_CUDA_ARCHS: "80;86;89;90"
          CPUINFER_CUDA_STATIC_RUNTIME: "1"
          CPUINFER_BUILD_TYPE: "Release"
          CPUINFER_PARALLEL: "4"
          CPUINFER_FORCE_REBUILD: "1"
          CUDA_HOME: "/usr/local/cuda-11.8"
        run: |
          # Log the intended configuration before starting the build.
          cat <<'EOF'
          Building kt-kernel with:
            - CUDA support (SM 80, 86, 89, 90)
            - CPU multi-variant (AMX, AVX512, AVX2)
          EOF
          python -m build --wheel -v

      # Smoke-test the freshly built wheel: install it, import it, report the
      # CUDA and CPU-variant contents, and reject a dynamic libcudart link.
      - name: Verify wheel
        working-directory: kt-kernel
        run: |
          echo "Generated wheel:"
          ls -lh dist/

          # Install and test
          pip install dist/*.whl
          python -c "import kt_kernel; print(f'✓ Version: {kt_kernel.__version__}')"
          python -c "import kt_kernel; print(f'✓ CPU variant: {kt_kernel.__cpu_variant__}')"

          # Verify CUDA support (informational: the result is only printed)
          python -c "
          from kt_kernel import kt_kernel_ext
          cpu_infer = kt_kernel_ext.CPUInfer(4)
          methods = dir(cpu_infer)
          has_cuda = 'submit_with_cuda_stream' in methods
          print(f'✓ CUDA support: {has_cuda}')
          "

          # Verify CPU multi-variant support (informational: never fails)
          echo "Checking CPU variants in wheel..."
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_" || echo "Warning: No variant .so files found"
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_amx.cpython" && echo "✓ AMX variant found" || echo "Note: AMX variant missing"
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx512" && echo "✓ AVX512 variants found" || echo "Note: AVX512 variants missing"
          python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx2.cpython" && echo "✓ AVX2 variant found" || echo "Note: AVX2 variant missing"

          # Verify static linking (should NOT depend on libcudart.so)
          # Unpack into a private scratch directory: matrix jobs may run
          # concurrently on the same self-hosted host, and a fixed /tmp path
          # would let one job delete or overwrite another job's files.
          check_dir="$(mktemp -d)"
          trap 'rm -rf "$check_dir"' EXIT
          unzip -q dist/*.whl -d "$check_dir"
          if ldd "$check_dir"/kt_kernel/*.so 2>/dev/null | grep -q "libcudart.so"; then
            echo "ERROR: Dynamic cudart found, should be statically linked"
            exit 1
          else
            echo "✓ CUDA runtime statically linked"
          fi

      # Run auditwheel over every built wheel and replace dist/ with the
      # results so later steps only see manylinux-tagged files.
      - name: Repair wheel for manylinux
        working-directory: kt-kernel
        run: |
          pip install auditwheel patchelf
          mkdir -p wheelhouse
          for wheel in dist/*.whl; do
            # libcuda.so.1 is excluded from bundling by the --exclude flag.
            if ! auditwheel repair "$wheel" --plat manylinux_2_17_x86_64 --exclude libcuda.so.1 -w wheelhouse/; then
              # Best-effort fallback kept from the original workflow: ship the
              # unrepaired wheel under a manylinux tag, but surface that in the
              # run annotations instead of doing it silently.
              retagged="$(basename "$wheel" | sed 's/linux_x86_64/manylinux_2_17_x86_64/')"
              echo "::warning::auditwheel repair failed for $wheel; copying it unrepaired as $retagged"
              cp "$wheel" "wheelhouse/$retagged"
            fi
          done
          rm -f dist/*.whl && cp wheelhouse/*.whl dist/

      # Store the wheel(s) as a per-Python-version artifact, kept for 7 days.
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: "kt-kernel-wheels-py${{ matrix.python-version }}"
          path: "kt-kernel/dist/*.whl"
          retention-days: 7

  # Uploads the kt-kernel wheels once sglang-kt is published and every
  # matrix build has finished.
  publish-pypi:
    name: Publish kt-kernel to PyPI
    needs:
      - build-and-publish-sglang-kt
      - build-kt-kernel
    # Release only from the main branch of the upstream repository.
    if: ${{ github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main' }}
    runs-on:
      - self-hosted
      - linux
      - x64
    environment: prod
    permissions:
      contents: read
      # Requested for trusted publishing (OIDC).
      # NOTE(review): the upload steps of this job pass API-token secrets to
      # twine — confirm whether this permission is still needed.
      id-token: write

    steps:
      # No artifact name is given, so every artifact of this run is fetched
      # into artifacts/.
      - name: Download all wheel artifacts
        uses: actions/download-artifact@v4
        with:
          path: 'artifacts/'

      # Flatten the downloaded artifacts into a single dist/ directory.
      - name: Organize wheels into dist/
        run: |
          mkdir -p dist/
          # Copy every wheel found anywhere below artifacts/.
          find artifacts/ -name '*.whl' -exec cp '{}' dist/ ';'
          echo "Wheels to publish:"
          ls -lh dist/

      # Expose the release version as the step output VERSION, read from the
      # filename of the first wheel in dist/.
      - name: Get version from wheel
        id: get_version
        run: |
          # Pick the first wheel; fail clearly when nothing was downloaded.
          wheel_path=$(ls dist/*.whl 2>/dev/null | head -1)
          if [ -z "$wheel_path" ]; then
            echo "ERROR: No wheels found in dist/"
            exit 1
          fi
          wheel_name=$(basename "$wheel_path")
          # Wheel filenames are {distribution}-{version}-...; neither of the
          # first two fields may contain '-', so field 2 is the whole version,
          # including pre/post/dev suffixes such as 1.0rc1 or 1.0.post1 that a
          # digits-and-dots pattern would miss.
          version=$(echo "$wheel_name" | cut -d- -f2)
          echo "VERSION=$version" >> "$GITHUB_OUTPUT"
          echo "Publishing version: $version"

      # Upgrade pip, then install the upload client.
      - name: Install twine
        run: >-
          python -m pip install --upgrade pip
          && pip install twine

      # Manual runs with test_pypi == 'true' upload to TestPyPI; files already
      # present on the index are skipped.
      - name: Publish to TestPyPI (if requested)
        if: ${{ github.event.inputs.test_pypi == 'true' }}
        env:
          TWINE_USERNAME: "__token__"
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: python -m twine upload --repository testpypi --skip-existing --verbose dist/*.whl

      # Default path (push-triggered runs, or manual runs without the
      # TestPyPI switch): upload to PyPI, skipping files already present.
      - name: Publish to PyPI
        if: ${{ github.event.inputs.test_pypi != 'true' }}
        env:
          TWINE_USERNAME: "__token__"
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: python -m twine upload --skip-existing --verbose dist/*.whl

      # Write a Markdown report of the release to the job summary.
      - name: Create release summary
        env:
          # Passed through the environment so the script body stays free of
          # inline expression expansion.
          VERSION: ${{ steps.get_version.outputs.VERSION }}
          TEST_PYPI: ${{ github.event.inputs.test_pypi }}
        run: |
          # Describe the index that the upload steps actually targeted.
          if [ "$TEST_PYPI" = "true" ]; then
            index_name="TestPyPI"
            install_cmd="pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ kt-kernel==${VERSION}"
            project_url="https://test.pypi.org/project/kt-kernel/${VERSION}/"
          else
            index_name="PyPI"
            install_cmd="pip install kt-kernel==${VERSION}"
            project_url="https://pypi.org/project/kt-kernel/${VERSION}/"
          fi
          wheel_count=$(ls -1 dist/*.whl | wc -l)

          # The Python versions below must match the build-kt-kernel matrix
          # (3.11 and 3.12); no 3.10 wheel is built by this workflow.
          {
            echo "## 🎉 kt-kernel v${VERSION} Published to ${index_name}"
            echo ""
            echo "### Installation"
            echo '```bash'
            echo "${install_cmd}"
            echo '```'
            echo ""
            echo "### Published Wheels"
            echo "Total: ${wheel_count} wheels (Python 3.11, 3.12)"
            echo ""
            echo "### Features"
            echo ""
            echo "**CPU Multi-Variant Support:**"
            echo "- ✅ AMX (Intel Sapphire Rapids+, 2023)"
            echo "- ✅ AVX512 Base/VNNI/VBMI/BF16 (Intel Skylake-X/Ice Lake/Cascade Lake, 2017+)"
            echo "- ✅ AVX2 (Maximum compatibility, 2013+)"
            echo "- 🔧 Runtime CPU detection: Automatically selects optimal variant"
            echo ""
            echo "**CUDA Support:**"
            echo "- ✅ SM 80 (Ampere: A100, RTX 3000 series)"
            echo "- ✅ SM 86 (Ampere: RTX 3060-3090)"
            echo "- ✅ SM 89 (Ada Lovelace: RTX 4000 series)"
            echo "- ✅ SM 90 (Hopper: H100)"
            echo "- 🔧 Static CUDA runtime: Compatible with CUDA 11.8+ and 12.x drivers"
            echo "- 🔧 Works on CPU-only systems (CUDA features disabled gracefully)"
            echo ""
            echo "**Requirements:**"
            echo "- Python 3.11 or 3.12"
            echo "- Linux x86-64 (manylinux_2_17 compatible)"
            echo "- For CUDA features: NVIDIA driver with CUDA 11.8+ or 12.x support"
            echo ""
            echo "${index_name} link: ${project_url}"
          } >> "$GITHUB_STEP_SUMMARY"