From 15c624dcaeabddcf4cba83756737d9df22a6adf1 Mon Sep 17 00:00:00 2001 From: Jianwei Dong Date: Wed, 4 Mar 2026 16:54:48 +0800 Subject: [PATCH] Fix/sglang kt detection (#1875) * [feat]: simplify sglang installation with submodule, auto-sync CI, and version alignment - Add kvcache-ai/sglang as git submodule at third_party/sglang (branch = main) - Add top-level install.sh for one-click source installation (sglang + kt-kernel) - Add sglang-kt as hard dependency in kt-kernel/pyproject.toml - Add CI workflow to auto-sync sglang submodule daily and create PR - Add CI workflow to build and publish sglang-kt to PyPI - Integrate sglang-kt build into release-pypi.yml (version.py bump publishes both packages) - Align sglang-kt version with ktransformers via SGLANG_KT_VERSION env var injection - Update Dockerfile to use submodule and inject aligned version - Update all 13 doc files, CLI hints, and i18n strings to reference new install methods Co-Authored-By: Claude Opus 4.6 * [build]: bump version to 0.5.2 Co-Authored-By: Claude Opus 4.6 * [build]: rename PyPI package from kt-kernel to ktransformers Users can now `pip install ktransformers` to get everything (sglang-kt is auto-installed as a dependency). Co-Authored-By: Claude Opus 4.6 * Revert "[build]: rename PyPI package from kt-kernel to ktransformers" This reverts commit e0cbbf63642a108b80d6732bd5b59c9715f355f5. * [build]: add ktransformers meta-package for PyPI `pip install ktransformers` now works as a single install command. It pulls kt-kernel (which in turn pulls sglang-kt). Co-Authored-By: Claude Opus 4.6 * [fix]: show sglang-kt package version in kt version command - Prioritize sglang-kt package version (aligned with ktransformers) over sglang internal __version__ - Update display name from "sglang" to "sglang-kt" Co-Authored-By: Claude Opus 4.6 * [fix]: improve sglang-kt detection in kt doctor and kt version Recognize sglang-kt package name as proof of kvcache-ai fork installation. 
Previously both commands fell through to "PyPI (not recommended)" for non-editable local source installs. Now version.py reuses the centralized check_sglang_installation() logic. Co-Authored-By: Claude Opus 4.6 * [build]: bump version to 0.5.2.post1 Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 --- .github/workflows/release-pypi.yml | 56 +++- .github/workflows/release-sglang-kt.yml | 130 +++++++++ .github/workflows/sync-sglang-submodule.yml | 81 ++++++ .gitmodules | 4 + doc/en/Kimi-K2-Thinking.md | 12 +- doc/en/Kimi-K2.5.md | 15 +- doc/en/MiniMax-M2.5.md | 15 +- doc/en/Qwen3.5.md | 16 +- doc/en/SFT_Installation_Guide_KimiK2.5.md | 9 +- doc/en/kt-kernel/GLM-5-Tutorial.md | 12 +- doc/en/kt-kernel/Kimi-K2-Thinking-Native.md | 12 +- doc/en/kt-kernel/MiniMax-M2.1-Tutorial.md | 14 +- doc/en/kt-kernel/Native-Precision-Tutorial.md | 10 +- doc/en/kt-kernel/Qwen3-Coder-Next-Tutorial.md | 12 +- .../deepseek-v3.2-sglang-tutorial.md | 2 +- doc/en/kt-kernel/experts-sched-Tutorial.md | 10 +- docker/Dockerfile | 25 +- install.sh | 259 ++++++++++++++++++ kt-kernel/README.md | 17 +- kt-kernel/README_zh.md | 17 +- kt-kernel/pyproject.toml | 2 + kt-kernel/python/cli/commands/doctor.py | 18 +- kt-kernel/python/cli/commands/version.py | 72 ++--- kt-kernel/python/cli/i18n.py | 20 +- kt-kernel/python/cli/utils/sglang_checker.py | 84 +++--- pyproject.toml | 23 ++ setup.py | 16 ++ third_party/sglang | 1 + version.py | 2 +- 29 files changed, 787 insertions(+), 179 deletions(-) create mode 100644 .github/workflows/release-sglang-kt.yml create mode 100644 .github/workflows/sync-sglang-submodule.yml create mode 100755 install.sh create mode 100644 pyproject.toml create mode 100644 setup.py create mode 160000 third_party/sglang diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index b48035b..50537fa 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -21,6 +21,58 @@ permissions: contents: read 
jobs: + # ── sglang-kt (must be on PyPI before users can pip install kt-kernel) ── + build-and-publish-sglang-kt: + name: Build & publish sglang-kt + runs-on: [self-hosted, linux, x64] + if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main' + environment: prod + permissions: + id-token: write + contents: read + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install build tools + run: | + python -m pip install --upgrade pip + pip install build wheel setuptools twine + + - name: Build sglang-kt wheel + working-directory: third_party/sglang/python + run: | + KT_VERSION=$(python3 -c "exec(open('${{ github.workspace }}/version.py').read()); print(__version__)") + export SGLANG_KT_VERSION="$KT_VERSION" + echo "Building sglang-kt v${KT_VERSION} wheel..." + python -m build --wheel -v + ls dist/ | grep -q "sglang_kt" || (echo "ERROR: Wheel name does not contain sglang_kt" && exit 1) + + - name: Publish sglang-kt to PyPI + if: github.event.inputs.test_pypi != 'true' + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: | + python -m twine upload --skip-existing --verbose third_party/sglang/python/dist/*.whl + + - name: Publish sglang-kt to TestPyPI (if requested) + if: github.event.inputs.test_pypi == 'true' + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }} + run: | + python -m twine upload --repository testpypi --skip-existing --verbose third_party/sglang/python/dist/*.whl + + # ── kt-kernel ── build-kt-kernel: name: Build kt-kernel (Python ${{ matrix.python-version }}) runs-on: [self-hosted, linux, x64, gpu] @@ -124,8 +176,8 @@ jobs: retention-days: 7 publish-pypi: - name: Publish to PyPI - needs: [build-kt-kernel] + name: Publish kt-kernel to PyPI + needs: [build-and-publish-sglang-kt, build-kt-kernel] runs-on: 
[self-hosted, linux, x64] if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main' environment: prod diff --git a/.github/workflows/release-sglang-kt.yml b/.github/workflows/release-sglang-kt.yml new file mode 100644 index 0000000..0d745e3 --- /dev/null +++ b/.github/workflows/release-sglang-kt.yml @@ -0,0 +1,130 @@ +name: Release sglang-kt to PyPI + +on: + push: + branches: + - main + paths: + - "third_party/sglang" + - "version.py" + workflow_dispatch: + inputs: + test_pypi: + description: 'Publish to TestPyPI instead of PyPI (for testing)' + required: false + default: 'false' + type: choice + options: + - 'true' + - 'false' + +permissions: + contents: read + +jobs: + build-sglang-kt: + name: Build sglang-kt wheel + runs-on: [self-hosted, linux, x64] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install build tools + run: | + python -m pip install --upgrade pip + pip install build wheel setuptools + + - name: Build sglang-kt wheel + working-directory: third_party/sglang/python + run: | + # Read version from ktransformers version.py + KT_VERSION=$(python3 -c "exec(open('${{ github.workspace }}/version.py').read()); print(__version__)") + export SGLANG_KT_VERSION="$KT_VERSION" + echo "Building sglang-kt v${KT_VERSION} wheel..." + python -m build --wheel -v + + - name: Verify wheel + working-directory: third_party/sglang/python + run: | + echo "Generated wheel:" + ls -lh dist/ + # Verify the wheel has the correct package name + ls dist/ | grep -q "sglang_kt" || (echo "ERROR: Wheel name does not contain sglang_kt" && exit 1) + echo "Wheel name verified." 
+ + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: sglang-kt-wheel + path: third_party/sglang/python/dist/*.whl + retention-days: 7 + + publish-pypi: + name: Publish sglang-kt to PyPI + needs: [build-sglang-kt] + runs-on: [self-hosted, linux, x64] + if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main' + environment: prod + permissions: + id-token: write + contents: read + + steps: + - name: Download wheel artifact + uses: actions/download-artifact@v4 + with: + name: sglang-kt-wheel + path: dist/ + + - name: Display wheels + run: | + echo "Wheels to publish:" + ls -lh dist/ + + - name: Install twine + run: | + python -m pip install --upgrade pip + pip install twine + + - name: Publish to TestPyPI (if requested) + if: github.event.inputs.test_pypi == 'true' + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }} + run: | + python -m twine upload \ + --repository testpypi \ + --skip-existing \ + --verbose \ + dist/*.whl + + - name: Publish to PyPI + if: github.event.inputs.test_pypi != 'true' + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: | + python -m twine upload \ + --skip-existing \ + --verbose \ + dist/*.whl + + - name: Create release summary + run: | + echo "## sglang-kt Published to PyPI" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Installation" >> $GITHUB_STEP_SUMMARY + echo '```bash' >> $GITHUB_STEP_SUMMARY + echo "pip install sglang-kt" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "This is the kvcache-ai fork of SGLang with kt-kernel support." 
>> $GITHUB_STEP_SUMMARY + echo "PyPI link: https://pypi.org/project/sglang-kt/" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/sync-sglang-submodule.yml b/.github/workflows/sync-sglang-submodule.yml new file mode 100644 index 0000000..7a957c5 --- /dev/null +++ b/.github/workflows/sync-sglang-submodule.yml @@ -0,0 +1,81 @@ +name: Sync sglang submodule + +on: + schedule: + # Run daily at 08:00 UTC + - cron: "0 8 * * *" + workflow_dispatch: + +permissions: + contents: write + pull-requests: write + +jobs: + sync: + name: Check for sglang-kt updates + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: true + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Update sglang submodule to latest main + id: update + run: | + OLD_SHA=$(git -C third_party/sglang rev-parse HEAD) + git submodule update --remote third_party/sglang + NEW_SHA=$(git -C third_party/sglang rev-parse HEAD) + + echo "old_sha=$OLD_SHA" >> "$GITHUB_OUTPUT" + echo "new_sha=$NEW_SHA" >> "$GITHUB_OUTPUT" + + if [ "$OLD_SHA" = "$NEW_SHA" ]; then + echo "changed=false" >> "$GITHUB_OUTPUT" + echo "sglang submodule is already up to date ($OLD_SHA)" + else + echo "changed=true" >> "$GITHUB_OUTPUT" + + # Collect commit log between old and new + COMMITS=$(git -C third_party/sglang log --oneline "$OLD_SHA..$NEW_SHA" | head -20) + echo "commits<<EOF" >> "$GITHUB_OUTPUT" + echo "$COMMITS" >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" + + # sglang-kt version = ktransformers version (from version.py) + VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") + echo "version=$VERSION" >> "$GITHUB_OUTPUT" + + echo "sglang submodule updated: $OLD_SHA -> $NEW_SHA (v$VERSION)" + fi + + - name: Create pull request + if: steps.update.outputs.changed == 'true' + uses: peter-evans/create-pull-request@v6 + with: + token: ${{ secrets.GITHUB_TOKEN }} + commit-message: | + [build]: sync sglang
submodule to ${{ steps.update.outputs.new_sha }} + branch: auto/sync-sglang + delete-branch: true + title: "[build] Sync sglang-kt submodule (v${{ steps.update.outputs.version }})" + body: | + Automated sync of `third_party/sglang` submodule to latest `main`. + + **Old ref:** `${{ steps.update.outputs.old_sha }}` + **New ref:** `${{ steps.update.outputs.new_sha }}` + **sglang-kt version:** `${{ steps.update.outputs.version }}` + + ### Commits included + ``` + ${{ steps.update.outputs.commits }} + ``` + + --- + *This PR was created automatically by the [sync-sglang-submodule](${{ github.server_url }}/${{ github.repository }}/actions/workflows/sync-sglang-submodule.yml) workflow.* + labels: | + dependencies + automated diff --git a/.gitmodules b/.gitmodules index 23b49a5..5b4ca91 100644 --- a/.gitmodules +++ b/.gitmodules @@ -8,3 +8,7 @@ path = third_party/custom_flashinfer url = https://github.com/kvcache-ai/custom_flashinfer.git branch = fix-precision-mla-merge-main +[submodule "third_party/sglang"] + path = third_party/sglang + url = https://github.com/kvcache-ai/sglang.git + branch = main diff --git a/doc/en/Kimi-K2-Thinking.md b/doc/en/Kimi-K2-Thinking.md index c7f3671..a95b028 100644 --- a/doc/en/Kimi-K2-Thinking.md +++ b/doc/en/Kimi-K2-Thinking.md @@ -5,11 +5,17 @@ Please Note This is Quantization Deployment. For Native Kimi K2 Thinking deploym Step 1: Install SGLang -Follow the [official SGLang installation](https://docs.sglang.ai/get_started/install.html) guide to install SGLang: -``` -pip install "sglang[all]" +Install the kvcache-ai fork of SGLang (one of): +```bash +# Option A: One-click install (from ktransformers root) +./install.sh + +# Option B: pip install +pip install sglang-kt ``` +> **Important:** Use `sglang-kt` (kvcache-ai fork), not the official `sglang` package. Run `pip uninstall sglang` first if you have the official version installed. 
+ Step 2: Install KTransformers CPU Kernels The KTransformers CPU kernels (kt-kernel) provide AMX-optimized computation for hybrid inference, for detailed installation instructions and troubleshooting, refer to the official [kt-kernel installation guide](https://github.com/kvcache-ai/ktransformers/blob/main/kt-kernel/README.md). diff --git a/doc/en/Kimi-K2.5.md b/doc/en/Kimi-K2.5.md index d3d1206..f75017f 100644 --- a/doc/en/Kimi-K2.5.md +++ b/doc/en/Kimi-K2.5.md @@ -32,16 +32,17 @@ git submodule update --init --recursive cd kt-kernel && ./install.sh ``` -2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang) +2. **SGLang installed** - Install the kvcache-ai fork of SGLang (one of): -Note: Currently, please clone our custom SGLang repository: +```bash +# Option A: One-click install (from ktransformers root) +./install.sh +# Option B: pip install +pip install sglang-kt ``` -git clone https://github.com/kvcache-ai/sglang.git -cd sglang && pip install -e "python[all]" -// maybe need to reinstall cudnn according to the issue when launching SGLang -// pip install nvidia-cudnn-cu12==9.16.0.29 -``` + +> Note: You may need to reinstall cudnn: `pip install nvidia-cudnn-cu12==9.16.0.29` 3. **CUDA toolkit** - Compatible with your GPU (CUDA 12.8+ recommended) 4. **Hugging Face CLI** - For downloading models: diff --git a/doc/en/MiniMax-M2.5.md b/doc/en/MiniMax-M2.5.md index c378f8d..fc5c7d1 100644 --- a/doc/en/MiniMax-M2.5.md +++ b/doc/en/MiniMax-M2.5.md @@ -30,16 +30,17 @@ git submodule update --init --recursive cd kt-kernel && ./install.sh ``` -2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang) +2. 
**SGLang installed** - Install the kvcache-ai fork of SGLang (one of): -Note: Currently, please clone our custom SGLang repository: +```bash +# Option A: One-click install (from ktransformers root) +./install.sh +# Option B: pip install +pip install sglang-kt ``` -git clone https://github.com/kvcache-ai/sglang.git -cd sglang && pip install -e "python[all]" -// maybe need to reinstall cudnn according to the issue when launching SGLang -// pip install nvidia-cudnn-cu12==9.16.0.29 -``` + +> Note: You may need to reinstall cudnn: `pip install nvidia-cudnn-cu12==9.16.0.29` 3. **CUDA toolkit** - Compatible with your GPU (CUDA 12.8+ recommended) 4. **Hugging Face CLI** - For downloading models: diff --git a/doc/en/Qwen3.5.md b/doc/en/Qwen3.5.md index 335c8c1..2ddbaf5 100644 --- a/doc/en/Qwen3.5.md +++ b/doc/en/Qwen3.5.md @@ -36,18 +36,18 @@ git submodule update --init --recursive cd kt-kernel && ./install.sh ``` -2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang) - -Note: Currently, please clone our custom SGLang repository: +2. **SGLang installed** - Install the kvcache-ai fork of SGLang (one of): ```bash -git clone https://github.com/kvcache-ai/sglang.git -git checkout qwen3.5 -cd sglang && pip install -e "python[all]" -# Maybe need to reinstall cudnn according to the issue when launching SGLang -pip install nvidia-cudnn-cu12==9.16.0.29 +# Option A: One-click install (from ktransformers root) +./install.sh + +# Option B: pip install +pip install sglang-kt ``` +> Note: You may need to reinstall cudnn: `pip install nvidia-cudnn-cu12==9.16.0.29` + 3. **CUDA toolkit** - Compatible with your GPU (CUDA 12.8+ recommended) 4. 
**Hugging Face CLI** - For downloading models: diff --git a/doc/en/SFT_Installation_Guide_KimiK2.5.md b/doc/en/SFT_Installation_Guide_KimiK2.5.md index 0b90d0b..858bb9a 100644 --- a/doc/en/SFT_Installation_Guide_KimiK2.5.md +++ b/doc/en/SFT_Installation_Guide_KimiK2.5.md @@ -65,10 +65,11 @@ cd kt-kernel && ./install.sh **Recommended for Kimi-K2.5:** ```bash -git clone https://github.com/kvcache-ai/sglang.git -cd sglang -git checkout kimi_k2.5 -pip install -e "python[all]" +# Option A: One-click install (from ktransformers root, installs sglang + kt-kernel) +./install.sh + +# Option B: pip install +pip install sglang-kt ``` ### 0.3 Training Environment: `kt-sft` diff --git a/doc/en/kt-kernel/GLM-5-Tutorial.md b/doc/en/kt-kernel/GLM-5-Tutorial.md index 9585963..3cc1332 100644 --- a/doc/en/kt-kernel/GLM-5-Tutorial.md +++ b/doc/en/kt-kernel/GLM-5-Tutorial.md @@ -19,15 +19,15 @@ Before starting, ensure you have: 1. **SGLang installed** - Note: Currently, please clone our custom SGLang repository: + Install the kvcache-ai fork of SGLang (one of): ```bash - git clone https://github.com/kvcache-ai/sglang.git - cd sglang - pip install -e "python[all]" - ``` + # Option A: One-click install (from ktransformers root) + ./install.sh - You can follow [SGLang integration steps](https://docs.sglang.io/get_started/install.html) + # Option B: pip install + pip install sglang-kt + ``` 2. **KT-Kernel installed** diff --git a/doc/en/kt-kernel/Kimi-K2-Thinking-Native.md b/doc/en/kt-kernel/Kimi-K2-Thinking-Native.md index 443ab00..5878184 100644 --- a/doc/en/kt-kernel/Kimi-K2-Thinking-Native.md +++ b/doc/en/kt-kernel/Kimi-K2-Thinking-Native.md @@ -30,14 +30,14 @@ This tutorial demonstrates how to run Kimi-K2 model inference using SGLang integ Before starting, ensure you have: 1. **KT-Kernel installed** - Follow the [installation guide](./kt-kernel_intro.md#installation) -2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang) +2. 
**SGLang installed** - Install the kvcache-ai fork of SGLang (one of): -Note: Currently, please clone our custom SGLang repository: +```bash +# Option A: One-click install (from ktransformers root) +./install.sh -``` -git clone https://github.com/kvcache-ai/sglang.git -cd sglang -pip install -e "python[all]" +# Option B: pip install +pip install sglang-kt ``` 3. **CUDA toolkit** - Compatible with your GPU (CUDA 11.8+ recommended) diff --git a/doc/en/kt-kernel/MiniMax-M2.1-Tutorial.md b/doc/en/kt-kernel/MiniMax-M2.1-Tutorial.md index d2d0739..25aaade 100644 --- a/doc/en/kt-kernel/MiniMax-M2.1-Tutorial.md +++ b/doc/en/kt-kernel/MiniMax-M2.1-Tutorial.md @@ -42,17 +42,17 @@ This tutorial demonstrates how to run MiniMax-M2.1 model inference using SGLang Before starting, ensure you have: -1. **SGLang installed** +1. **SGLang installed** - Note: Currently, please clone our custom SGLang repository: + Install the kvcache-ai fork of SGLang (one of): ```bash - git clone https://github.com/kvcache-ai/sglang.git - cd sglang - pip install -e "python[all]" - ``` + # Option A: One-click install (from ktransformers root) + ./install.sh - You can follow [SGLang integration steps](https://docs.sglang.io/get_started/install.html) + # Option B: pip install + pip install sglang-kt + ``` 2. **KT-Kernel installed** diff --git a/doc/en/kt-kernel/Native-Precision-Tutorial.md b/doc/en/kt-kernel/Native-Precision-Tutorial.md index 1e80007..5aecdac 100644 --- a/doc/en/kt-kernel/Native-Precision-Tutorial.md +++ b/doc/en/kt-kernel/Native-Precision-Tutorial.md @@ -63,12 +63,14 @@ Before starting, ensure you have: 1. **SGLang installed** - Clone and install the custom SGLang repository: + Install the kvcache-ai fork of SGLang (one of): ```bash - git clone https://github.com/kvcache-ai/sglang.git - cd sglang - pip install -e "python[all]" + # Option A: One-click install (from ktransformers root) + ./install.sh + + # Option B: pip install + pip install sglang-kt ``` 2. 
**KT-Kernel installed** diff --git a/doc/en/kt-kernel/Qwen3-Coder-Next-Tutorial.md b/doc/en/kt-kernel/Qwen3-Coder-Next-Tutorial.md index 5bf2c39..b290ab8 100644 --- a/doc/en/kt-kernel/Qwen3-Coder-Next-Tutorial.md +++ b/doc/en/kt-kernel/Qwen3-Coder-Next-Tutorial.md @@ -32,15 +32,15 @@ Before starting, ensure you have: 1. **SGLang installed** - Note: Currently, please clone our custom SGLang repository: + Install the kvcache-ai fork of SGLang (one of): ```bash - git clone https://github.com/kvcache-ai/sglang.git - cd sglang - pip install -e "python[all]" - ``` + # Option A: One-click install (from ktransformers root) + ./install.sh - You can follow [SGLang integration steps](https://docs.sglang.io/get_started/install.html) + # Option B: pip install + pip install sglang-kt + ``` 2. **KT-Kernel installed** diff --git a/doc/en/kt-kernel/deepseek-v3.2-sglang-tutorial.md b/doc/en/kt-kernel/deepseek-v3.2-sglang-tutorial.md index a8ee454..9cffe0d 100644 --- a/doc/en/kt-kernel/deepseek-v3.2-sglang-tutorial.md +++ b/doc/en/kt-kernel/deepseek-v3.2-sglang-tutorial.md @@ -30,7 +30,7 @@ This tutorial demonstrates how to run DeepSeek V3.2 model inference using SGLang Before starting, ensure you have: 1. **KT-Kernel installed** - Follow the [installation guide](./kt-kernel_intro.md#installation) -2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang) +2. **SGLang installed** - Install the kvcache-ai fork: `pip install sglang-kt` or run `./install.sh` from the ktransformers root 3. **CUDA toolkit** - Compatible with your GPU (CUDA 11.8+ recommended) 4. **Hugging Face CLI** - For downloading models: ```bash diff --git a/doc/en/kt-kernel/experts-sched-Tutorial.md b/doc/en/kt-kernel/experts-sched-Tutorial.md index 9d5c496..1957c6b 100644 --- a/doc/en/kt-kernel/experts-sched-Tutorial.md +++ b/doc/en/kt-kernel/experts-sched-Tutorial.md @@ -40,12 +40,14 @@ Before starting, ensure you have: 1. 
**SGLang installed** - Note: Currently, please clone our custom SGLang repository: + Install the kvcache-ai fork of SGLang (one of): ```bash - git clone https://github.com/kvcache-ai/sglang.git - cd sglang - pip install -e "python[all]" + # Option A: One-click install (from ktransformers root) + ./install.sh + + # Option B: pip install + pip install sglang-kt ``` 2. **KTransformers installed** diff --git a/docker/Dockerfile b/docker/Dockerfile index c20c1a7..0e2f92f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -215,13 +215,10 @@ RUN /opt/miniconda3/envs/serve/bin/pip config set global.index-url https://mirro /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple; \ fi -# Clone repositories -# Use kvcache-ai/sglang fork with kimi_k2 branch -RUN git clone https://${GITHUB_ARTIFACTORY}/kvcache-ai/sglang.git /workspace/sglang \ - && cd /workspace/sglang && git checkout kimi_k2 - +# Clone repositories (sglang is included as a submodule in ktransformers) RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \ && cd /workspace/ktransformers && git submodule update --init --recursive \ + && ln -s /workspace/ktransformers/third_party/sglang /workspace/sglang \ && if [ "$FUNCTIONALITY" = "sft" ]; then \ git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory; \ fi @@ -262,7 +259,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \ ; \ fi -# Install SGLang in serve env +# Install SGLang in serve env (version aligned with ktransformers) RUN --mount=type=cache,target=/root/.cache/pip \ case "$CUDA_VERSION" in \ 12.6.1) CUINDEX=126 ;; \ @@ -270,6 +267,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \ 12.9.1) CUINDEX=129 ;; \ 13.0.1) CUINDEX=130 ;; \ esac \ + && export SGLANG_KT_VERSION=$(python3 -c "exec(open('/workspace/ktransformers/version.py').read()); print(__version__)") \ + && echo "Installing 
sglang-kt v${SGLANG_KT_VERSION}" \ && cd /workspace/sglang \ && /opt/miniconda3/envs/serve/bin/pip install -e "python[all]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} @@ -404,18 +403,16 @@ RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"' >> # Extract versions from each component and save to versions.env RUN set -x && \ - # SGLang version (from version.py file) - cd /workspace/sglang/python/sglang && \ - SGLANG_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \ - echo "SGLANG_VERSION=$SGLANG_VERSION" > /workspace/versions.env && \ - echo "Extracted SGLang version: $SGLANG_VERSION" && \ - \ - # KTransformers version (from version.py in repo) + # KTransformers version (single source of truth for both kt-kernel and sglang-kt) cd /workspace/ktransformers && \ KTRANSFORMERS_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \ - echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \ + echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" > /workspace/versions.env && \ echo "Extracted KTransformers version: $KTRANSFORMERS_VERSION" && \ \ + # sglang-kt version = ktransformers version (aligned) + echo "SGLANG_KT_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \ + echo "sglang-kt version (aligned): $KTRANSFORMERS_VERSION" && \ + \ # LLaMA-Factory version (from fine-tune environment, sft mode only) if [ "$FUNCTIONALITY" = "sft" ]; then \ . 
/opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \ diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..db1343e --- /dev/null +++ b/install.sh @@ -0,0 +1,259 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Resolve the repository root (directory containing this script) +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +usage() { + cat <&2 +} + +# Read ktransformers version from version.py and export for sglang-kt +read_kt_version() { + local version_file="$REPO_ROOT/version.py" + if [ -f "$version_file" ]; then + KT_VERSION=$(python3 -c "exec(open('$version_file').read()); print(__version__)") + export SGLANG_KT_VERSION="$KT_VERSION" + log_info "ktransformers version: $KT_VERSION (will be used for sglang-kt)" + else + log_warn "version.py not found; sglang-kt will use its default version" + fi +} + +# ─── Submodule init ──────────────────────────────────────────────────────────── + +init_submodules() { + log_step "Initializing git submodules" + + if [ ! -d "$REPO_ROOT/.git" ]; then + log_warn "Not a git repository. Skipping submodule init." + log_warn "If you need sglang, clone with: git clone --recursive https://github.com/kvcache-ai/ktransformers.git" + return 0 + fi + + cd "$REPO_ROOT" + git submodule update --init --recursive + log_info "Submodules initialized successfully." +} + +# ─── sglang install ─────────────────────────────────────────────────────────── + +install_sglang() { + local editable="${1:-0}" + + log_step "Installing sglang (kvcache-ai fork)" + + local sglang_dir="$REPO_ROOT/third_party/sglang" + local pyproject="$sglang_dir/python/pyproject.toml" + + if [ ! -f "$pyproject" ]; then + log_error "sglang source not found at $sglang_dir" + log_error "Run 'git submodule update --init --recursive' first, or clone with --recursive." + exit 1 + fi + + cd "$sglang_dir" + + if [ "$editable" = "1" ]; then + log_info "Installing sglang in editable mode..." 
+ pip install -e "./python[all]" + else + log_info "Installing sglang..." + pip install "./python[all]" + fi + + log_info "sglang installed successfully." +} + +# ─── kt-kernel install ──────────────────────────────────────────────────────── + +install_kt_kernel() { + # Forward all remaining args to kt-kernel/install.sh + local kt_args=("$@") + + log_step "Installing kt-kernel" + + local kt_install="$REPO_ROOT/kt-kernel/install.sh" + + if [ ! -f "$kt_install" ]; then + log_error "kt-kernel/install.sh not found at $kt_install" + exit 1 + fi + + cd "$REPO_ROOT/kt-kernel" + bash ./install.sh build "${kt_args[@]}" +} + +# ─── deps install ───────────────────────────────────────────────────────────── + +install_deps() { + log_step "Installing system dependencies" + + local kt_install="$REPO_ROOT/kt-kernel/install.sh" + + if [ ! -f "$kt_install" ]; then + log_error "kt-kernel/install.sh not found at $kt_install" + exit 1 + fi + + cd "$REPO_ROOT/kt-kernel" + bash ./install.sh deps +} + +# ─── "all" subcommand ───────────────────────────────────────────────────────── + +install_all() { + local skip_sglang=0 + local skip_kt_kernel=0 + local editable=0 + local kt_args=() + + while [[ $# -gt 0 ]]; do + case "$1" in + --skip-sglang) skip_sglang=1; shift ;; + --skip-kt-kernel) skip_kt_kernel=1; shift ;; + --editable) editable=1; shift ;; + --manual) kt_args+=("--manual"); shift ;; + --no-clean) kt_args+=("--no-clean"); shift ;; + -h|--help) usage ;; + *) + log_error "Unknown option: $1" + usage + ;; + esac + done + + # 1. Init submodules + init_submodules + + # 2. System dependencies + install_deps + + # 3. Read version for sglang-kt + read_kt_version + + # 4. Install sglang + if [ "$skip_sglang" = "0" ]; then + install_sglang "$editable" + else + log_info "Skipping sglang installation (--skip-sglang)." + fi + + # 4. 
Build & install kt-kernel + if [ "$skip_kt_kernel" = "0" ]; then + install_kt_kernel "${kt_args[@]}" + else + log_info "Skipping kt-kernel installation (--skip-kt-kernel)." + fi + + log_step "Installation complete!" + echo " Verify with: kt doctor" + echo "" +} + +# ─── Subcommand dispatcher ──────────────────────────────────────────────────── + +SUBCMD="all" +if [[ $# -gt 0 ]]; then + case "$1" in + all|sglang|kt-kernel|deps) + SUBCMD="$1" + shift + ;; + -h|--help) + usage + ;; + -*) + # Flags without subcommand → default to "all" + SUBCMD="all" + ;; + *) + log_error "Unknown subcommand: $1" + usage + ;; + esac +fi + +case "$SUBCMD" in + all) + install_all "$@" + ;; + sglang) + # Parse sglang-specific options + editable=0 + while [[ $# -gt 0 ]]; do + case "$1" in + --editable) editable=1; shift ;; + -h|--help) usage ;; + *) log_error "Unknown option for sglang: $1"; usage ;; + esac + done + init_submodules + read_kt_version + install_sglang "$editable" + ;; + kt-kernel) + install_kt_kernel "$@" + ;; + deps) + install_deps + ;; +esac diff --git a/kt-kernel/README.md b/kt-kernel/README.md index 9db9960..a04d9b6 100644 --- a/kt-kernel/README.md +++ b/kt-kernel/README.md @@ -262,12 +262,23 @@ KT-Kernel can be used standalone via [Direct Python API](#direct-python-api-usag #### 1. Install SGLang +Install the kvcache-ai fork of SGLang (required for kt-kernel support): + ```bash -git clone https://github.com/sgl-project/sglang.git -cd sglang -pip install -e "python[all]" +# Option A: One-click install (from ktransformers root, installs sglang + kt-kernel) +./install.sh + +# Option B: pip install +pip install sglang-kt + +# Option C: From source (editable mode) +git clone --recursive https://github.com/kvcache-ai/ktransformers.git +cd ktransformers +pip install -e "third_party/sglang/python[all]" ``` +> **Important:** Use `sglang-kt` (kvcache-ai fork), not the official `sglang` package. 
If you have the official version installed, uninstall it first: `pip uninstall sglang -y` + #### 2. Prepare Weights You need both GPU weights and CPU-side expert weights for heterogeneous inference. The exact format depends on the backend: diff --git a/kt-kernel/README_zh.md b/kt-kernel/README_zh.md index 554b35b..70c797c 100644 --- a/kt-kernel/README_zh.md +++ b/kt-kernel/README_zh.md @@ -115,12 +115,23 @@ KT-Kernel 可以单独通过 [Python API](#直接使用-python-api) 使用,也 #### 1. 安装 SGLang +安装 kvcache-ai 分支的 SGLang(kt-kernel 需要此分支): + ```bash -git clone https://github.com/sgl-project/sglang.git -cd sglang -pip install -e "python[all]" +# 方式 A: 一键安装(从 ktransformers 根目录,同时安装 sglang + kt-kernel) +./install.sh + +# 方式 B: pip 安装 +pip install sglang-kt + +# 方式 C: 从源码安装(可编辑模式) +git clone --recursive https://github.com/kvcache-ai/ktransformers.git +cd ktransformers +pip install -e "third_party/sglang/python[all]" ``` +> **重要:** 请使用 `sglang-kt`(kvcache-ai 分支),而非官方 `sglang` 包。如已安装官方版本,请先卸载:`pip uninstall sglang -y` + #### 2. 
准备权重 要进行异构推理,需要同时准备 GPU 权重和 CPU 侧 experts 对应的权重,具体格式取决于后端类型: diff --git a/kt-kernel/pyproject.toml b/kt-kernel/pyproject.toml index 4c9e55e..7716869 100644 --- a/kt-kernel/pyproject.toml +++ b/kt-kernel/pyproject.toml @@ -33,6 +33,8 @@ dependencies = [ "pyyaml>=6.0", "httpx>=0.25.0", "packaging>=23.0", + # SGLang (kvcache-ai fork) + "sglang-kt", # Development dependencies "black>=25.9.0", ] diff --git a/kt-kernel/python/cli/commands/doctor.py b/kt-kernel/python/cli/commands/doctor.py index 5b32a0c..f0cc33b 100644 --- a/kt-kernel/python/cli/commands/doctor.py +++ b/kt-kernel/python/cli/commands/doctor.py @@ -369,7 +369,19 @@ def doctor( sglang_info = check_sglang_installation() if sglang_info["installed"]: - if sglang_info["from_source"]: + if sglang_info.get("is_kvcache_fork"): + # Package name is sglang-kt — this is definitively the kvcache-ai fork + if sglang_info["from_source"] and sglang_info["git_info"]: + git_remote = sglang_info["git_info"].get("remote", "unknown") + git_branch = sglang_info["git_info"].get("branch", "unknown") + sglang_source_value = f"sglang-kt (Source: {git_remote}, branch: {git_branch})" + elif sglang_info["editable"]: + sglang_source_value = "sglang-kt (editable)" + else: + sglang_source_value = "sglang-kt" + sglang_source_status = "ok" + sglang_source_hint = None + elif sglang_info["from_source"]: if sglang_info["git_info"]: git_remote = sglang_info["git_info"].get("remote", "unknown") git_branch = sglang_info["git_info"].get("branch", "unknown") @@ -381,7 +393,7 @@ def doctor( sglang_source_status = "ok" sglang_source_hint = None else: - sglang_source_value = "PyPI (not recommended)" + sglang_source_value = "PyPI sglang (not kvcache-ai fork)" sglang_source_status = "warning" sglang_source_hint = t("sglang_pypi_hint") else: @@ -411,7 +423,7 @@ def doctor( else: kt_kernel_value = t("sglang_kt_kernel_not_supported") kt_kernel_status = "error" - kt_kernel_hint = 'Reinstall SGLang from: git clone https://github.com/kvcache-ai/sglang && cd 
sglang && pip install -e "python[all]"' + kt_kernel_hint = "Reinstall SGLang: pip uninstall sglang -y && pip install sglang-kt (or run ./install.sh from ktransformers root)" issues_found = True checks.append( diff --git a/kt-kernel/python/cli/commands/version.py b/kt-kernel/python/cli/commands/version.py index 3d4adf2..fb98ba9 100644 --- a/kt-kernel/python/cli/commands/version.py +++ b/kt-kernel/python/cli/commands/version.py @@ -16,54 +16,38 @@ from kt_kernel.cli.utils.environment import detect_cuda_version, get_installed_p def _get_sglang_info() -> str: - """Get sglang version and installation source information.""" - try: - import sglang + """Get sglang-kt version and installation source information.""" + from kt_kernel.cli.utils.sglang_checker import check_sglang_installation - version = getattr(sglang, "__version__", None) + info = check_sglang_installation() - if not version: - version = get_installed_package_version("sglang") - - if not version: - return t("version_not_installed") - - # Try to detect installation source - from pathlib import Path - import subprocess - - if hasattr(sglang, "__file__") and sglang.__file__: - location = Path(sglang.__file__).parent.parent - git_dir = location / ".git" - - if git_dir.exists(): - # Installed from git (editable install) - try: - # Get remote URL - result = subprocess.run( - ["git", "remote", "get-url", "origin"], - cwd=location, - capture_output=True, - text=True, - timeout=2, - ) - if result.returncode == 0: - remote_url = result.stdout.strip() - # Simplify GitHub URLs - if "github.com" in remote_url: - repo_name = remote_url.split("/")[-1].replace(".git", "") - owner = remote_url.split("/")[-2] - return f"{version} [dim](GitHub: {owner}/{repo_name})[/dim]" - return f"{version} [dim](Git: {remote_url})[/dim]" - except (subprocess.TimeoutExpired, FileNotFoundError, OSError): - pass - - # Default: installed from PyPI - return f"{version} [dim](PyPI)[/dim]" - - except ImportError: + if not info["installed"]: return 
t("version_not_installed") + # Get version from package metadata (prefer sglang-kt) + version = get_installed_package_version("sglang-kt") + if not version: + version = get_installed_package_version("sglang") + if not version: + version = info.get("version") or "unknown" + + # Determine source label + if info.get("is_kvcache_fork"): + if info["from_source"] and info.get("git_info"): + git_remote = info["git_info"].get("remote", "") + return f"{version} [dim](Source: {git_remote})[/dim]" + elif info["editable"]: + return f"{version} [dim](editable)[/dim]" + else: + return f"{version} [dim](sglang-kt)[/dim]" + elif info["from_source"]: + if info.get("git_info"): + git_remote = info["git_info"].get("remote", "") + return f"{version} [dim](Source: {git_remote})[/dim]" + return f"{version} [dim](source)[/dim]" + else: + return f"{version} [dim](PyPI)[/dim]" + def version( verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed version info"), diff --git a/kt-kernel/python/cli/i18n.py b/kt-kernel/python/cli/i18n.py index 9dbe1c9..57a5d6d 100644 --- a/kt-kernel/python/cli/i18n.py +++ b/kt-kernel/python/cli/i18n.py @@ -37,7 +37,7 @@ MESSAGES: dict[str, dict[str, str]] = { "version_cuda_not_found": "Not found", "version_kt_kernel": "kt-kernel", "version_ktransformers": "ktransformers", - "version_sglang": "sglang", + "version_sglang": "sglang-kt", "version_llamafactory": "llamafactory", "version_not_installed": "Not installed", # Install command @@ -300,10 +300,10 @@ MESSAGES: dict[str, dict[str, str]] = { "completion_next_session": "Completion will be automatically enabled in new terminal sessions.", # SGLang "sglang_not_found": "SGLang not found", - "sglang_pypi_warning": "SGLang from PyPI may not be compatible with kt-kernel", - "sglang_pypi_hint": 'SGLang from PyPI may not be compatible. 
Install from source: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"', - "sglang_install_hint": 'Install SGLang: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"', - "sglang_recommend_source": 'Recommend reinstalling from source: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"', + "sglang_pypi_warning": "SGLang from PyPI may not be compatible with kt-kernel. Use sglang-kt instead: pip install sglang-kt", + "sglang_pypi_hint": "SGLang from PyPI may not be compatible. Install the kvcache-ai fork: pip install sglang-kt (or run ./install.sh from ktransformers root)", + "sglang_install_hint": "Install SGLang: pip install sglang-kt (or run ./install.sh from ktransformers root)", + "sglang_recommend_source": "Recommend reinstalling with the kvcache-ai fork: pip uninstall sglang -y && pip install sglang-kt", "sglang_kt_kernel_not_supported": "SGLang does not support kt-kernel (missing --kt-gpu-prefill-token-threshold parameter)", "sglang_checking_kt_kernel_support": "Checking SGLang kt-kernel support...", "sglang_kt_kernel_supported": "SGLang kt-kernel support verified", @@ -657,7 +657,7 @@ MESSAGES: dict[str, dict[str, str]] = { "version_cuda_not_found": "未找到", "version_kt_kernel": "kt-kernel", "version_ktransformers": "ktransformers", - "version_sglang": "sglang", + "version_sglang": "sglang-kt", "version_llamafactory": "llamafactory", "version_not_installed": "未安装", # Install command @@ -920,10 +920,10 @@ MESSAGES: dict[str, dict[str, str]] = { "completion_next_session": "新的终端会话将自动启用补全。", # SGLang "sglang_not_found": "未找到 SGLang", - "sglang_pypi_warning": "PyPI 版本的 SGLang 可能与 kt-kernel 不兼容", - "sglang_pypi_hint": 'PyPI 版本可能不兼容。从源码安装: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"', - "sglang_install_hint": '安装 SGLang: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e 
"python[all]"', - "sglang_recommend_source": '建议从源码重新安装: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"', + "sglang_pypi_warning": "PyPI 版本的 SGLang 可能与 kt-kernel 不兼容。请使用 sglang-kt: pip install sglang-kt", + "sglang_pypi_hint": "PyPI 版本可能不兼容。安装 kvcache-ai 分支: pip install sglang-kt (或在 ktransformers 根目录运行 ./install.sh)", + "sglang_install_hint": "安装 SGLang: pip install sglang-kt (或在 ktransformers 根目录运行 ./install.sh)", + "sglang_recommend_source": "建议重新安装 kvcache-ai 分支: pip uninstall sglang -y && pip install sglang-kt", "sglang_kt_kernel_not_supported": "SGLang 不支持 kt-kernel (缺少 --kt-gpu-prefill-token-threshold 参数)", "sglang_checking_kt_kernel_support": "正在检查 SGLang kt-kernel 支持...", "sglang_kt_kernel_supported": "SGLang kt-kernel 支持已验证", diff --git a/kt-kernel/python/cli/utils/sglang_checker.py b/kt-kernel/python/cli/utils/sglang_checker.py index 604098b..e49be64 100644 --- a/kt-kernel/python/cli/utils/sglang_checker.py +++ b/kt-kernel/python/cli/utils/sglang_checker.py @@ -38,15 +38,25 @@ def check_sglang_installation() -> dict: editable = False git_info = None from_source = False + is_kvcache_fork = False # True if installed as sglang-kt package try: - # Get pip show output + # Get pip show output (try sglang-kt first, then sglang) result = subprocess.run( - [sys.executable, "-m", "pip", "show", "sglang"], + [sys.executable, "-m", "pip", "show", "sglang-kt"], capture_output=True, text=True, timeout=10, ) + if result.returncode == 0: + is_kvcache_fork = True # sglang-kt package name proves it's the fork + else: + result = subprocess.run( + [sys.executable, "-m", "pip", "show", "sglang"], + capture_output=True, + text=True, + timeout=10, + ) if result.returncode == 0: pip_info = {} @@ -128,6 +138,7 @@ def check_sglang_installation() -> dict: "editable": editable, "git_info": git_info, "from_source": from_source, + "is_kvcache_fork": is_kvcache_fork, } except ImportError: return { @@ -137,6 +148,7 @@ def 
check_sglang_installation() -> dict: "editable": False, "git_info": None, "from_source": False, + "is_kvcache_fork": False, } @@ -158,20 +170,19 @@ def get_sglang_install_instructions(lang: Optional[str] = None) -> str: return """ [bold yellow]SGLang \u672a\u5b89\u88c5[/bold yellow] -\u8bf7\u6309\u7167\u4ee5\u4e0b\u6b65\u9aa4\u5b89\u88c5 SGLang: +\u8bf7\u9009\u62e9\u4ee5\u4e0b\u65b9\u5f0f\u4e4b\u4e00\u5b89\u88c5 SGLang (kvcache-ai \u5206\u652f): -[bold]1. \u514b\u9686\u4ed3\u5e93:[/bold] - git clone https://github.com/kvcache-ai/sglang.git - cd sglang +[bold]\u65b9\u5f0f A - \u4e00\u952e\u5b89\u88c5 (\u63a8\u8350):[/bold] + \u4ece ktransformers \u6839\u76ee\u5f55\u8fd0\u884c: + [cyan]./install.sh[/cyan] -[bold]2. \u5b89\u88c5 (\u4e8c\u9009\u4e00):[/bold] +[bold]\u65b9\u5f0f B - pip \u5b89\u88c5:[/bold] + [cyan]pip install sglang-kt[/cyan] - [cyan]\u65b9\u5f0f A - pip \u5b89\u88c5 (\u63a8\u8350):[/cyan] - pip install -e "python[all]" - - [cyan]\u65b9\u5f0f B - uv \u5b89\u88c5 (\u66f4\u5feb):[/cyan] - pip install uv - uv pip install -e "python[all]" +[bold]\u65b9\u5f0f C - \u4ece\u6e90\u7801\u5b89\u88c5:[/bold] + git clone --recursive https://github.com/kvcache-ai/ktransformers.git + cd ktransformers + pip install "third_party/sglang/python[all]" [dim]\u6ce8\u610f: \u8bf7\u786e\u4fdd\u5728\u6b63\u786e\u7684 Python \u73af\u5883\u4e2d\u6267\u884c\u4ee5\u4e0a\u547d\u4ee4[/dim] """ @@ -179,20 +190,19 @@ def get_sglang_install_instructions(lang: Optional[str] = None) -> str: return """ [bold yellow]SGLang is not installed[/bold yellow] -Please follow these steps to install SGLang: +Install SGLang (kvcache-ai fork) using one of these methods: -[bold]1. Clone the repository:[/bold] - git clone https://github.com/kvcache-ai/sglang.git - cd sglang +[bold]Option A - One-click install (recommended):[/bold] + From the ktransformers root directory, run: + [cyan]./install.sh[/cyan] -[bold]2. 
Install (choose one):[/bold] +[bold]Option B - pip install:[/bold] + [cyan]pip install sglang-kt[/cyan] - [cyan]Option A - pip install (recommended):[/cyan] - pip install -e "python[all]" - - [cyan]Option B - uv install (faster):[/cyan] - pip install uv - uv pip install -e "python[all]" +[bold]Option C - From source:[/bold] + git clone --recursive https://github.com/kvcache-ai/ktransformers.git + cd ktransformers + pip install "third_party/sglang/python[all]" [dim]Note: Make sure to run these commands in the correct Python environment[/dim] """ @@ -369,17 +379,18 @@ def print_sglang_kt_kernel_instructions() -> None: 您当前安装的 SGLang 不包含 kt-kernel 支持。 kt-kernel 需要使用 kvcache-ai 维护的 SGLang 分支。 -[bold]请按以下步骤重新安装 SGLang:[/bold] +[bold]请按以下步骤重新安装:[/bold] [cyan]1. 卸载当前的 SGLang:[/cyan] pip uninstall sglang -y -[cyan]2. 克隆 kvcache-ai 的 SGLang 仓库:[/cyan] - git clone https://github.com/kvcache-ai/sglang.git - cd sglang +[cyan]2. 安装 kvcache-ai 版本 (选择一种方式):[/cyan] -[cyan]3. 安装 SGLang:[/cyan] - pip install -e "python[all]" + [bold]方式 A - 一键安装 (推荐):[/bold] + 从 ktransformers 根目录运行: ./install.sh + + [bold]方式 B - pip 安装:[/bold] + pip install sglang-kt [dim]注意: 请确保在正确的 Python 环境中执行以上命令[/dim] """ @@ -390,17 +401,18 @@ kt-kernel 需要使用 kvcache-ai 维护的 SGLang 分支。 Your current SGLang installation does not include kt-kernel support. kt-kernel requires the kvcache-ai maintained fork of SGLang. -[bold]Please reinstall SGLang with the following steps:[/bold] +[bold]Please reinstall SGLang:[/bold] [cyan]1. Uninstall current SGLang:[/cyan] pip uninstall sglang -y -[cyan]2. Clone the kvcache-ai SGLang repository:[/cyan] - git clone https://github.com/kvcache-ai/sglang.git - cd sglang +[cyan]2. Install the kvcache-ai fork (choose one):[/cyan] -[cyan]3. 
Install SGLang:[/cyan] - pip install -e "python[all]" + [bold]Option A - One-click install (recommended):[/bold] + From the ktransformers root directory, run: ./install.sh + + [bold]Option B - pip install:[/bold] + pip install sglang-kt [dim]Note: Make sure to run these commands in the correct Python environment[/dim] """ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e32acdb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "ktransformers" +dynamic = ["version", "dependencies"] +description = "KTransformers: CPU-GPU heterogeneous inference framework for LLMs" +readme = "README.md" +authors = [{ name = "kvcache-ai" }] +license = "Apache-2.0" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "Operating System :: POSIX :: Linux", +] + +[project.urls] +Homepage = "https://github.com/kvcache-ai/ktransformers" + +[tool.setuptools] +# No actual Python packages — this is a meta-package +packages = [] diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..5bc2874 --- /dev/null +++ b/setup.py @@ -0,0 +1,16 @@ +"""Meta-package: pip install ktransformers → installs kt-kernel + sglang-kt.""" +from pathlib import Path +from setuptools import setup + +_version_file = Path(__file__).resolve().parent / "version.py" +_ns = {} +exec(_version_file.read_text(), _ns) +_v = _ns["__version__"] + +setup( + version=_v, + install_requires=[ + f"kt-kernel=={_v}", + f"sglang-kt=={_v}", + ], +) diff --git a/third_party/sglang b/third_party/sglang new file mode 160000 index 0000000..6b8b5f4 --- /dev/null +++ b/third_party/sglang @@ -0,0 +1 @@ +Subproject commit 6b8b5f4649c18afac6a5491bdfa69cf6746d714a diff --git a/version.py b/version.py index 08409ce..a294ce1 100644 --- a/version.py +++ b/version.py @@ -3,4 +3,4 @@ KTransformers version information. Shared across kt-kernel and kt-sft modules. 
""" -__version__ = "0.5.1" +__version__ = "0.5.2.post1"