Fix/sglang kt detection (#1875)

* [feat]: simplify sglang installation with submodule, auto-sync CI, and version alignment

- Add kvcache-ai/sglang as git submodule at third_party/sglang (branch = main)
- Add top-level install.sh for one-click source installation (sglang + kt-kernel)
- Add sglang-kt as hard dependency in kt-kernel/pyproject.toml
- Add CI workflow to auto-sync sglang submodule daily and create PR
- Add CI workflow to build and publish sglang-kt to PyPI
- Integrate sglang-kt build into release-pypi.yml (version.py bump publishes both packages)
- Align sglang-kt version with ktransformers via SGLANG_KT_VERSION env var injection
- Update Dockerfile to use submodule and inject aligned version
- Update all 13 doc files, CLI hints, and i18n strings to reference new install methods
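
The SGLANG_KT_VERSION injection mentioned above can be pictured with a short sketch. The environment-variable name comes from this PR; the resolver code around it is purely illustrative, not the fork's actual build hook:

```python
import os

# Hypothetical sketch: a build-time hook that prefers the injected
# SGLANG_KT_VERSION over a package's own default version string.
DEFAULT_VERSION = "0.0.0.dev0"  # placeholder, not the real sglang version

def resolve_version() -> str:
    # CI and install.sh export SGLANG_KT_VERSION read from ktransformers' version.py
    return os.environ.get("SGLANG_KT_VERSION", DEFAULT_VERSION)

print(resolve_version())
```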

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [build]: bump version to 0.5.2

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [build]: rename PyPI package from kt-kernel to ktransformers

Users can now `pip install ktransformers` to get everything
(sglang-kt is auto-installed as a dependency).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Revert "[build]: rename PyPI package from kt-kernel to ktransformers"

This reverts commit e0cbbf6364.

* [build]: add ktransformers meta-package for PyPI

`pip install ktransformers` now works as a single install command.
It pulls kt-kernel (which in turn pulls sglang-kt).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [fix]: show sglang-kt package version in kt version command

- Prioritize sglang-kt package version (aligned with ktransformers)
  over sglang internal __version__
- Update display name from "sglang" to "sglang-kt"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [fix]: improve sglang-kt detection in kt doctor and kt version

Recognize sglang-kt package name as proof of kvcache-ai fork installation.
Previously both commands fell through to "PyPI (not recommended)" for
non-editable local source installs. Now version.py reuses the centralized
check_sglang_installation() logic.
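
The detection idea above — the `sglang-kt` distribution name alone proves the fork — can be sketched with stdlib `importlib.metadata`; the real `check_sglang_installation()` in kt_kernel is more thorough (source/editable detection, git remotes):

```python
from importlib import metadata

def is_kvcache_fork_installed() -> bool:
    # The fork publishes its wheel under the distribution name "sglang-kt",
    # so package metadata alone distinguishes it from PyPI sglang.
    try:
        metadata.version("sglang-kt")
        return True
    except metadata.PackageNotFoundError:
        return False

print(is_kvcache_fork_installed())
```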

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [build]: bump version to 0.5.2.post1

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Jianwei Dong
2026-03-04 16:54:48 +08:00
committed by GitHub
parent 9e69fccb02
commit 15c624dcae
29 changed files with 787 additions and 179 deletions


@@ -21,6 +21,58 @@ permissions:
contents: read
jobs:
# ── sglang-kt (must be on PyPI before users can pip install kt-kernel) ──
build-and-publish-sglang-kt:
name: Build & publish sglang-kt
runs-on: [self-hosted, linux, x64]
if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
environment: prod
permissions:
id-token: write
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install build tools
run: |
python -m pip install --upgrade pip
pip install build wheel setuptools twine
- name: Build sglang-kt wheel
working-directory: third_party/sglang/python
run: |
KT_VERSION=$(python3 -c "exec(open('${{ github.workspace }}/version.py').read()); print(__version__)")
export SGLANG_KT_VERSION="$KT_VERSION"
echo "Building sglang-kt v${KT_VERSION} wheel..."
python -m build --wheel -v
ls dist/ | grep -q "sglang_kt" || (echo "ERROR: Wheel name does not contain sglang_kt" && exit 1)
- name: Publish sglang-kt to PyPI
if: github.event.inputs.test_pypi != 'true'
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: |
python -m twine upload --skip-existing --verbose third_party/sglang/python/dist/*.whl
- name: Publish sglang-kt to TestPyPI (if requested)
if: github.event.inputs.test_pypi == 'true'
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
run: |
python -m twine upload --repository testpypi --skip-existing --verbose third_party/sglang/python/dist/*.whl
# ── kt-kernel ──
build-kt-kernel:
name: Build kt-kernel (Python ${{ matrix.python-version }})
runs-on: [self-hosted, linux, x64, gpu]
@@ -124,8 +176,8 @@ jobs:
retention-days: 7
publish-pypi:
name: Publish to PyPI
needs: [build-kt-kernel]
name: Publish kt-kernel to PyPI
needs: [build-and-publish-sglang-kt, build-kt-kernel]
runs-on: [self-hosted, linux, x64]
if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
environment: prod
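
The KT_VERSION extraction step above relies on a shell one-liner that exec's version.py and prints `__version__`. Expanded, it amounts to the following sketch (a stand-in file is created here to keep the example self-contained):

```python
# Sketch of the one-liner: exec version.py in a namespace, read __version__.
from pathlib import Path

Path("example_version.py").write_text('__version__ = "0.5.2.post1"\n')

ns: dict = {}
exec(Path("example_version.py").read_text(), ns)
print(ns["__version__"])

Path("example_version.py").unlink()  # clean up the stand-in file
```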

.github/workflows/release-sglang-kt.yml

@@ -0,0 +1,130 @@
name: Release sglang-kt to PyPI
on:
push:
branches:
- main
paths:
- "third_party/sglang"
- "version.py"
workflow_dispatch:
inputs:
test_pypi:
description: 'Publish to TestPyPI instead of PyPI (for testing)'
required: false
default: 'false'
type: choice
options:
- 'true'
- 'false'
permissions:
contents: read
jobs:
build-sglang-kt:
name: Build sglang-kt wheel
runs-on: [self-hosted, linux, x64]
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install build tools
run: |
python -m pip install --upgrade pip
pip install build wheel setuptools
- name: Build sglang-kt wheel
working-directory: third_party/sglang/python
run: |
# Read version from ktransformers version.py
KT_VERSION=$(python3 -c "exec(open('${{ github.workspace }}/version.py').read()); print(__version__)")
export SGLANG_KT_VERSION="$KT_VERSION"
echo "Building sglang-kt v${KT_VERSION} wheel..."
python -m build --wheel -v
- name: Verify wheel
working-directory: third_party/sglang/python
run: |
echo "Generated wheel:"
ls -lh dist/
# Verify the wheel has the correct package name
ls dist/ | grep -q "sglang_kt" || (echo "ERROR: Wheel name does not contain sglang_kt" && exit 1)
echo "Wheel name verified."
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: sglang-kt-wheel
path: third_party/sglang/python/dist/*.whl
retention-days: 7
publish-pypi:
name: Publish sglang-kt to PyPI
needs: [build-sglang-kt]
runs-on: [self-hosted, linux, x64]
if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
environment: prod
permissions:
id-token: write
contents: read
steps:
- name: Download wheel artifact
uses: actions/download-artifact@v4
with:
name: sglang-kt-wheel
path: dist/
- name: Display wheels
run: |
echo "Wheels to publish:"
ls -lh dist/
- name: Install twine
run: |
python -m pip install --upgrade pip
pip install twine
- name: Publish to TestPyPI (if requested)
if: github.event.inputs.test_pypi == 'true'
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
run: |
python -m twine upload \
--repository testpypi \
--skip-existing \
--verbose \
dist/*.whl
- name: Publish to PyPI
if: github.event.inputs.test_pypi != 'true'
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: |
python -m twine upload \
--skip-existing \
--verbose \
dist/*.whl
- name: Create release summary
run: |
echo "## sglang-kt Published to PyPI" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Installation" >> $GITHUB_STEP_SUMMARY
echo '```bash' >> $GITHUB_STEP_SUMMARY
echo "pip install sglang-kt" >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "This is the kvcache-ai fork of SGLang with kt-kernel support." >> $GITHUB_STEP_SUMMARY
echo "PyPI link: https://pypi.org/project/sglang-kt/" >> $GITHUB_STEP_SUMMARY


@@ -0,0 +1,81 @@
name: Sync sglang submodule
on:
schedule:
# Run daily at 08:00 UTC
- cron: "0 8 * * *"
workflow_dispatch:
permissions:
contents: write
pull-requests: write
jobs:
sync:
name: Check for sglang-kt updates
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
- name: Update sglang submodule to latest main
id: update
run: |
OLD_SHA=$(git -C third_party/sglang rev-parse HEAD)
git submodule update --remote third_party/sglang
NEW_SHA=$(git -C third_party/sglang rev-parse HEAD)
echo "old_sha=$OLD_SHA" >> "$GITHUB_OUTPUT"
echo "new_sha=$NEW_SHA" >> "$GITHUB_OUTPUT"
if [ "$OLD_SHA" = "$NEW_SHA" ]; then
echo "changed=false" >> "$GITHUB_OUTPUT"
echo "sglang submodule is already up to date ($OLD_SHA)"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
# Collect commit log between old and new
COMMITS=$(git -C third_party/sglang log --oneline "$OLD_SHA..$NEW_SHA" | head -20)
echo "commits<<EOF" >> "$GITHUB_OUTPUT"
echo "$COMMITS" >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
# sglang-kt version = ktransformers version (from version.py)
VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown")
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
echo "sglang submodule updated: $OLD_SHA -> $NEW_SHA (v$VERSION)"
fi
- name: Create pull request
if: steps.update.outputs.changed == 'true'
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: |
[build]: sync sglang submodule to ${{ steps.update.outputs.new_sha }}
branch: auto/sync-sglang
delete-branch: true
title: "[build] Sync sglang-kt submodule (v${{ steps.update.outputs.version }})"
body: |
Automated sync of `third_party/sglang` submodule to latest `main`.
**Old ref:** `${{ steps.update.outputs.old_sha }}`
**New ref:** `${{ steps.update.outputs.new_sha }}`
**sglang-kt version:** `${{ steps.update.outputs.version }}`
### Commits included
```
${{ steps.update.outputs.commits }}
```
---
*This PR was created automatically by the [sync-sglang-submodule](${{ github.server_url }}/${{ github.repository }}/actions/workflows/sync-sglang-submodule.yml) workflow.*
labels: |
dependencies
automated
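
The `commits<<EOF` block in the update step uses GitHub Actions' heredoc syntax for multiline step outputs. Stripped of workflow context, the pattern looks like this (`$GITHUB_OUTPUT` is faked with a temp file):

```bash
# Sketch: writing a multiline value to $GITHUB_OUTPUT with the <<EOF delimiter
GITHUB_OUTPUT="$(mktemp)"
COMMITS="abc1234 first commit
def5678 second commit"
{
  echo "commits<<EOF"
  echo "$COMMITS"
  echo "EOF"
} >> "$GITHUB_OUTPUT"
cat "$GITHUB_OUTPUT"
```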

.gitmodules

@@ -8,3 +8,7 @@
path = third_party/custom_flashinfer
url = https://github.com/kvcache-ai/custom_flashinfer.git
branch = fix-precision-mla-merge-main
[submodule "third_party/sglang"]
path = third_party/sglang
url = https://github.com/kvcache-ai/sglang.git
branch = main
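
Given the entry above, the recorded submodule URL can be read back with `git config` and no network access; a small sketch (the demo file path is illustrative):

```bash
# Sketch: recreate the new .gitmodules entry and query it with git config
cat > /tmp/gitmodules.demo <<'EOF'
[submodule "third_party/sglang"]
	path = third_party/sglang
	url = https://github.com/kvcache-ai/sglang.git
	branch = main
EOF
git config -f /tmp/gitmodules.demo --get "submodule.third_party/sglang.url"
```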


@@ -5,11 +5,17 @@ Please Note This is Quantization Deployment. For Native Kimi K2 Thinking deploym
Step 1: Install SGLang
Follow the [official SGLang installation](https://docs.sglang.ai/get_started/install.html) guide to install SGLang:
```
pip install "sglang[all]"
Install the kvcache-ai fork of SGLang (choose one):
```bash
# Option A: One-click install (from ktransformers root)
./install.sh
# Option B: pip install
pip install sglang-kt
```
> **Important:** Use `sglang-kt` (kvcache-ai fork), not the official `sglang` package. Run `pip uninstall sglang` first if you have the official version installed.
Step 2: Install KTransformers CPU Kernels
The KTransformers CPU kernels (kt-kernel) provide AMX-optimized computation for hybrid inference, for detailed installation instructions and troubleshooting, refer to the official [kt-kernel installation guide](https://github.com/kvcache-ai/ktransformers/blob/main/kt-kernel/README.md).


@@ -32,16 +32,17 @@ git submodule update --init --recursive
cd kt-kernel && ./install.sh
```
2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang)
2. **SGLang installed** - Install the kvcache-ai fork of SGLang (choose one):
Note: Currently, please clone our custom SGLang repository:
```bash
# Option A: One-click install (from ktransformers root)
./install.sh
# Option B: pip install
pip install sglang-kt
```
git clone https://github.com/kvcache-ai/sglang.git
cd sglang && pip install -e "python[all]"
// maybe need to reinstall cudnn according to the issue when launching SGLang
// pip install nvidia-cudnn-cu12==9.16.0.29
```
> Note: You may need to reinstall cudnn: `pip install nvidia-cudnn-cu12==9.16.0.29`
3. **CUDA toolkit** - Compatible with your GPU (CUDA 12.8+ recommended)
4. **Hugging Face CLI** - For downloading models:


@@ -30,16 +30,17 @@ git submodule update --init --recursive
cd kt-kernel && ./install.sh
```
2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang)
2. **SGLang installed** - Install the kvcache-ai fork of SGLang (choose one):
Note: Currently, please clone our custom SGLang repository:
```bash
# Option A: One-click install (from ktransformers root)
./install.sh
# Option B: pip install
pip install sglang-kt
```
git clone https://github.com/kvcache-ai/sglang.git
cd sglang && pip install -e "python[all]"
// maybe need to reinstall cudnn according to the issue when launching SGLang
// pip install nvidia-cudnn-cu12==9.16.0.29
```
> Note: You may need to reinstall cudnn: `pip install nvidia-cudnn-cu12==9.16.0.29`
3. **CUDA toolkit** - Compatible with your GPU (CUDA 12.8+ recommended)
4. **Hugging Face CLI** - For downloading models:


@@ -36,18 +36,18 @@ git submodule update --init --recursive
cd kt-kernel && ./install.sh
```
2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang)
Note: Currently, please clone our custom SGLang repository:
2. **SGLang installed** - Install the kvcache-ai fork of SGLang (choose one):
```bash
git clone https://github.com/kvcache-ai/sglang.git
git checkout qwen3.5
cd sglang && pip install -e "python[all]"
# Maybe need to reinstall cudnn according to the issue when launching SGLang
pip install nvidia-cudnn-cu12==9.16.0.29
# Option A: One-click install (from ktransformers root)
./install.sh
# Option B: pip install
pip install sglang-kt
```
> Note: You may need to reinstall cudnn: `pip install nvidia-cudnn-cu12==9.16.0.29`
3. **CUDA toolkit** - Compatible with your GPU (CUDA 12.8+ recommended)
4. **Hugging Face CLI** - For downloading models:


@@ -65,10 +65,11 @@ cd kt-kernel && ./install.sh
**Recommended for Kimi-K2.5:**
```bash
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
git checkout kimi_k2.5
pip install -e "python[all]"
# Option A: One-click install (from ktransformers root, installs sglang + kt-kernel)
./install.sh
# Option B: pip install
pip install sglang-kt
```
### 0.3 Training Environment: `kt-sft`


@@ -19,15 +19,15 @@ Before starting, ensure you have:
1. **SGLang installed**
Note: Currently, please clone our custom SGLang repository:
Install the kvcache-ai fork of SGLang (choose one):
```bash
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
pip install -e "python[all]"
```
# Option A: One-click install (from ktransformers root)
./install.sh
You can follow [SGLang integration steps](https://docs.sglang.io/get_started/install.html)
# Option B: pip install
pip install sglang-kt
```
2. **KT-Kernel installed**


@@ -30,14 +30,14 @@ This tutorial demonstrates how to run Kimi-K2 model inference using SGLang integ
Before starting, ensure you have:
1. **KT-Kernel installed** - Follow the [installation guide](./kt-kernel_intro.md#installation)
2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang)
2. **SGLang installed** - Install the kvcache-ai fork of SGLang (choose one):
Note: Currently, please clone our custom SGLang repository:
```bash
# Option A: One-click install (from ktransformers root)
./install.sh
```
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
pip install -e "python[all]"
# Option B: pip install
pip install sglang-kt
```
3. **CUDA toolkit** - Compatible with your GPU (CUDA 11.8+ recommended)


@@ -42,17 +42,17 @@ This tutorial demonstrates how to run MiniMax-M2.1 model inference using SGLang
Before starting, ensure you have:
1. **SGLang installed**
1. **SGLang installed**
Note: Currently, please clone our custom SGLang repository:
Install the kvcache-ai fork of SGLang (choose one):
```bash
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
pip install -e "python[all]"
```
# Option A: One-click install (from ktransformers root)
./install.sh
You can follow [SGLang integration steps](https://docs.sglang.io/get_started/install.html)
# Option B: pip install
pip install sglang-kt
```
2. **KT-Kernel installed**


@@ -63,12 +63,14 @@ Before starting, ensure you have:
1. **SGLang installed**
Clone and install the custom SGLang repository:
Install the kvcache-ai fork of SGLang (choose one):
```bash
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
pip install -e "python[all]"
# Option A: One-click install (from ktransformers root)
./install.sh
# Option B: pip install
pip install sglang-kt
```
2. **KT-Kernel installed**


@@ -32,15 +32,15 @@ Before starting, ensure you have:
1. **SGLang installed**
Note: Currently, please clone our custom SGLang repository:
Install the kvcache-ai fork of SGLang (choose one):
```bash
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
pip install -e "python[all]"
```
# Option A: One-click install (from ktransformers root)
./install.sh
You can follow [SGLang integration steps](https://docs.sglang.io/get_started/install.html)
# Option B: pip install
pip install sglang-kt
```
2. **KT-Kernel installed**


@@ -30,7 +30,7 @@ This tutorial demonstrates how to run DeepSeek V3.2 model inference using SGLang
Before starting, ensure you have:
1. **KT-Kernel installed** - Follow the [installation guide](./kt-kernel_intro.md#installation)
2. **SGLang installed** - Follow [SGLang integration steps](./kt-kernel_intro.md#integration-with-sglang)
2. **SGLang installed** - Install the kvcache-ai fork: `pip install sglang-kt` or run `./install.sh` from the ktransformers root
3. **CUDA toolkit** - Compatible with your GPU (CUDA 11.8+ recommended)
4. **Hugging Face CLI** - For downloading models:
```bash


@@ -40,12 +40,14 @@ Before starting, ensure you have:
1. **SGLang installed**
Note: Currently, please clone our custom SGLang repository:
Install the kvcache-ai fork of SGLang (choose one):
```bash
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
pip install -e "python[all]"
# Option A: One-click install (from ktransformers root)
./install.sh
# Option B: pip install
pip install sglang-kt
```
2. **KTransformers installed**


@@ -215,13 +215,10 @@ RUN /opt/miniconda3/envs/serve/bin/pip config set global.index-url https://mirro
/opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple; \
fi
# Clone repositories
# Use kvcache-ai/sglang fork with kimi_k2 branch
RUN git clone https://${GITHUB_ARTIFACTORY}/kvcache-ai/sglang.git /workspace/sglang \
&& cd /workspace/sglang && git checkout kimi_k2
# Clone repositories (sglang is included as a submodule in ktransformers)
RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
&& cd /workspace/ktransformers && git submodule update --init --recursive \
&& ln -s /workspace/ktransformers/third_party/sglang /workspace/sglang \
&& if [ "$FUNCTIONALITY" = "sft" ]; then \
git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory; \
fi
@@ -262,7 +259,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
; \
fi
# Install SGLang in serve env
# Install SGLang in serve env (version aligned with ktransformers)
RUN --mount=type=cache,target=/root/.cache/pip \
case "$CUDA_VERSION" in \
12.6.1) CUINDEX=126 ;; \
@@ -270,6 +267,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
12.9.1) CUINDEX=129 ;; \
13.0.1) CUINDEX=130 ;; \
esac \
&& export SGLANG_KT_VERSION=$(python3 -c "exec(open('/workspace/ktransformers/version.py').read()); print(__version__)") \
&& echo "Installing sglang-kt v${SGLANG_KT_VERSION}" \
&& cd /workspace/sglang \
&& /opt/miniconda3/envs/serve/bin/pip install -e "python[all]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}
@@ -404,18 +403,16 @@ RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"' >>
# Extract versions from each component and save to versions.env
RUN set -x && \
# SGLang version (from version.py file)
cd /workspace/sglang/python/sglang && \
SGLANG_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \
echo "SGLANG_VERSION=$SGLANG_VERSION" > /workspace/versions.env && \
echo "Extracted SGLang version: $SGLANG_VERSION" && \
\
# KTransformers version (from version.py in repo)
# KTransformers version (single source of truth for both kt-kernel and sglang-kt)
cd /workspace/ktransformers && \
KTRANSFORMERS_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \
echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \
echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" > /workspace/versions.env && \
echo "Extracted KTransformers version: $KTRANSFORMERS_VERSION" && \
\
# sglang-kt version = ktransformers version (aligned)
echo "SGLANG_KT_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \
echo "sglang-kt version (aligned): $KTRANSFORMERS_VERSION" && \
\
# LLaMA-Factory version (from fine-tune environment, sft mode only)
if [ "$FUNCTIONALITY" = "sft" ]; then \
. /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
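
The versions.env written above can be sourced later in the image; a hedged sketch (variable names match the Dockerfile, the consumer script and demo path are hypothetical):

```bash
# Sketch: recreate a versions.env as the Dockerfile writes it, then source it
cat > /tmp/versions.env.demo <<'EOF'
KTRANSFORMERS_VERSION=0.5.2.post1
SGLANG_KT_VERSION=0.5.2.post1
EOF
set -a
. /tmp/versions.env.demo
set +a
echo "sglang-kt: $SGLANG_KT_VERSION"
```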

install.sh

@@ -0,0 +1,259 @@
#!/usr/bin/env bash
set -euo pipefail
# Resolve the repository root (directory containing this script)
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
usage() {
cat <<EOF
Usage: $0 [SUBCOMMAND] [OPTIONS]
One-click installer for ktransformers (sglang + kt-kernel).
SUBCOMMANDS:
all Full install: submodules → sglang → kt-kernel (default)
sglang Install sglang only
kt-kernel Install kt-kernel only
deps Install system dependencies only
-h, --help Show this help message
OPTIONS:
--skip-sglang Skip sglang installation (for "all" subcommand)
--skip-kt-kernel Skip kt-kernel installation (for "all" subcommand)
--editable Install sglang in editable/dev mode (-e)
--manual Pass through to kt-kernel (manual CPU config)
--no-clean Pass through to kt-kernel (skip build clean)
EXAMPLES:
# Full install (recommended)
$0
# Install everything in editable mode for development
$0 all --editable
# Install sglang only
$0 sglang
# Install kt-kernel only (manual CPU config)
$0 kt-kernel --manual
# Full install, skip sglang (already installed)
$0 all --skip-sglang
EOF
exit 1
}
# ─── Helpers ───────────────────────────────────────────────────────────────────
log_step() {
echo ""
echo "=========================================="
echo " $1"
echo "=========================================="
echo ""
}
log_info() {
echo "[INFO] $1"
}
log_warn() {
echo "[WARN] $1"
}
log_error() {
echo "[ERROR] $1" >&2
}
# Read ktransformers version from version.py and export for sglang-kt
read_kt_version() {
local version_file="$REPO_ROOT/version.py"
if [ -f "$version_file" ]; then
KT_VERSION=$(python3 -c "exec(open('$version_file').read()); print(__version__)")
export SGLANG_KT_VERSION="$KT_VERSION"
log_info "ktransformers version: $KT_VERSION (will be used for sglang-kt)"
else
log_warn "version.py not found; sglang-kt will use its default version"
fi
}
# ─── Submodule init ────────────────────────────────────────────────────────────
init_submodules() {
log_step "Initializing git submodules"
if [ ! -d "$REPO_ROOT/.git" ]; then
log_warn "Not a git repository. Skipping submodule init."
log_warn "If you need sglang, clone with: git clone --recursive https://github.com/kvcache-ai/ktransformers.git"
return 0
fi
cd "$REPO_ROOT"
git submodule update --init --recursive
log_info "Submodules initialized successfully."
}
# ─── sglang install ───────────────────────────────────────────────────────────
install_sglang() {
local editable="${1:-0}"
log_step "Installing sglang (kvcache-ai fork)"
local sglang_dir="$REPO_ROOT/third_party/sglang"
local pyproject="$sglang_dir/python/pyproject.toml"
if [ ! -f "$pyproject" ]; then
log_error "sglang source not found at $sglang_dir"
log_error "Run 'git submodule update --init --recursive' first, or clone with --recursive."
exit 1
fi
cd "$sglang_dir"
if [ "$editable" = "1" ]; then
log_info "Installing sglang in editable mode..."
pip install -e "./python[all]"
else
log_info "Installing sglang..."
pip install "./python[all]"
fi
log_info "sglang installed successfully."
}
# ─── kt-kernel install ────────────────────────────────────────────────────────
install_kt_kernel() {
# Forward all remaining args to kt-kernel/install.sh
local kt_args=("$@")
log_step "Installing kt-kernel"
local kt_install="$REPO_ROOT/kt-kernel/install.sh"
if [ ! -f "$kt_install" ]; then
log_error "kt-kernel/install.sh not found at $kt_install"
exit 1
fi
cd "$REPO_ROOT/kt-kernel"
bash ./install.sh build "${kt_args[@]}"
}
# ─── deps install ─────────────────────────────────────────────────────────────
install_deps() {
log_step "Installing system dependencies"
local kt_install="$REPO_ROOT/kt-kernel/install.sh"
if [ ! -f "$kt_install" ]; then
log_error "kt-kernel/install.sh not found at $kt_install"
exit 1
fi
cd "$REPO_ROOT/kt-kernel"
bash ./install.sh deps
}
# ─── "all" subcommand ─────────────────────────────────────────────────────────
install_all() {
local skip_sglang=0
local skip_kt_kernel=0
local editable=0
local kt_args=()
while [[ $# -gt 0 ]]; do
case "$1" in
--skip-sglang) skip_sglang=1; shift ;;
--skip-kt-kernel) skip_kt_kernel=1; shift ;;
--editable) editable=1; shift ;;
--manual) kt_args+=("--manual"); shift ;;
--no-clean) kt_args+=("--no-clean"); shift ;;
-h|--help) usage ;;
*)
log_error "Unknown option: $1"
usage
;;
esac
done
# 1. Init submodules
init_submodules
# 2. System dependencies
install_deps
# 3. Read version for sglang-kt
read_kt_version
# 4. Install sglang
if [ "$skip_sglang" = "0" ]; then
install_sglang "$editable"
else
log_info "Skipping sglang installation (--skip-sglang)."
fi
# 5. Build & install kt-kernel
if [ "$skip_kt_kernel" = "0" ]; then
install_kt_kernel "${kt_args[@]}"
else
log_info "Skipping kt-kernel installation (--skip-kt-kernel)."
fi
log_step "Installation complete!"
echo " Verify with: kt doctor"
echo ""
}
# ─── Subcommand dispatcher ────────────────────────────────────────────────────
SUBCMD="all"
if [[ $# -gt 0 ]]; then
case "$1" in
all|sglang|kt-kernel|deps)
SUBCMD="$1"
shift
;;
-h|--help)
usage
;;
-*)
# Flags without subcommand → default to "all"
SUBCMD="all"
;;
*)
log_error "Unknown subcommand: $1"
usage
;;
esac
fi
case "$SUBCMD" in
all)
install_all "$@"
;;
sglang)
# Parse sglang-specific options
editable=0
while [[ $# -gt 0 ]]; do
case "$1" in
--editable) editable=1; shift ;;
-h|--help) usage ;;
*) log_error "Unknown option for sglang: $1"; usage ;;
esac
done
init_submodules
read_kt_version
install_sglang "$editable"
;;
kt-kernel)
install_kt_kernel "$@"
;;
deps)
install_deps
;;
esac


@@ -262,12 +262,23 @@ KT-Kernel can be used standalone via [Direct Python API](#direct-python-api-usag
#### 1. Install SGLang
Install the kvcache-ai fork of SGLang (required for kt-kernel support):
```bash
git clone https://github.com/sgl-project/sglang.git
cd sglang
pip install -e "python[all]"
# Option A: One-click install (from ktransformers root, installs sglang + kt-kernel)
./install.sh
# Option B: pip install
pip install sglang-kt
# Option C: From source (editable mode)
git clone --recursive https://github.com/kvcache-ai/ktransformers.git
cd ktransformers
pip install -e "third_party/sglang/python[all]"
```
> **Important:** Use `sglang-kt` (kvcache-ai fork), not the official `sglang` package. If you have the official version installed, uninstall it first: `pip uninstall sglang -y`
#### 2. Prepare Weights
You need both GPU weights and CPU-side expert weights for heterogeneous inference. The exact format depends on the backend:


@@ -115,12 +115,23 @@ KT-Kernel can be used standalone via the [Python API](#直接使用-python-api), and also
#### 1. Install SGLang
Install the kvcache-ai fork of SGLang (required for kt-kernel support):
```bash
git clone https://github.com/sgl-project/sglang.git
cd sglang
pip install -e "python[all]"
# Option A: One-click install (from ktransformers root, installs sglang + kt-kernel)
./install.sh
# Option B: pip install
pip install sglang-kt
# Option C: From source (editable mode)
git clone --recursive https://github.com/kvcache-ai/ktransformers.git
cd ktransformers
pip install -e "third_party/sglang/python[all]"
```
> **Important:** Use `sglang-kt` (kvcache-ai fork), not the official `sglang` package. If the official version is installed, uninstall it first: `pip uninstall sglang -y`
#### 2. Prepare Weights
Heterogeneous inference needs both GPU weights and CPU-side expert weights; the exact format depends on the backend:


@@ -33,6 +33,8 @@ dependencies = [
"pyyaml>=6.0",
"httpx>=0.25.0",
"packaging>=23.0",
# SGLang (kvcache-ai fork)
"sglang-kt",
# Development dependencies
"black>=25.9.0",
]


@@ -369,7 +369,19 @@ def doctor(
sglang_info = check_sglang_installation()
if sglang_info["installed"]:
if sglang_info["from_source"]:
if sglang_info.get("is_kvcache_fork"):
# Package name is sglang-kt — this is definitively the kvcache-ai fork
if sglang_info["from_source"] and sglang_info["git_info"]:
git_remote = sglang_info["git_info"].get("remote", "unknown")
git_branch = sglang_info["git_info"].get("branch", "unknown")
sglang_source_value = f"sglang-kt (Source: {git_remote}, branch: {git_branch})"
elif sglang_info["editable"]:
sglang_source_value = "sglang-kt (editable)"
else:
sglang_source_value = "sglang-kt"
sglang_source_status = "ok"
sglang_source_hint = None
elif sglang_info["from_source"]:
if sglang_info["git_info"]:
git_remote = sglang_info["git_info"].get("remote", "unknown")
git_branch = sglang_info["git_info"].get("branch", "unknown")
@@ -381,7 +393,7 @@ def doctor(
sglang_source_status = "ok"
sglang_source_hint = None
else:
sglang_source_value = "PyPI (not recommended)"
sglang_source_value = "PyPI sglang (not kvcache-ai fork)"
sglang_source_status = "warning"
sglang_source_hint = t("sglang_pypi_hint")
else:
@@ -411,7 +423,7 @@ def doctor(
else:
kt_kernel_value = t("sglang_kt_kernel_not_supported")
kt_kernel_status = "error"
kt_kernel_hint = 'Reinstall SGLang from: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"'
kt_kernel_hint = "Reinstall SGLang: pip uninstall sglang -y && pip install sglang-kt (or run ./install.sh from ktransformers root)"
issues_found = True
checks.append(


@@ -16,54 +16,38 @@ from kt_kernel.cli.utils.environment import detect_cuda_version, get_installed_p
def _get_sglang_info() -> str:
"""Get sglang version and installation source information."""
try:
import sglang
"""Get sglang-kt version and installation source information."""
from kt_kernel.cli.utils.sglang_checker import check_sglang_installation
version = getattr(sglang, "__version__", None)
info = check_sglang_installation()
if not version:
version = get_installed_package_version("sglang")
if not version:
return t("version_not_installed")
# Try to detect installation source
from pathlib import Path
import subprocess
if hasattr(sglang, "__file__") and sglang.__file__:
location = Path(sglang.__file__).parent.parent
git_dir = location / ".git"
if git_dir.exists():
# Installed from git (editable install)
try:
# Get remote URL
result = subprocess.run(
["git", "remote", "get-url", "origin"],
cwd=location,
capture_output=True,
text=True,
timeout=2,
)
if result.returncode == 0:
remote_url = result.stdout.strip()
# Simplify GitHub URLs
if "github.com" in remote_url:
repo_name = remote_url.split("/")[-1].replace(".git", "")
owner = remote_url.split("/")[-2]
return f"{version} [dim](GitHub: {owner}/{repo_name})[/dim]"
return f"{version} [dim](Git: {remote_url})[/dim]"
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
pass
# Default: installed from PyPI
return f"{version} [dim](PyPI)[/dim]"
except ImportError:
if not info["installed"]:
return t("version_not_installed")
# Get version from package metadata (prefer sglang-kt)
version = get_installed_package_version("sglang-kt")
if not version:
version = get_installed_package_version("sglang")
if not version:
version = info.get("version") or "unknown"
# Determine source label
if info.get("is_kvcache_fork"):
if info["from_source"] and info.get("git_info"):
git_remote = info["git_info"].get("remote", "")
return f"{version} [dim](Source: {git_remote})[/dim]"
elif info["editable"]:
return f"{version} [dim](editable)[/dim]"
else:
return f"{version} [dim](sglang-kt)[/dim]"
elif info["from_source"]:
if info.get("git_info"):
git_remote = info["git_info"].get("remote", "")
return f"{version} [dim](Source: {git_remote})[/dim]"
return f"{version} [dim](source)[/dim]"
else:
return f"{version} [dim](PyPI)[/dim]"
def version(
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed version info"),
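The branch order in the rewritten `_get_sglang_info` (kvcache-ai fork first, then generic source install, then PyPI) can be sketched as a standalone function — a minimal sketch assuming the checker's result-dict shape, with the Rich markup and version lookup stripped out; `source_label` itself is hypothetical and not part of this diff:

```python
def source_label(info: dict) -> str:
    """Pick the installation-source label; mirrors the branch order above."""
    if info.get("is_kvcache_fork"):
        # The sglang-kt package name alone proves the kvcache-ai fork.
        if info.get("from_source") and info.get("git_info"):
            return f"Source: {info['git_info'].get('remote', '')}"
        if info.get("editable"):
            return "editable"
        return "sglang-kt"
    if info.get("from_source"):
        if info.get("git_info"):
            return f"Source: {info['git_info'].get('remote', '')}"
        return "source"
    return "PyPI"

# A pip-installed sglang-kt wheel is labeled by its package name,
# while a plain PyPI sglang falls through to the old default:
print(source_label({"is_kvcache_fork": True, "from_source": False, "editable": False}))  # → sglang-kt
print(source_label({"is_kvcache_fork": False, "from_source": False}))  # → PyPI
```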


@@ -37,7 +37,7 @@ MESSAGES: dict[str, dict[str, str]] = {
"version_cuda_not_found": "Not found",
"version_kt_kernel": "kt-kernel",
"version_ktransformers": "ktransformers",
"version_sglang": "sglang",
"version_sglang": "sglang-kt",
"version_llamafactory": "llamafactory",
"version_not_installed": "Not installed",
# Install command
@@ -300,10 +300,10 @@ MESSAGES: dict[str, dict[str, str]] = {
"completion_next_session": "Completion will be automatically enabled in new terminal sessions.",
# SGLang
"sglang_not_found": "SGLang not found",
"sglang_pypi_warning": "SGLang from PyPI may not be compatible with kt-kernel",
"sglang_pypi_hint": 'SGLang from PyPI may not be compatible. Install from source: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"',
"sglang_install_hint": 'Install SGLang: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"',
"sglang_recommend_source": 'Recommend reinstalling from source: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"',
"sglang_pypi_warning": "SGLang from PyPI may not be compatible with kt-kernel. Use sglang-kt instead: pip install sglang-kt",
"sglang_pypi_hint": "SGLang from PyPI may not be compatible. Install the kvcache-ai fork: pip install sglang-kt (or run ./install.sh from ktransformers root)",
"sglang_install_hint": "Install SGLang: pip install sglang-kt (or run ./install.sh from ktransformers root)",
"sglang_recommend_source": "Recommend reinstalling with the kvcache-ai fork: pip uninstall sglang -y && pip install sglang-kt",
"sglang_kt_kernel_not_supported": "SGLang does not support kt-kernel (missing --kt-gpu-prefill-token-threshold parameter)",
"sglang_checking_kt_kernel_support": "Checking SGLang kt-kernel support...",
"sglang_kt_kernel_supported": "SGLang kt-kernel support verified",
@@ -657,7 +657,7 @@ MESSAGES: dict[str, dict[str, str]] = {
"version_cuda_not_found": "未找到",
"version_kt_kernel": "kt-kernel",
"version_ktransformers": "ktransformers",
"version_sglang": "sglang",
"version_sglang": "sglang-kt",
"version_llamafactory": "llamafactory",
"version_not_installed": "未安装",
# Install command
@@ -920,10 +920,10 @@ MESSAGES: dict[str, dict[str, str]] = {
"completion_next_session": "新的终端会话将自动启用补全。",
# SGLang
"sglang_not_found": "未找到 SGLang",
"sglang_pypi_warning": "PyPI 版本的 SGLang 可能与 kt-kernel 不兼容",
"sglang_pypi_hint": 'PyPI 版本可能不兼容。从源码安装: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"',
"sglang_install_hint": '安装 SGLang: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"',
"sglang_recommend_source": '建议从源码重新安装: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"',
"sglang_pypi_warning": "PyPI 版本的 SGLang 可能与 kt-kernel 不兼容。请使用 sglang-kt: pip install sglang-kt",
"sglang_pypi_hint": "PyPI 版本可能不兼容。安装 kvcache-ai 分支: pip install sglang-kt (或在 ktransformers 根目录运行 ./install.sh)",
"sglang_install_hint": "安装 SGLang: pip install sglang-kt (或在 ktransformers 根目录运行 ./install.sh)",
"sglang_recommend_source": "建议重新安装 kvcache-ai 分支: pip uninstall sglang -y && pip install sglang-kt",
"sglang_kt_kernel_not_supported": "SGLang 不支持 kt-kernel (缺少 --kt-gpu-prefill-token-threshold 参数)",
"sglang_checking_kt_kernel_support": "正在检查 SGLang kt-kernel 支持...",
"sglang_kt_kernel_supported": "SGLang kt-kernel 支持已验证",


@@ -38,15 +38,25 @@ def check_sglang_installation() -> dict:
editable = False
git_info = None
from_source = False
is_kvcache_fork = False # True if installed as sglang-kt package
try:
# Get pip show output
# Get pip show output (try sglang-kt first, then sglang)
result = subprocess.run(
[sys.executable, "-m", "pip", "show", "sglang"],
[sys.executable, "-m", "pip", "show", "sglang-kt"],
capture_output=True,
text=True,
timeout=10,
)
if result.returncode == 0:
is_kvcache_fork = True # sglang-kt package name proves it's the fork
else:
result = subprocess.run(
[sys.executable, "-m", "pip", "show", "sglang"],
capture_output=True,
text=True,
timeout=10,
)
if result.returncode == 0:
pip_info = {}
@@ -128,6 +138,7 @@ def check_sglang_installation() -> dict:
"editable": editable,
"git_info": git_info,
"from_source": from_source,
"is_kvcache_fork": is_kvcache_fork,
}
except ImportError:
return {
@@ -137,6 +148,7 @@ def check_sglang_installation() -> dict:
"editable": False,
"git_info": None,
"from_source": False,
"is_kvcache_fork": False,
}
@@ -158,20 +170,19 @@ def get_sglang_install_instructions(lang: Optional[str] = None) -> str:
return """
[bold yellow]SGLang \u672a\u5b89\u88c5[/bold yellow]
\u8bf7\u6309\u7167\u4ee5\u4e0b\u6b65\u9aa4\u5b89\u88c5 SGLang:
\u8bf7\u9009\u62e9\u4ee5\u4e0b\u65b9\u5f0f\u4e4b\u4e00\u5b89\u88c5 SGLang (kvcache-ai \u5206\u652f):
[bold]1. \u514b\u9686\u4ed3\u5e93:[/bold]
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
[bold]\u65b9\u5f0f A - \u4e00\u952e\u5b89\u88c5 (\u63a8\u8350):[/bold]
\u4ece ktransformers \u6839\u76ee\u5f55\u8fd0\u884c:
[cyan]./install.sh[/cyan]
[bold]2. \u5b89\u88c5 (\u4e8c\u9009\u4e00):[/bold]
[bold]\u65b9\u5f0f B - pip \u5b89\u88c5:[/bold]
[cyan]pip install sglang-kt[/cyan]
[cyan]\u65b9\u5f0f A - pip \u5b89\u88c5 (\u63a8\u8350):[/cyan]
pip install -e "python[all]"
[cyan]\u65b9\u5f0f B - uv \u5b89\u88c5 (\u66f4\u5feb):[/cyan]
pip install uv
uv pip install -e "python[all]"
[bold]\u65b9\u5f0f C - \u4ece\u6e90\u7801\u5b89\u88c5:[/bold]
git clone --recursive https://github.com/kvcache-ai/ktransformers.git
cd ktransformers
pip install "third_party/sglang/python[all]"
[dim]\u6ce8\u610f: \u8bf7\u786e\u4fdd\u5728\u6b63\u786e\u7684 Python \u73af\u5883\u4e2d\u6267\u884c\u4ee5\u4e0a\u547d\u4ee4[/dim]
"""
@@ -179,20 +190,19 @@ def get_sglang_install_instructions(lang: Optional[str] = None) -> str:
return """
[bold yellow]SGLang is not installed[/bold yellow]
Please follow these steps to install SGLang:
Install SGLang (kvcache-ai fork) using one of these methods:
[bold]1. Clone the repository:[/bold]
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
[bold]Option A - One-click install (recommended):[/bold]
From the ktransformers root directory, run:
[cyan]./install.sh[/cyan]
[bold]2. Install (choose one):[/bold]
[bold]Option B - pip install:[/bold]
[cyan]pip install sglang-kt[/cyan]
[cyan]Option A - pip install (recommended):[/cyan]
pip install -e "python[all]"
[cyan]Option B - uv install (faster):[/cyan]
pip install uv
uv pip install -e "python[all]"
[bold]Option C - From source:[/bold]
git clone --recursive https://github.com/kvcache-ai/ktransformers.git
cd ktransformers
pip install "third_party/sglang/python[all]"
[dim]Note: Make sure to run these commands in the correct Python environment[/dim]
"""
@@ -369,17 +379,18 @@ def print_sglang_kt_kernel_instructions() -> None:
您当前安装的 SGLang 不包含 kt-kernel 支持。
kt-kernel 需要使用 kvcache-ai 维护的 SGLang 分支。
[bold]请按以下步骤重新安装 SGLang:[/bold]
[bold]请按以下步骤重新安装:[/bold]
[cyan]1. 卸载当前的 SGLang:[/cyan]
pip uninstall sglang -y
[cyan]2. 克隆 kvcache-ai 的 SGLang 仓库:[/cyan]
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
[cyan]2. 安装 kvcache-ai 版本 (选择一种方式):[/cyan]
[cyan]3. 安装 SGLang:[/cyan]
pip install -e "python[all]"
[bold]方式 A - 一键安装 (推荐):[/bold]
从 ktransformers 根目录运行: ./install.sh
[bold]方式 B - pip 安装:[/bold]
pip install sglang-kt
[dim]注意: 请确保在正确的 Python 环境中执行以上命令[/dim]
"""
@@ -390,17 +401,18 @@ kt-kernel 需要使用 kvcache-ai 维护的 SGLang 分支。
Your current SGLang installation does not include kt-kernel support.
kt-kernel requires the kvcache-ai maintained fork of SGLang.
[bold]Please reinstall SGLang with the following steps:[/bold]
[bold]Please reinstall SGLang:[/bold]
[cyan]1. Uninstall current SGLang:[/cyan]
pip uninstall sglang -y
[cyan]2. Clone the kvcache-ai SGLang repository:[/cyan]
git clone https://github.com/kvcache-ai/sglang.git
cd sglang
[cyan]2. Install the kvcache-ai fork (choose one):[/cyan]
[cyan]3. Install SGLang:[/cyan]
pip install -e "python[all]"
[bold]Option A - One-click install (recommended):[/bold]
From the ktransformers root directory, run: ./install.sh
[bold]Option B - pip install:[/bold]
pip install sglang-kt
[dim]Note: Make sure to run these commands in the correct Python environment[/dim]
"""

pyproject.toml

@@ -0,0 +1,23 @@
[build-system]
requires = ["setuptools>=61"]
build-backend = "setuptools.build_meta"
[project]
name = "ktransformers"
dynamic = ["version", "dependencies"]
description = "KTransformers: CPU-GPU heterogeneous inference framework for LLMs"
readme = "README.md"
authors = [{ name = "kvcache-ai" }]
license = "Apache-2.0"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Python :: 3",
"Operating System :: POSIX :: Linux",
]
[project.urls]
Homepage = "https://github.com/kvcache-ai/ktransformers"
[tool.setuptools]
# No actual Python packages — this is a meta-package
packages = []

setup.py

@@ -0,0 +1,16 @@
"""Meta-package: pip install ktransformers → installs kt-kernel + sglang-kt."""
from pathlib import Path
from setuptools import setup
_version_file = Path(__file__).resolve().parent / "version.py"
_ns = {}
exec(_version_file.read_text(), _ns)
_v = _ns["__version__"]
setup(
version=_v,
install_requires=[
f"kt-kernel=={_v}",
f"sglang-kt=={_v}",
],
)
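The `exec`-based version sharing in setup.py keeps all three PyPI packages pinned to one number. The mechanic can be sketched end to end — a throwaway `version.py` written to a temp directory stands in for the real file:

```python
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as d:
    # Stand-in for the repo's version.py (same single-assignment layout).
    version_file = Path(d) / "version.py"
    version_file.write_text('__version__ = "0.5.2.post1"\n')

    ns = {}
    exec(version_file.read_text(), ns)  # same pattern setup.py uses above
    v = ns["__version__"]

# The meta-package pins both children to exactly this version.
requires = [f"kt-kernel=={v}", f"sglang-kt=={v}"]
print(requires)  # → ['kt-kernel==0.5.2.post1', 'sglang-kt==0.5.2.post1']
```

Exact `==` pins mean a `version.py` bump republishes a consistent triple (ktransformers, kt-kernel, sglang-kt), so `pip install ktransformers` can never mix versions across the three packages.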

third_party/sglang vendored Submodule

Submodule third_party/sglang added at 6b8b5f4649

version.py

@@ -3,4 +3,4 @@ KTransformers version information.
Shared across kt-kernel and kt-sft modules.
"""
__version__ = "0.5.1"
__version__ = "0.5.2.post1"