mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-03-14 18:37:23 +00:00
[feat](kt-kernel): Add automatic deployment workflow (#1719)
This commit is contained in:
170
.github/workflows/docker-image.yml
vendored
170
.github/workflows/docker-image.yml
vendored
@@ -5,9 +5,24 @@ on:
|
||||
types: [published]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
choose:
|
||||
description: 'Will you push the image to DockerHub? 0 for No, 1 for Yes'
|
||||
push_to_dockerhub:
|
||||
description: 'Push image to DockerHub? (true/false)'
|
||||
required: true
|
||||
default: 'false'
|
||||
type: boolean
|
||||
cuda_version:
|
||||
description: 'CUDA version (e.g., 12.8.1)'
|
||||
required: false
|
||||
default: '12.8.1'
|
||||
type: string
|
||||
push_simplified_tag:
|
||||
description: 'Also push simplified tag? (true/false)'
|
||||
required: false
|
||||
default: 'true'
|
||||
type: boolean
|
||||
ubuntu_mirror:
|
||||
description: 'Use Tsinghua Ubuntu mirror? (0/1)'
|
||||
required: false
|
||||
default: '0'
|
||||
type: string
|
||||
|
||||
@@ -20,79 +35,108 @@ jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Run tests
|
||||
run: |
|
||||
if [ -f docker-compose.test.yml ]; then
|
||||
docker-compose --file docker-compose.test.yml build
|
||||
docker-compose --file docker-compose.test.yml run sut
|
||||
else
|
||||
docker build . --file Dockerfile
|
||||
docker build . --file docker/Dockerfile
|
||||
fi
|
||||
|
||||
docker_task:
|
||||
build-and-push:
|
||||
needs: test
|
||||
name: ${{ matrix.instruct}}
|
||||
name: Build and Push Multi-Variant Docker Image
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# for amd64
|
||||
- {instruct: "FANCY", platform: "linux/amd64"}
|
||||
- {instruct: "AVX512", platform: "linux/amd64"}
|
||||
- {instruct: "AVX2", platform: "linux/amd64"}
|
||||
- {instruct: "NATIVE", platform: "linux/amd64"}
|
||||
# for arm64
|
||||
- {instruct: "NATIVE", platform: "linux/arm64"}
|
||||
|
||||
steps:
|
||||
- name: Move Docker data directory
|
||||
run: |
|
||||
sudo systemctl stop docker
|
||||
sudo mkdir -p /mnt/docker
|
||||
sudo rsync -avz /var/lib/docker/ /mnt/docker
|
||||
sudo rm -rf /var/lib/docker
|
||||
sudo ln -s /mnt/docker /var/lib/docker
|
||||
sudo systemctl start docker
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
-
|
||||
name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Move Docker data directory
|
||||
run: |
|
||||
sudo systemctl stop docker
|
||||
sudo mkdir -p /mnt/docker
|
||||
sudo rsync -avz /var/lib/docker/ /mnt/docker
|
||||
sudo rm -rf /var/lib/docker
|
||||
sudo ln -s /mnt/docker /var/lib/docker
|
||||
sudo systemctl start docker
|
||||
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
-
|
||||
name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
-
|
||||
name: Build and push for amd64
|
||||
if: matrix.platform == 'linux/amd64'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
push: true
|
||||
platforms: |
|
||||
linux/amd64
|
||||
tags: |
|
||||
${{ env.DOCKERHUB_REPO }}:latest-${{ matrix.instruct }}
|
||||
${{ env.DOCKERHUB_REPO }}:${{ github.event.release.tag_name }}-${{ matrix.instruct }}
|
||||
build-args: |
|
||||
CPU_INSTRUCT=${{ matrix.instruct }}
|
||||
-
|
||||
name: Build and push for arm64
|
||||
if: matrix.platform == 'linux/arm64'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
push: true
|
||||
platforms: |
|
||||
linux/arm64
|
||||
tags: |
|
||||
${{ env.DOCKERHUB_REPO }}:latest-${{ matrix.instruct }}
|
||||
${{ env.DOCKERHUB_REPO }}:${{ github.event.release.tag_name }}-${{ matrix.instruct }}
|
||||
build-args: |
|
||||
CPU_INSTRUCT=${{ matrix.instruct }}
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Determine build parameters
|
||||
id: params
|
||||
run: |
|
||||
# Determine if we should push
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
echo "should_push=true" >> $GITHUB_OUTPUT
|
||||
echo "push_simplified=true" >> $GITHUB_OUTPUT
|
||||
elif [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "should_push=${{ inputs.push_to_dockerhub }}" >> $GITHUB_OUTPUT
|
||||
echo "push_simplified=${{ inputs.push_simplified_tag }}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "should_push=false" >> $GITHUB_OUTPUT
|
||||
echo "push_simplified=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
# Determine CUDA version
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ -n "${{ inputs.cuda_version }}" ]; then
|
||||
echo "cuda_version=${{ inputs.cuda_version }}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "cuda_version=12.8.1" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
# Determine Ubuntu mirror setting
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ -n "${{ inputs.ubuntu_mirror }}" ]; then
|
||||
echo "ubuntu_mirror=${{ inputs.ubuntu_mirror }}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "ubuntu_mirror=0" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Build and push Docker image
|
||||
run: |
|
||||
cd docker
|
||||
|
||||
# Build command arguments
|
||||
BUILD_ARGS=(
|
||||
--cuda-version "${{ steps.params.outputs.cuda_version }}"
|
||||
--ubuntu-mirror "${{ steps.params.outputs.ubuntu_mirror }}"
|
||||
--repository "${{ env.DOCKERHUB_REPO }}"
|
||||
)
|
||||
|
||||
# Add simplified tag option if enabled
|
||||
if [ "${{ steps.params.outputs.push_simplified }}" = "true" ]; then
|
||||
BUILD_ARGS+=(--also-push-simplified)
|
||||
fi
|
||||
|
||||
# Add HTTP proxy if available
|
||||
if [ -n "${{ secrets.HTTP_PROXY }}" ]; then
|
||||
BUILD_ARGS+=(--http-proxy "${{ secrets.HTTP_PROXY }}")
|
||||
fi
|
||||
|
||||
# Add HTTPS proxy if available
|
||||
if [ -n "${{ secrets.HTTPS_PROXY }}" ]; then
|
||||
BUILD_ARGS+=(--https-proxy "${{ secrets.HTTPS_PROXY }}")
|
||||
fi
|
||||
|
||||
# Dry run if not pushing
|
||||
if [ "${{ steps.params.outputs.should_push }}" != "true" ]; then
|
||||
BUILD_ARGS+=(--dry-run)
|
||||
fi
|
||||
|
||||
# Execute build script
|
||||
./push-to-dockerhub.sh "${BUILD_ARGS[@]}"
|
||||
|
||||
- name: Display image information
|
||||
if: steps.params.outputs.should_push == 'true'
|
||||
run: |
|
||||
echo "::notice title=Docker Image::Image pushed successfully to ${{ env.DOCKERHUB_REPO }}"
|
||||
echo "Pull command: docker pull ${{ env.DOCKERHUB_REPO }}:v\$(VERSION)-cu\$(CUDA_SHORT)"
|
||||
|
||||
71
.github/workflows/install.yml
vendored
71
.github/workflows/install.yml
vendored
@@ -1,71 +0,0 @@
|
||||
name: Install / Test KTransformers
|
||||
run-name: Install / Test KTransformers
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
job_to_run:
|
||||
description: "Which job to run?"
|
||||
required: true
|
||||
default: "test"
|
||||
type: choice
|
||||
options:
|
||||
- create-install-test
|
||||
- install-test
|
||||
- test
|
||||
jobs:
|
||||
Install-Test-KTransformers:
|
||||
runs-on: self-hosted
|
||||
steps:
|
||||
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
|
||||
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
- run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
|
||||
- name: Remove old conda environment
|
||||
continue-on-error: true
|
||||
if: contains(inputs.job_to_run, 'create')
|
||||
run: |
|
||||
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
|
||||
conda env remove --name ktransformers-dev -y
|
||||
- name: Create conda environment
|
||||
if: contains(inputs.job_to_run, 'create')
|
||||
run: |
|
||||
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
|
||||
conda create --name ktransformers-dev python=3.11
|
||||
conda activate ktransformers-dev
|
||||
conda install -c conda-forge libstdcxx-ng -y
|
||||
- name: Install dependencies
|
||||
if: contains(inputs.job_to_run, 'create')
|
||||
run: |
|
||||
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
|
||||
conda activate ktransformers-dev
|
||||
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
|
||||
pip3 install packaging ninja cpufeature numpy
|
||||
pip install ~/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp311-cp311-linux_x86_64.whl
|
||||
- name: Install KTransformers
|
||||
if: contains(inputs.job_to_run, 'install')
|
||||
run: |
|
||||
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
|
||||
conda activate ktransformers-dev
|
||||
pip3 uninstall ktransformers -y
|
||||
cd ${{ github.workspace }}
|
||||
git submodule init
|
||||
git submodule update
|
||||
bash install.sh
|
||||
- name: Test Local Chat 1
|
||||
run: |
|
||||
set -e
|
||||
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
|
||||
conda activate ktransformers-dev
|
||||
export PATH=/usr/local/cuda-12.4/bin:$PATH
|
||||
export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH
|
||||
export CUDA_HOME=/usr/local/cuda-12.4
|
||||
cd ${{ github.workspace }}
|
||||
echo "Running Local Chat 1 (book.txt) ..."
|
||||
python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt
|
||||
sed -n '/Prompt:/,$p' log1.txt
|
||||
echo "Running Local Chat 2 [force think] (chinese.txt) ..."
|
||||
python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt -f > log2.txt
|
||||
sed -n '/Prompt:/,$p' log2.txt
|
||||
|
||||
- run: echo "This job's status is ${{ job.status }}."
|
||||
231
.github/workflows/package_wheel_release.yml
vendored
231
.github/workflows/package_wheel_release.yml
vendored
@@ -1,231 +0,0 @@
|
||||
name: Build Wheels
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release:
|
||||
description: 'Release? 1 = yes, 0 = no'
|
||||
default: '0'
|
||||
required: true
|
||||
type: string
|
||||
jobs:
|
||||
build_wheels:
|
||||
name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# Ubuntu
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
|
||||
# Windows
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: pwsh
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Free Disk Space
|
||||
uses: jlumbroso/free-disk-space@v1.3.1
|
||||
if: runner.os == 'Linux'
|
||||
with:
|
||||
tool-cache: true
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: false
|
||||
swap-storage: true
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.pyver }}
|
||||
|
||||
- name: check_space
|
||||
run: |
|
||||
if($IsLinux) {df -h}
|
||||
if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'}
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
|
||||
- name: Setup Mamba
|
||||
if: matrix.cuda != ''
|
||||
uses: conda-incubator/setup-miniconda@v3
|
||||
with:
|
||||
activate-environment: "ktransformers"
|
||||
python-version: ${{ matrix.pyver }}
|
||||
miniforge-variant: Miniforge3
|
||||
miniforge-version: latest
|
||||
use-mamba: true
|
||||
add-pip-as-python-dependency: true
|
||||
auto-activate-base: false
|
||||
|
||||
|
||||
|
||||
- name: build web
|
||||
run: |
|
||||
cd ktransformers/website/
|
||||
npm install
|
||||
npm run build
|
||||
cd ../../
|
||||
|
||||
- name: build for cuda
|
||||
if: matrix.cuda != ''
|
||||
env:
|
||||
USE_BALANCE_SERVE: "1"
|
||||
run: |
|
||||
git submodule init
|
||||
git submodule update
|
||||
if($IsWindows){
|
||||
$originalPath = Get-Location
|
||||
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64'
|
||||
$env:DISTUTILS_USE_SDK=1
|
||||
Set-Location $originalPath
|
||||
}
|
||||
$cudaVersion = '${{ matrix.cuda }}'
|
||||
$env:MAMBA_NO_LOW_SPEED_LIMIT = 1
|
||||
mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime
|
||||
$env:CUDA_PATH = $env:CONDA_PREFIX
|
||||
$env:CUDA_HOME = $env:CONDA_PREFIX
|
||||
if ($IsLinux) {
|
||||
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
|
||||
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH
|
||||
if (!(Test-Path $env:CUDA_HOME/lib64)) {
|
||||
New-Item -ItemType SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib
|
||||
}
|
||||
}
|
||||
if ($IsWindows) {
|
||||
if (Test-Path -Path "$env:CUDA_PATH/Library/bin/nvcc.exe"){
|
||||
$env:CUDA_PATH = "$env:CUDA_PATH/Library"
|
||||
$env:CUDA_HOME = $env:CUDA_PATH
|
||||
}
|
||||
$env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH
|
||||
$directory = "$env:CUDA_PATH/lib/x64/"
|
||||
if (-not (Test-Path -Path $directory)) {
|
||||
New-Item -ItemType Directory -Path $directory
|
||||
Write-Output "Directory '$directory' created."
|
||||
}
|
||||
cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/
|
||||
$env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE
|
||||
$env:INCLUDE =$env:CONDA_PREFIX + "/include;" + $env:INCLUDE
|
||||
}
|
||||
python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }}
|
||||
python -m pip install cpufeature build wheel ninja packaging setuptools
|
||||
$env:KTRANSFORMERS_FORCE_BUILD = "TRUE"
|
||||
$env:CPU_INSTRUCT = '${{ matrix.instruct }}'
|
||||
$env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
|
||||
python -m build --no-isolation --verbose
|
||||
|
||||
|
||||
- name: create Rlease dir
|
||||
run: |
|
||||
if ($IsWindows) {
|
||||
$env:date = $(Get-Date -Format "yyyy-MM-dd")
|
||||
New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh"
|
||||
$Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa"
|
||||
Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}"
|
||||
(Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH
|
||||
chmod 600 $Env:SSH_PATH
|
||||
}
|
||||
if ($IsLinux) {
|
||||
$env:date = $(date +%Y-%m-%d)
|
||||
mkdir -p ~/.ssh/
|
||||
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
|
||||
chmod 600 ~/.ssh/id_rsa
|
||||
}
|
||||
|
||||
ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date"
|
||||
scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/
|
||||
141
.github/workflows/package_wheel_test.yml
vendored
141
.github/workflows/package_wheel_test.yml
vendored
@@ -1,141 +0,0 @@
|
||||
name: Build Wheels Tests
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release:
|
||||
description: 'Release? 1 = yes, 0 = no'
|
||||
default: '0'
|
||||
required: true
|
||||
type: string
|
||||
jobs:
|
||||
build_wheels:
|
||||
name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# Ubuntu
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: pwsh
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Free Disk Space
|
||||
uses: jlumbroso/free-disk-space@v1.3.1
|
||||
if: runner.os == 'Linux'
|
||||
with:
|
||||
tool-cache: true
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: false
|
||||
swap-storage: true
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.pyver }}
|
||||
|
||||
- name: check_space
|
||||
run: |
|
||||
if($IsLinux) {df -h}
|
||||
if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'}
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
|
||||
- name: Setup Mamba
|
||||
if: matrix.cuda != ''
|
||||
uses: conda-incubator/setup-miniconda@v3
|
||||
with:
|
||||
activate-environment: "ktransformers"
|
||||
python-version: ${{ matrix.pyver }}
|
||||
miniforge-variant: Miniforge3
|
||||
miniforge-version: latest
|
||||
use-mamba: true
|
||||
add-pip-as-python-dependency: true
|
||||
auto-activate-base: false
|
||||
|
||||
|
||||
|
||||
- name: build web
|
||||
run: |
|
||||
cd ktransformers/website/
|
||||
npm install
|
||||
npm run build
|
||||
cd ../../
|
||||
|
||||
- name: build for cuda
|
||||
if: matrix.cuda != ''
|
||||
run: |
|
||||
git submodule init
|
||||
git submodule update
|
||||
if($IsWindows){
|
||||
$originalPath = Get-Location
|
||||
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64'
|
||||
$env:DISTUTILS_USE_SDK=1
|
||||
Set-Location $originalPath
|
||||
}
|
||||
$cudaVersion = '${{ matrix.cuda }}'
|
||||
$env:MAMBA_NO_LOW_SPEED_LIMIT = 1
|
||||
mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime
|
||||
$env:CUDA_PATH = $env:CONDA_PREFIX
|
||||
$env:CUDA_HOME = $env:CONDA_PREFIX
|
||||
if ($IsLinux) {
|
||||
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
|
||||
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH
|
||||
if (!(Test-Path $env:CUDA_HOME/lib64)) {
|
||||
New-Item -ItemType SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib
|
||||
}
|
||||
}
|
||||
if ($IsWindows) {
|
||||
if (Test-Path -Path "$env:CUDA_PATH/Library/bin/nvcc.exe"){
|
||||
$env:CUDA_PATH = "$env:CUDA_PATH/Library"
|
||||
$env:CUDA_HOME = $env:CUDA_PATH
|
||||
}
|
||||
$env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH
|
||||
$directory = "$env:CUDA_PATH/lib/x64/"
|
||||
if (-not (Test-Path -Path $directory)) {
|
||||
New-Item -ItemType Directory -Path $directory
|
||||
Write-Output "Directory '$directory' created."
|
||||
}
|
||||
cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/
|
||||
$env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE
|
||||
$env:INCLUDE =$env:CONDA_PREFIX + "/include;" + $env:INCLUDE
|
||||
}
|
||||
python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }}
|
||||
python -m pip install cpufeature build wheel ninja packaging setuptools
|
||||
$env:KTRANSFORMERS_FORCE_BUILD = "TRUE"
|
||||
$env:CPU_INSTRUCT = '${{ matrix.instruct }}'
|
||||
$env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
|
||||
python -m build --no-isolation --verbose
|
||||
|
||||
|
||||
- name: create Rlease dir
|
||||
run: |
|
||||
if ($IsWindows) {
|
||||
$env:date = $(Get-Date -Format "yyyy-MM-dd")
|
||||
New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh"
|
||||
$Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa"
|
||||
Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}"
|
||||
(Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH
|
||||
chmod 600 $Env:SSH_PATH
|
||||
}
|
||||
if ($IsLinux) {
|
||||
$env:date = $(date +%Y-%m-%d)
|
||||
mkdir -p ~/.ssh/
|
||||
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
|
||||
chmod 600 ~/.ssh/id_rsa
|
||||
}
|
||||
|
||||
ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date"
|
||||
scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/
|
||||
36
.github/workflows/release-fake-tag.yml
vendored
Normal file
36
.github/workflows/release-fake-tag.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
name: Release Fake Tag
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "version.py"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
if: github.repository == 'kvcache-ai/ktransformers'
|
||||
runs-on: ubuntu-latest
|
||||
environment: 'prod'
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Get version
|
||||
id: get_version
|
||||
run: |
|
||||
version=$(cat version.py | grep '__version__' | cut -d'"' -f2)
|
||||
echo "TAG=v$version" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Create and push tag
|
||||
run: |
|
||||
git config user.name "ktransformers-bot"
|
||||
git config user.email "ktransformers-bot@users.noreply.github.com"
|
||||
git tag ${{ steps.get_version.outputs.TAG }}
|
||||
git push origin ${{ steps.get_version.outputs.TAG }}
|
||||
163
.github/workflows/release-pypi.yml
vendored
Normal file
163
.github/workflows/release-pypi.yml
vendored
Normal file
@@ -0,0 +1,163 @@
|
||||
name: Release to PyPI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "version.py"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
test_pypi:
|
||||
description: 'Publish to TestPyPI instead of PyPI (for testing)'
|
||||
required: false
|
||||
default: 'false'
|
||||
type: choice
|
||||
options:
|
||||
- 'true'
|
||||
- 'false'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build-kt-kernel:
|
||||
name: Build kt-kernel CPU-only (Python ${{ matrix.python-version }})
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ['3.10', '3.11', '3.12']
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cmake libhwloc-dev pkg-config libnuma-dev
|
||||
|
||||
- name: Install Python build tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install build wheel setuptools
|
||||
|
||||
- name: Build kt-kernel wheel (CPU-only, multi-variant)
|
||||
working-directory: kt-kernel
|
||||
env:
|
||||
CPUINFER_BUILD_ALL_VARIANTS: '1'
|
||||
CPUINFER_USE_CUDA: '0'
|
||||
CPUINFER_BUILD_TYPE: 'Release'
|
||||
CPUINFER_PARALLEL: '4'
|
||||
CPUINFER_FORCE_REBUILD: '1'
|
||||
run: |
|
||||
echo "Building kt-kernel CPU-only with all CPU variants (AMX, AVX512, AVX2)"
|
||||
python -m build --wheel --no-isolation -v
|
||||
|
||||
- name: List generated wheels
|
||||
working-directory: kt-kernel
|
||||
run: |
|
||||
echo "Generated wheels:"
|
||||
ls -lh dist/
|
||||
|
||||
- name: Test wheel import
|
||||
working-directory: kt-kernel
|
||||
run: |
|
||||
pip install dist/*.whl
|
||||
python -c "import kt_kernel; print('✓ Import successful'); print(f'CPU variant detected: {kt_kernel.__cpu_variant__}'); print(f'Version: {kt_kernel.__version__}')"
|
||||
|
||||
- name: Verify wheel contains all variants
|
||||
working-directory: kt-kernel
|
||||
run: |
|
||||
echo "Checking wheel contents for CPU variants..."
|
||||
python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_" || echo "ERROR: No variant .so files found!"
|
||||
python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_amx.cpython" && echo "✓ AMX variant found" || echo "✗ AMX variant missing"
|
||||
python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx512.cpython" && echo "✓ AVX512 variant found" || echo "✗ AVX512 variant missing"
|
||||
python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx2.cpython" && echo "✓ AVX2 variant found" || echo "✗ AVX2 variant missing"
|
||||
|
||||
- name: Upload wheel artifact
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: kt-kernel-wheels-py${{ matrix.python-version }}
|
||||
path: kt-kernel/dist/*.whl
|
||||
retention-days: 7
|
||||
|
||||
publish-pypi:
|
||||
name: Publish to PyPI
|
||||
needs: build-kt-kernel
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
|
||||
environment: prod
|
||||
permissions:
|
||||
id-token: write # For trusted publishing (OIDC)
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- name: Download all wheel artifacts
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: artifacts/
|
||||
|
||||
- name: Organize wheels into dist/
|
||||
run: |
|
||||
mkdir -p dist/
|
||||
find artifacts/ -name "*.whl" -exec cp {} dist/ \;
|
||||
echo "Wheels to publish:"
|
||||
ls -lh dist/
|
||||
|
||||
- name: Get version from wheel
|
||||
id: get_version
|
||||
run: |
|
||||
# Extract version from first wheel filename
|
||||
wheel_name=$(ls dist/*.whl | head -1 | xargs basename)
|
||||
# Extract version (format: kt_kernel-X.Y.Z-...)
|
||||
version=$(echo "$wheel_name" | sed 's/kt_kernel-\([0-9.]*\)-.*/\1/')
|
||||
echo "VERSION=$version" >> $GITHUB_OUTPUT
|
||||
echo "Publishing version: $version"
|
||||
|
||||
- name: Publish to TestPyPI (if requested)
|
||||
if: github.event.inputs.test_pypi == 'true'
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
|
||||
skip-existing: true
|
||||
print-hash: true
|
||||
|
||||
- name: Publish to PyPI
|
||||
if: github.event.inputs.test_pypi != 'true'
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
||||
skip-existing: true
|
||||
print-hash: true
|
||||
|
||||
- name: Create release summary
|
||||
run: |
|
||||
echo "## 🎉 kt-kernel v${{ steps.get_version.outputs.VERSION }} Published to PyPI" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Installation" >> $GITHUB_STEP_SUMMARY
|
||||
echo '```bash' >> $GITHUB_STEP_SUMMARY
|
||||
echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Published Wheels" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Total: $(ls -1 dist/*.whl | wc -l) wheels (3 Python versions: 3.10, 3.11, 3.12)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Features" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**CPU-only build with multi-variant support:**" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- ✅ AMX (Intel Sapphire Rapids+)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- ✅ AVX512 (Intel Skylake-X/Ice Lake/Cascade Lake)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- ✅ AVX2 (Maximum compatibility)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Runtime CPU detection:** Automatically selects the best variant for your CPU" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "PyPI link: https://pypi.org/project/kt-kernel/#history" >> $GITHUB_STEP_SUMMARY
|
||||
24
.github/workflows/score.yml
vendored
24
.github/workflows/score.yml
vendored
@@ -1,24 +0,0 @@
|
||||
name: Human Eval Score
|
||||
run-name: Human Eval Score
|
||||
on: workflow_dispatch
|
||||
jobs:
|
||||
Human-Eval-Score:
|
||||
runs-on: self-hosted
|
||||
steps:
|
||||
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
|
||||
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
- run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
|
||||
- name: Human Eval Run
|
||||
run: |
|
||||
set -e
|
||||
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
|
||||
conda activate ktransformers-dev
|
||||
export PATH=/usr/local/cuda-12.4/bin:$PATH
|
||||
export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH
|
||||
export CUDA_HOME=/usr/local/cuda-12.4
|
||||
cd ${{ github.workspace }}
|
||||
python ktransformers/tests/score.py
|
||||
|
||||
- run: echo "This job's status is ${{ job.status }}."
|
||||
408
docker/Dockerfile
Normal file
408
docker/Dockerfile
Normal file
@@ -0,0 +1,408 @@
|
||||
ARG CUDA_VERSION=12.8.1
|
||||
FROM docker.1ms.run/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS base
|
||||
|
||||
ARG TARGETARCH
|
||||
ARG GRACE_BLACKWELL=0
|
||||
ARG HOPPER_SBO=0
|
||||
ARG CPU_VARIANT=x86-intel-multi
|
||||
ARG BUILD_ALL_CPU_VARIANTS=1
|
||||
|
||||
# Proxy settings for build-time network access
|
||||
ARG HTTP_PROXY
|
||||
ARG HTTPS_PROXY
|
||||
ARG http_proxy
|
||||
ARG https_proxy
|
||||
ENV HTTP_PROXY=${HTTP_PROXY} \
|
||||
HTTPS_PROXY=${HTTPS_PROXY} \
|
||||
http_proxy=${http_proxy} \
|
||||
https_proxy=${https_proxy}
|
||||
|
||||
ARG GRACE_BLACKWELL_DEEPEP_BRANCH=gb200_blog_part_2
|
||||
ARG HOPPER_SBO_DEEPEP_COMMIT=9f2fc4b3182a51044ae7ecb6610f7c9c3258c4d6
|
||||
ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
|
||||
ARG BUILD_AND_DOWNLOAD_PARALLEL=8
|
||||
ARG SGL_KERNEL_VERSION=0.3.19
|
||||
ARG SGL_VERSION=0.5.6.post1
|
||||
ARG USE_LATEST_SGLANG=0
|
||||
ARG GDRCOPY_VERSION=2.5.1
|
||||
ARG UBUNTU_MIRROR
|
||||
ARG GITHUB_ARTIFACTORY=github.com
|
||||
ARG FLASHINFER_VERSION=0.5.3
|
||||
|
||||
# ktransformers wheel version (cu128torch28 for CUDA 12.8 + PyTorch 2.8)
|
||||
ARG KTRANSFORMERS_VERSION=0.4.2
|
||||
ARG KTRANSFORMERS_WHEEL=ktransformers-0.4.2+cu128torch28fancy-cp312-cp312-linux_x86_64.whl
|
||||
|
||||
# flash_attn wheel for fine-tune env
|
||||
ARG FLASH_ATTN_WHEEL=flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
CUDA_HOME=/usr/local/cuda \
|
||||
GDRCOPY_HOME=/usr/src/gdrdrv-${GDRCOPY_VERSION}/ \
|
||||
FLASHINFER_VERSION=${FLASHINFER_VERSION}
|
||||
|
||||
# Add GKE default lib and bin locations
|
||||
ENV PATH="${PATH}:/usr/local/nvidia/bin" \
|
||||
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64"
|
||||
|
||||
# Replace Ubuntu sources with Tsinghua mirror for Ubuntu 24.04 (noble)
|
||||
RUN if [ -n "$UBUNTU_MIRROR" ]; then \
|
||||
echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ noble main restricted universe multiverse" > /etc/apt/sources.list && \
|
||||
echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ noble-updates main restricted universe multiverse" >> /etc/apt/sources.list && \
|
||||
echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ noble-backports main restricted universe multiverse" >> /etc/apt/sources.list && \
|
||||
echo "deb http://security.ubuntu.com/ubuntu/ noble-security main restricted universe multiverse" >> /etc/apt/sources.list && \
|
||||
rm -f /etc/apt/sources.list.d/ubuntu.sources; \
|
||||
fi
|
||||
|
||||
# Install system dependencies (organized by category for better caching)
|
||||
RUN --mount=type=cache,target=/var/cache/apt,id=base-apt \
|
||||
echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
|
||||
&& echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
|
||||
&& apt-get update && apt-get install -y --no-install-recommends --allow-change-held-packages \
|
||||
# Core system utilities
|
||||
tzdata \
|
||||
ca-certificates \
|
||||
software-properties-common \
|
||||
netcat-openbsd \
|
||||
kmod \
|
||||
unzip \
|
||||
openssh-server \
|
||||
curl \
|
||||
wget \
|
||||
lsof \
|
||||
locales \
|
||||
# Build essentials
|
||||
build-essential \
|
||||
cmake \
|
||||
perl \
|
||||
patchelf \
|
||||
ccache \
|
||||
git \
|
||||
git-lfs \
|
||||
# MPI and NUMA
|
||||
libopenmpi-dev \
|
||||
libnuma1 \
|
||||
libnuma-dev \
|
||||
numactl \
|
||||
# transformers multimodal VLM
|
||||
ffmpeg \
|
||||
# InfiniBand/RDMA
|
||||
libibverbs-dev \
|
||||
libibverbs1 \
|
||||
libibumad3 \
|
||||
librdmacm1 \
|
||||
libnl-3-200 \
|
||||
libnl-route-3-200 \
|
||||
libnl-route-3-dev \
|
||||
libnl-3-dev \
|
||||
ibverbs-providers \
|
||||
infiniband-diags \
|
||||
perftest \
|
||||
# Development libraries
|
||||
libgoogle-glog-dev \
|
||||
libgtest-dev \
|
||||
libjsoncpp-dev \
|
||||
libunwind-dev \
|
||||
libboost-all-dev \
|
||||
libssl-dev \
|
||||
libgrpc-dev \
|
||||
libgrpc++-dev \
|
||||
libprotobuf-dev \
|
||||
protobuf-compiler \
|
||||
protobuf-compiler-grpc \
|
||||
pybind11-dev \
|
||||
libhiredis-dev \
|
||||
libcurl4-openssl-dev \
|
||||
libczmq4 \
|
||||
libczmq-dev \
|
||||
libfabric-dev \
|
||||
# Package building tools
|
||||
devscripts \
|
||||
debhelper \
|
||||
fakeroot \
|
||||
dkms \
|
||||
check \
|
||||
libsubunit0 \
|
||||
libsubunit-dev \
|
||||
# Development tools
|
||||
gdb \
|
||||
ninja-build \
|
||||
vim \
|
||||
tmux \
|
||||
htop \
|
||||
zsh \
|
||||
tree \
|
||||
less \
|
||||
rdma-core \
|
||||
# NCCL
|
||||
libnccl2 \
|
||||
libnccl-dev \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
# GDRCopy installation
|
||||
RUN mkdir -p /tmp/gdrcopy && cd /tmp \
|
||||
&& curl --retry 3 --retry-delay 2 -fsSL -o v${GDRCOPY_VERSION}.tar.gz \
|
||||
https://${GITHUB_ARTIFACTORY}/NVIDIA/gdrcopy/archive/refs/tags/v${GDRCOPY_VERSION}.tar.gz \
|
||||
&& tar -xzf v${GDRCOPY_VERSION}.tar.gz && rm v${GDRCOPY_VERSION}.tar.gz \
|
||||
&& cd gdrcopy-${GDRCOPY_VERSION}/packages \
|
||||
&& CUDA=/usr/local/cuda ./build-deb-packages.sh \
|
||||
&& dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
|
||||
&& cd / && rm -rf /tmp/gdrcopy
|
||||
|
||||
# Fix DeepEP IBGDA symlink
|
||||
RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
|
||||
|
||||
# Set up locale
|
||||
RUN locale-gen en_US.UTF-8
|
||||
ENV LANG=en_US.UTF-8 \
|
||||
LANGUAGE=en_US:en \
|
||||
LC_ALL=en_US.UTF-8
|
||||
|
||||
########################################################
|
||||
########## Install Miniconda ###########################
|
||||
########################################################
|
||||
|
||||
RUN mkdir -p /opt/miniconda3 \
|
||||
&& wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /opt/miniconda3/miniconda.sh \
|
||||
&& bash /opt/miniconda3/miniconda.sh -b -u -p /opt/miniconda3 \
|
||||
&& rm /opt/miniconda3/miniconda.sh
|
||||
|
||||
# Add conda to PATH
|
||||
ENV PATH="/opt/miniconda3/bin:${PATH}"
|
||||
|
||||
# Accept conda TOS
|
||||
RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main \
|
||||
&& conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
|
||||
|
||||
# Configure conda to use Tsinghua mirror
|
||||
RUN conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main \
|
||||
&& conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free \
|
||||
&& conda config --set show_channel_urls yes
|
||||
|
||||
########################################################
|
||||
########## Dual Conda Environment Setup ################
|
||||
########################################################
|
||||
|
||||
FROM base AS framework
|
||||
|
||||
ARG CUDA_VERSION
|
||||
ARG BUILD_AND_DOWNLOAD_PARALLEL
|
||||
ARG SGL_KERNEL_VERSION
|
||||
ARG SGL_VERSION
|
||||
ARG USE_LATEST_SGLANG
|
||||
ARG FLASHINFER_VERSION
|
||||
ARG GRACE_BLACKWELL
|
||||
ARG GRACE_BLACKWELL_DEEPEP_BRANCH
|
||||
ARG HOPPER_SBO
|
||||
ARG HOPPER_SBO_DEEPEP_COMMIT
|
||||
ARG DEEPEP_COMMIT
|
||||
ARG GITHUB_ARTIFACTORY
|
||||
ARG KTRANSFORMERS_VERSION
|
||||
ARG KTRANSFORMERS_WHEEL
|
||||
ARG FLASH_ATTN_WHEEL
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
# Create two conda environments with Python 3.12
|
||||
RUN conda create -n serve python=3.12 -y \
|
||||
&& conda create -n fine-tune python=3.12 -y
|
||||
|
||||
# Set pip mirror for both conda envs
|
||||
RUN /opt/miniconda3/envs/serve/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
|
||||
&& /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
||||
|
||||
# Clone repositories
|
||||
# Use kvcache-ai/sglang fork with kimi_k2 branch
|
||||
RUN git clone https://${GITHUB_ARTIFACTORY}/kvcache-ai/sglang.git /workspace/sglang \
|
||||
&& cd /workspace/sglang && git checkout kimi_k2
|
||||
|
||||
RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory \
|
||||
&& git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
|
||||
&& cd /workspace/ktransformers && git submodule update --init --recursive
|
||||
|
||||
# Download ktransformers wheel and flash_attn wheel for fine-tune env
|
||||
RUN curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \
|
||||
https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
|
||||
&& curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \
|
||||
https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}
|
||||
|
||||
########################################################
|
||||
# Environment 1: serve (sglang + kt-kernel)
|
||||
########################################################
|
||||
|
||||
# Upgrade pip and install basic tools in serve env
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
/opt/miniconda3/envs/serve/bin/pip install --upgrade pip setuptools wheel html5lib six
|
||||
|
||||
# Install sgl-kernel
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
case "$CUDA_VERSION" in \
|
||||
12.6.1) CUINDEX=126 ;; \
|
||||
12.8.1) CUINDEX=128 ;; \
|
||||
12.9.1) CUINDEX=129 ;; \
|
||||
13.0.1) CUINDEX=130 ;; \
|
||||
*) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
|
||||
esac \
|
||||
&& if [ "$CUDA_VERSION" = "12.6.1" ]; then \
|
||||
/opt/miniconda3/envs/serve/bin/pip install https://${GITHUB_ARTIFACTORY}/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu124-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \
|
||||
; \
|
||||
elif [ "$CUDA_VERSION" = "12.8.1" ] || [ "$CUDA_VERSION" = "12.9.1" ]; then \
|
||||
/opt/miniconda3/envs/serve/bin/pip install sgl-kernel==${SGL_KERNEL_VERSION} \
|
||||
; \
|
||||
elif [ "$CUDA_VERSION" = "13.0.1" ]; then \
|
||||
/opt/miniconda3/envs/serve/bin/pip install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu130-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \
|
||||
; \
|
||||
fi
|
||||
|
||||
# Install SGLang in serve env
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
case "$CUDA_VERSION" in \
|
||||
12.6.1) CUINDEX=126 ;; \
|
||||
12.8.1) CUINDEX=128 ;; \
|
||||
12.9.1) CUINDEX=129 ;; \
|
||||
13.0.1) CUINDEX=130 ;; \
|
||||
esac \
|
||||
&& cd /workspace/sglang \
|
||||
&& /opt/miniconda3/envs/serve/bin/pip install -e "python[all]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}
|
||||
|
||||
# Download FlashInfer cubin for serve env
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
FLASHINFER_CUBIN_DOWNLOAD_THREADS=${BUILD_AND_DOWNLOAD_PARALLEL} FLASHINFER_LOGGING_LEVEL=warning \
|
||||
/opt/miniconda3/envs/serve/bin/python -m flashinfer --download-cubin
|
||||
|
||||
# Install DeepEP in serve env
|
||||
RUN set -eux; \
|
||||
if [ "$GRACE_BLACKWELL" = "1" ]; then \
|
||||
git clone https://github.com/fzyzcjy/DeepEP.git /workspace/DeepEP && \
|
||||
cd /workspace/DeepEP && \
|
||||
git checkout ${GRACE_BLACKWELL_DEEPEP_BRANCH} && \
|
||||
sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh; \
|
||||
elif [ "$HOPPER_SBO" = "1" ]; then \
|
||||
git clone https://github.com/deepseek-ai/DeepEP.git -b antgroup-opt /workspace/DeepEP && \
|
||||
cd /workspace/DeepEP && \
|
||||
git checkout ${HOPPER_SBO_DEEPEP_COMMIT} && \
|
||||
sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh; \
|
||||
else \
|
||||
curl --retry 3 --retry-delay 2 -fsSL -o /tmp/${DEEPEP_COMMIT}.zip \
|
||||
https://${GITHUB_ARTIFACTORY}/deepseek-ai/DeepEP/archive/${DEEPEP_COMMIT}.zip && \
|
||||
unzip -q /tmp/${DEEPEP_COMMIT}.zip -d /tmp && rm /tmp/${DEEPEP_COMMIT}.zip && \
|
||||
mv /tmp/DeepEP-${DEEPEP_COMMIT} /workspace/DeepEP && \
|
||||
cd /workspace/DeepEP && \
|
||||
sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh; \
|
||||
fi
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
cd /workspace/DeepEP && \
|
||||
case "$CUDA_VERSION" in \
|
||||
12.6.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0' ;; \
|
||||
12.8.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0' ;; \
|
||||
12.9.1|13.0.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0;10.3' ;; \
|
||||
*) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
|
||||
esac && \
|
||||
. /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve && \
|
||||
TORCH_CUDA_ARCH_LIST="${CHOSEN_TORCH_CUDA_ARCH_LIST}" MAX_JOBS=${BUILD_AND_DOWNLOAD_PARALLEL} \
|
||||
pip install --no-build-isolation .
|
||||
|
||||
# Install NCCL for serve env
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
|
||||
/opt/miniconda3/envs/serve/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
|
||||
elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
|
||||
/opt/miniconda3/envs/serve/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
|
||||
fi
|
||||
|
||||
# Install kt-kernel in serve env with all CPU variants
|
||||
RUN . /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve \
|
||||
&& cd /workspace/ktransformers/kt-kernel \
|
||||
&& CPUINFER_BUILD_ALL_VARIANTS=1 ./install.sh build
|
||||
|
||||
########################################################
|
||||
# Environment 2: fine-tune (LLaMA-Factory + ktransformers)
|
||||
########################################################
|
||||
|
||||
# Install dependency libraries for ktransformers (CUDA 11.8 runtime required)
|
||||
RUN conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
|
||||
&& conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime
|
||||
|
||||
# Install PyTorch 2.8 in fine-tune env
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
case "$CUDA_VERSION" in \
|
||||
12.6.1) CUINDEX=126 ;; \
|
||||
12.8.1) CUINDEX=128 ;; \
|
||||
12.9.1) CUINDEX=129 ;; \
|
||||
13.0.1) CUINDEX=130 ;; \
|
||||
esac \
|
||||
&& /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel \
|
||||
&& /opt/miniconda3/envs/fine-tune/bin/pip install \
|
||||
torch==2.8.0 \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
--extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}
|
||||
|
||||
# Install LLaMA-Factory in fine-tune env
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
cd /workspace/LLaMA-Factory \
|
||||
&& /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation
|
||||
|
||||
# Install ktransformers wheel in fine-tune env
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
/opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}
|
||||
|
||||
# Install flash_attn wheel in fine-tune env
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
/opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}
|
||||
|
||||
# Install NCCL for fine-tune env
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
|
||||
/opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
|
||||
elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
|
||||
/opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
|
||||
fi
|
||||
|
||||
########################################################
|
||||
# Cleanup and final setup
|
||||
########################################################
|
||||
|
||||
# Clean up downloaded wheels
|
||||
RUN rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}
|
||||
|
||||
# Initialize conda for bash
|
||||
RUN /opt/miniconda3/bin/conda init bash
|
||||
|
||||
# Create shell aliases for convenience
|
||||
RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"\nalias finetune="conda activate fine-tune"' >> /root/.bashrc
|
||||
|
||||
########################################################
|
||||
# Extract version information for image naming
|
||||
########################################################
|
||||
|
||||
# Extract versions from each component and save to versions.env
|
||||
RUN set -x && \
|
||||
# SGLang version (from version.py file)
|
||||
cd /workspace/sglang/python/sglang && \
|
||||
SGLANG_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \
|
||||
echo "SGLANG_VERSION=$SGLANG_VERSION" > /workspace/versions.env && \
|
||||
echo "Extracted SGLang version: $SGLANG_VERSION" && \
|
||||
\
|
||||
# KTransformers version (from version.py in repo)
|
||||
cd /workspace/ktransformers && \
|
||||
KTRANSFORMERS_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \
|
||||
echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \
|
||||
echo "Extracted KTransformers version: $KTRANSFORMERS_VERSION" && \
|
||||
\
|
||||
# LLaMA-Factory version (from fine-tune environment)
|
||||
. /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
|
||||
cd /workspace/LLaMA-Factory && \
|
||||
LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
|
||||
echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
|
||||
echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION" && \
|
||||
\
|
||||
# Display all versions
|
||||
echo "=== Version Summary ===" && \
|
||||
cat /workspace/versions.env
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
387
docker/README-packaging.md
Normal file
387
docker/README-packaging.md
Normal file
@@ -0,0 +1,387 @@
|
||||
# KTransformers Docker Packaging Guide
|
||||
|
||||
This directory contains scripts for building and distributing KTransformers Docker images with standardized naming conventions.
|
||||
|
||||
## Overview
|
||||
|
||||
The packaging system provides:
|
||||
|
||||
- **Automated version detection** from sglang, ktransformers, and LLaMA-Factory
|
||||
- **Multi-CPU variant support** (AMX, AVX512, AVX2) with runtime auto-detection
|
||||
- **Standardized naming convention** for easy identification and management
|
||||
- **Two distribution methods**:
|
||||
- Local tar file export for offline distribution
|
||||
- DockerHub publishing for online distribution
|
||||
|
||||
## Naming Convention
|
||||
|
||||
Docker images follow this naming pattern:
|
||||
|
||||
```
|
||||
sglang-v{sglang版本}_ktransformers-v{ktransformers版本}_{cpu信息}_{gpu信息}_{功能模式}_{时间戳}
|
||||
```
|
||||
|
||||
### Example Names
|
||||
|
||||
**Tar file:**
|
||||
```
|
||||
sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
```
|
||||
|
||||
**DockerHub tags:**
|
||||
```
|
||||
Full tag:
|
||||
kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
Simplified tag:
|
||||
kvcache/ktransformers:v0.4.3-cu128
|
||||
```
|
||||
|
||||
### Name Components
|
||||
|
||||
| Component | Description | Example |
|
||||
|-----------|-------------|---------|
|
||||
| sglang version | SGLang package version | `v0.5.6` |
|
||||
| ktransformers version | KTransformers version | `v0.4.3` |
|
||||
| cpu info | CPU instruction set support | `x86-intel-multi` (includes AMX/AVX512/AVX2) |
|
||||
| gpu info | CUDA version | `cu128` (CUDA 12.8) |
|
||||
| functionality | Feature mode | `sft_llamafactory-v0.9.3` or `infer` |
|
||||
| timestamp | Build time (Beijing/UTC+8) | `20241212143022` |
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `Dockerfile` | Main Dockerfile with multi-CPU build and version extraction |
|
||||
| `docker-utils.sh` | Shared utility functions for both scripts |
|
||||
| `build-docker-tar.sh` | Build and export Docker image to tar file |
|
||||
| `push-to-dockerhub.sh` | Build and push Docker image to DockerHub |
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Docker installed and running
|
||||
- For DockerHub push: Docker Hub account and login (`docker login`)
|
||||
- Sufficient disk space (at least 20GB recommended)
|
||||
- Internet access (or local mirrors configured)
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Build Local Tar File
|
||||
|
||||
```bash
|
||||
cd docker
|
||||
|
||||
# Basic build
|
||||
./build-docker-tar.sh
|
||||
|
||||
# With specific CUDA version and mirror
|
||||
./build-docker-tar.sh \
|
||||
--cuda-version 12.8.1 \
|
||||
--ubuntu-mirror 1
|
||||
|
||||
# With proxy
|
||||
./build-docker-tar.sh \
|
||||
--cuda-version 12.8.1 \
|
||||
--ubuntu-mirror 1 \
|
||||
--http-proxy "http://127.0.0.1:16981" \
|
||||
--https-proxy "http://127.0.0.1:16981" \
|
||||
--output-dir /path/to/output
|
||||
```
|
||||
|
||||
### Push to DockerHub
|
||||
|
||||
```bash
|
||||
cd docker
|
||||
|
||||
# Basic push (requires --repository)
|
||||
./push-to-dockerhub.sh \
|
||||
--repository kvcache/ktransformers
|
||||
|
||||
# With simplified tag
|
||||
./push-to-dockerhub.sh \
|
||||
--cuda-version 12.8.1 \
|
||||
--repository kvcache/ktransformers \
|
||||
--also-push-simplified
|
||||
|
||||
# Skip build if image exists
|
||||
./push-to-dockerhub.sh \
|
||||
--repository kvcache/ktransformers \
|
||||
--skip-build
|
||||
```
|
||||
|
||||
## Script Options
|
||||
|
||||
### build-docker-tar.sh
|
||||
|
||||
```
|
||||
Build Configuration:
|
||||
--cuda-version VERSION CUDA version (default: 12.8.1)
|
||||
--ubuntu-mirror 0|1 Use Tsinghua mirror (default: 0)
|
||||
--http-proxy URL HTTP proxy URL
|
||||
--https-proxy URL HTTPS proxy URL
|
||||
--cpu-variant VARIANT CPU variant (default: x86-intel-multi)
|
||||
--functionality TYPE Mode: sft or infer (default: sft)
|
||||
|
||||
Paths:
|
||||
--dockerfile PATH Path to Dockerfile (default: ./Dockerfile)
|
||||
--context-dir PATH Build context directory (default: .)
|
||||
--output-dir PATH Output directory for tar (default: .)
|
||||
|
||||
Options:
|
||||
--dry-run Preview without building
|
||||
--keep-image Keep Docker image after export
|
||||
--build-arg KEY=VALUE Additional build arguments
|
||||
-h, --help Show help message
|
||||
```
|
||||
|
||||
### push-to-dockerhub.sh
|
||||
|
||||
```
|
||||
All options from build-docker-tar.sh, plus:
|
||||
|
||||
Registry Settings:
|
||||
--registry REGISTRY Docker registry (default: docker.io)
|
||||
--repository REPO Repository name (REQUIRED)
|
||||
|
||||
Options:
|
||||
--skip-build Skip build if image exists
|
||||
--also-push-simplified Also push simplified tag
|
||||
--max-retries N Max push retries (default: 3)
|
||||
--retry-delay SECONDS Delay between retries (default: 5)
|
||||
```
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Example 1: Local Development Build
|
||||
|
||||
For testing on your local machine:
|
||||
|
||||
```bash
|
||||
./build-docker-tar.sh \
|
||||
--cuda-version 12.8.1 \
|
||||
--output-dir ./builds \
|
||||
--keep-image
|
||||
```
|
||||
|
||||
This will:
|
||||
1. Build the Docker image
|
||||
2. Export to tar in `./builds/` directory
|
||||
3. Keep the Docker image for local testing
|
||||
|
||||
### Example 2: Production Build for Distribution
|
||||
|
||||
For creating a production build with mirrors and proxy:
|
||||
|
||||
```bash
|
||||
./build-docker-tar.sh \
|
||||
--cuda-version 12.8.1 \
|
||||
--ubuntu-mirror 1 \
|
||||
--http-proxy "http://127.0.0.1:16981" \
|
||||
--https-proxy "http://127.0.0.1:16981" \
|
||||
--output-dir /mnt/data/releases
|
||||
```
|
||||
|
||||
### Example 3: Publish to DockerHub
|
||||
|
||||
For publishing to DockerHub:
|
||||
|
||||
```bash
|
||||
# First, login to Docker Hub
|
||||
docker login
|
||||
|
||||
# Then push
|
||||
./push-to-dockerhub.sh \
|
||||
--cuda-version 12.8.1 \
|
||||
--repository kvcache/ktransformers \
|
||||
--also-push-simplified
|
||||
```
|
||||
|
||||
This creates two tags:
|
||||
- Full: `kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022`
|
||||
- Simplified: `kvcache/ktransformers:v0.4.3-cu128`
|
||||
|
||||
### Example 4: Dry Run
|
||||
|
||||
Preview the build without actually building:
|
||||
|
||||
```bash
|
||||
./build-docker-tar.sh --cuda-version 12.8.1 --dry-run
|
||||
```
|
||||
|
||||
### Example 5: Custom Build Arguments
|
||||
|
||||
Pass additional Docker build arguments:
|
||||
|
||||
```bash
|
||||
./build-docker-tar.sh \
|
||||
--cuda-version 12.8.1 \
|
||||
--build-arg SGL_VERSION=0.5.7 \
|
||||
--build-arg FLASHINFER_VERSION=0.5.4
|
||||
```
|
||||
|
||||
## Using the Built Images
|
||||
|
||||
### Load from Tar File
|
||||
|
||||
```bash
|
||||
# Load the image
|
||||
docker load -i sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
|
||||
# Run the container
|
||||
docker run -it --rm \
|
||||
--gpus all \
|
||||
sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 \
|
||||
/bin/bash
|
||||
```
|
||||
|
||||
### Pull from DockerHub
|
||||
|
||||
```bash
|
||||
# Pull with full tag
|
||||
docker pull kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
# Or pull with simplified tag
|
||||
docker pull kvcache/ktransformers:v0.4.3-cu128
|
||||
|
||||
# Run the container
|
||||
docker run -it --rm \
|
||||
--gpus all \
|
||||
kvcache/ktransformers:v0.4.3-cu128 \
|
||||
/bin/bash
|
||||
```
|
||||
|
||||
### Inside the Container
|
||||
|
||||
The image contains two conda environments:
|
||||
|
||||
```bash
|
||||
# Activate serve environment (for inference with sglang)
|
||||
conda activate serve
|
||||
# or use the alias:
|
||||
serve
|
||||
|
||||
# Activate fine-tune environment (for training with LLaMA-Factory)
|
||||
conda activate fine-tune
|
||||
# or use the alias:
|
||||
finetune
|
||||
```
|
||||
|
||||
## Multi-CPU Variant Support
|
||||
|
||||
The Docker image includes all three CPU variants:
|
||||
- **AMX** - For Intel Sapphire Rapids and newer (4th Gen Xeon+)
|
||||
- **AVX512** - For Intel Skylake-X, Ice Lake, Cascade Lake
|
||||
- **AVX2** - Maximum compatibility for older CPUs
|
||||
|
||||
The runtime automatically detects your CPU and loads the appropriate variant. To override:
|
||||
|
||||
```bash
|
||||
# Force use of AVX2 variant
|
||||
export KT_KERNEL_CPU_VARIANT=avx2
|
||||
python your_script.py
|
||||
|
||||
# Enable debug output to see which variant is loaded
|
||||
export KT_KERNEL_DEBUG=1
|
||||
python your_script.py
|
||||
```
|
||||
|
||||
## Version Extraction
|
||||
|
||||
Versions are automatically extracted during Docker build from:
|
||||
|
||||
- **SGLang**: From `sglang.__version__` in serve environment
|
||||
- **KTransformers**: From `version.py` in ktransformers repository
|
||||
- **LLaMA-Factory**: From `llamafactory.__version__` in fine-tune environment
|
||||
|
||||
The versions are saved to `/workspace/versions.env` in the image:
|
||||
|
||||
```bash
|
||||
# View versions in running container
|
||||
cat /workspace/versions.env
|
||||
|
||||
# Output:
|
||||
SGLANG_VERSION=0.5.6
|
||||
KTRANSFORMERS_VERSION=0.4.3
|
||||
LLAMAFACTORY_VERSION=0.9.3
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Build Fails with Out of Disk Space
|
||||
|
||||
Check available disk space:
|
||||
```bash
|
||||
df -h
|
||||
```
|
||||
|
||||
The build requires approximately 15-20GB of disk space. Clean up Docker:
|
||||
```bash
|
||||
docker system prune -a
|
||||
```
|
||||
|
||||
### Version Extraction Fails
|
||||
|
||||
If version extraction fails (shows "unknown"), check:
|
||||
|
||||
1. The cloned repositories have the correct branches
|
||||
2. Python packages are properly installed in conda environments
|
||||
3. Version files exist in expected locations
|
||||
|
||||
You can manually verify by running:
|
||||
```bash
|
||||
docker run --rm <image> /bin/bash -c "
|
||||
source /opt/miniconda3/etc/profile.d/conda.sh &&
|
||||
conda activate serve &&
|
||||
python -c 'import sglang; print(sglang.__version__)'
|
||||
"
|
||||
```
|
||||
|
||||
### Push to DockerHub Fails
|
||||
|
||||
1. **Check login**: `docker login`
|
||||
2. **Check repository name**: Must include namespace (e.g., `kvcache/ktransformers`, not just `ktransformers`)
|
||||
3. **Network issues**: Use `--max-retries` and `--retry-delay` options
|
||||
4. **Rate limiting**: DockerHub has pull/push rate limits for free accounts
|
||||
|
||||
## Advanced Topics
|
||||
|
||||
### Custom Dockerfile Location
|
||||
|
||||
```bash
|
||||
./build-docker-tar.sh \
|
||||
--dockerfile /path/to/custom/Dockerfile \
|
||||
--context-dir /path/to/build/context
|
||||
```
|
||||
|
||||
### Building Only Inference Image (Future)
|
||||
|
||||
Currently, the image always includes both serve and fine-tune environments. To create an inference-only image, modify the Dockerfile to skip the fine-tune environment section.
|
||||
|
||||
### Customizing CPU Variants
|
||||
|
||||
To build only specific CPU variants, modify `kt-kernel/install.sh` or set environment variables in the Dockerfile.
|
||||
|
||||
### CI/CD Integration
|
||||
|
||||
The scripts are designed for manual execution but can be integrated into CI/CD pipelines:
|
||||
|
||||
```yaml
|
||||
# Example GitHub Actions workflow
|
||||
- name: Build and push Docker image
|
||||
run: |
|
||||
cd docker
|
||||
./push-to-dockerhub.sh \
|
||||
--cuda-version ${{ matrix.cuda_version }} \
|
||||
--repository ${{ secrets.DOCKER_REPOSITORY }} \
|
||||
--also-push-simplified
|
||||
```
|
||||
|
||||
## Support
|
||||
|
||||
For issues and questions:
|
||||
- File an issue at: https://github.com/kvcache-ai/ktransformers/issues
|
||||
- Check documentation: https://github.com/kvcache-ai/ktransformers
|
||||
|
||||
## License
|
||||
|
||||
This packaging system is part of KTransformers and follows the same license.
|
||||
498
docker/build-docker-tar.sh
Executable file
498
docker/build-docker-tar.sh
Executable file
@@ -0,0 +1,498 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# build-docker-tar.sh - Build Docker image and export to tar file
|
||||
#
|
||||
# This script builds a Docker image for ktransformers with standardized naming
|
||||
# and exports it to a tar file for distribution.
|
||||
#
|
||||
# Features:
|
||||
# - Automatic version detection from built image
|
||||
# - Standardized naming convention
|
||||
# - Multi-CPU variant support (AMX/AVX512/AVX2)
|
||||
# - Configurable build parameters
|
||||
# - Comprehensive error handling
|
||||
#
|
||||
# Usage:
|
||||
# ./build-docker-tar.sh [OPTIONS]
|
||||
#
|
||||
# Example:
|
||||
# ./build-docker-tar.sh \
|
||||
# --cuda-version 12.8.1 \
|
||||
# --ubuntu-mirror 1 \
|
||||
# --http-proxy "http://127.0.0.1:16981" \
|
||||
# --output-dir /path/to/output
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Get script directory
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
# Source utility functions
|
||||
# shellcheck source=docker-utils.sh
|
||||
source "$SCRIPT_DIR/docker-utils.sh"
|
||||
|
||||
################################################################################
|
||||
# Default Configuration
|
||||
################################################################################
|
||||
|
||||
# Build parameters
|
||||
CUDA_VERSION="12.8.1"
|
||||
UBUNTU_MIRROR="0"
|
||||
HTTP_PROXY=""
|
||||
HTTPS_PROXY=""
|
||||
CPU_VARIANT="x86-intel-multi"
|
||||
FUNCTIONALITY="sft"
|
||||
|
||||
# Paths
|
||||
DOCKERFILE="$SCRIPT_DIR/Dockerfile"
|
||||
CONTEXT_DIR="$SCRIPT_DIR"
|
||||
OUTPUT_DIR="."
|
||||
|
||||
# Options
|
||||
DRY_RUN=false
|
||||
KEEP_IMAGE=false
|
||||
EXTRA_BUILD_ARGS=()
|
||||
|
||||
################################################################################
|
||||
# Help Message
|
||||
################################################################################
|
||||
|
||||
usage() {
|
||||
cat <<EOF
|
||||
Usage: $0 [OPTIONS]
|
||||
|
||||
Build Docker image and export to tar file with standardized naming.
|
||||
|
||||
OPTIONS:
|
||||
Build Configuration:
|
||||
--cuda-version VERSION CUDA version (default: 12.8.1)
|
||||
Examples: 12.8.1, 12.6.1, 13.0.1
|
||||
|
||||
--ubuntu-mirror 0|1 Use Tsinghua mirror for Ubuntu packages
|
||||
(default: 0)
|
||||
|
||||
--http-proxy URL HTTP proxy URL
|
||||
Example: http://127.0.0.1:16981
|
||||
|
||||
--https-proxy URL HTTPS proxy URL
|
||||
Example: http://127.0.0.1:16981
|
||||
|
||||
--cpu-variant VARIANT CPU variant identifier
|
||||
(default: x86-intel-multi)
|
||||
|
||||
--functionality TYPE Functionality mode: sft or infer
|
||||
(default: sft, includes LLaMA-Factory)
|
||||
|
||||
Paths:
|
||||
--dockerfile PATH Path to Dockerfile
|
||||
(default: ./Dockerfile)
|
||||
|
||||
--context-dir PATH Docker build context directory
|
||||
(default: .)
|
||||
|
||||
--output-dir PATH Output directory for tar file
|
||||
(default: current directory)
|
||||
|
||||
Options:
|
||||
--dry-run Preview build command without executing
|
||||
--keep-image Keep Docker image after exporting tar
|
||||
--build-arg KEY=VALUE Additional build arguments (can be repeated)
|
||||
-h, --help Show this help message
|
||||
|
||||
EXAMPLES:
|
||||
# Basic build with default settings
|
||||
$0
|
||||
|
||||
# Build with CUDA 12.8.1 and mirror
|
||||
$0 --cuda-version 12.8.1 --ubuntu-mirror 1
|
||||
|
||||
# Build with proxy and custom output directory
|
||||
$0 \\
|
||||
--cuda-version 12.8.1 \\
|
||||
--http-proxy "http://127.0.0.1:16981" \\
|
||||
--https-proxy "http://127.0.0.1:16981" \\
|
||||
--output-dir /mnt/data/docker-images
|
||||
|
||||
# Dry run to preview
|
||||
$0 --cuda-version 12.8.1 --dry-run
|
||||
|
||||
OUTPUT:
|
||||
The tar file will be named following the convention:
|
||||
sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}.tar
|
||||
|
||||
Example: sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
|
||||
EOF
|
||||
exit 0
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Argument Parsing
|
||||
################################################################################
|
||||
|
||||
parse_args() {
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--cuda-version)
|
||||
CUDA_VERSION="$2"
|
||||
shift 2
|
||||
;;
|
||||
--ubuntu-mirror)
|
||||
UBUNTU_MIRROR="$2"
|
||||
shift 2
|
||||
;;
|
||||
--http-proxy)
|
||||
HTTP_PROXY="$2"
|
||||
shift 2
|
||||
;;
|
||||
--https-proxy)
|
||||
HTTPS_PROXY="$2"
|
||||
shift 2
|
||||
;;
|
||||
--cpu-variant)
|
||||
CPU_VARIANT="$2"
|
||||
shift 2
|
||||
;;
|
||||
--functionality)
|
||||
FUNCTIONALITY="$2"
|
||||
shift 2
|
||||
;;
|
||||
--dockerfile)
|
||||
DOCKERFILE="$2"
|
||||
shift 2
|
||||
;;
|
||||
--context-dir)
|
||||
CONTEXT_DIR="$2"
|
||||
shift 2
|
||||
;;
|
||||
--output-dir)
|
||||
OUTPUT_DIR="$2"
|
||||
shift 2
|
||||
;;
|
||||
--dry-run)
|
||||
DRY_RUN=true
|
||||
shift
|
||||
;;
|
||||
--keep-image)
|
||||
KEEP_IMAGE=true
|
||||
shift
|
||||
;;
|
||||
--build-arg)
|
||||
EXTRA_BUILD_ARGS+=("--build-arg" "$2")
|
||||
shift 2
|
||||
;;
|
||||
-h|--help)
|
||||
usage
|
||||
;;
|
||||
*)
|
||||
log_error "Unknown option: $1"
|
||||
echo "Use -h or --help for usage information"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Validation
|
||||
################################################################################
|
||||
|
||||
validate_config() {
|
||||
log_step "Validating configuration"
|
||||
|
||||
# Check Docker is running
|
||||
check_docker_running || exit 1
|
||||
|
||||
# Validate CUDA version
|
||||
validate_cuda_version "$CUDA_VERSION" || exit 1
|
||||
|
||||
# Check Dockerfile exists
|
||||
if [ ! -f "$DOCKERFILE" ]; then
|
||||
log_error "Dockerfile not found: $DOCKERFILE"
|
||||
exit 1
|
||||
fi
|
||||
log_info "Using Dockerfile: $DOCKERFILE"
|
||||
|
||||
# Check context directory exists
|
||||
if [ ! -d "$CONTEXT_DIR" ]; then
|
||||
log_error "Context directory not found: $CONTEXT_DIR"
|
||||
exit 1
|
||||
fi
|
||||
log_info "Using context directory: $CONTEXT_DIR"
|
||||
|
||||
# Create output directory if it doesn't exist
|
||||
if [ ! -d "$OUTPUT_DIR" ]; then
|
||||
log_info "Creating output directory: $OUTPUT_DIR"
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
fi
|
||||
|
||||
# Check output directory is writable
|
||||
check_writable "$OUTPUT_DIR" || exit 1
|
||||
log_info "Output directory: $OUTPUT_DIR"
|
||||
|
||||
# Check disk space (recommend at least 20GB free)
|
||||
check_disk_space 20 "$OUTPUT_DIR" || {
|
||||
log_warning "Continuing despite low disk space warning..."
|
||||
}
|
||||
|
||||
# Validate functionality mode
|
||||
if [[ "$FUNCTIONALITY" != "sft" && "$FUNCTIONALITY" != "infer" ]]; then
|
||||
log_error "Invalid functionality mode: $FUNCTIONALITY"
|
||||
log_error "Must be 'sft' or 'infer'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log_success "Configuration validated"
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Build Docker Image
|
||||
################################################################################
|
||||
|
||||
build_image() {
|
||||
local temp_tag="ktransformers:temp-build-$(get_beijing_timestamp)"
|
||||
|
||||
log_step "Building Docker image" >&2
|
||||
log_info "Temporary tag: $temp_tag" >&2
|
||||
|
||||
# Prepare build arguments
|
||||
local build_args=()
|
||||
build_args+=("--build-arg" "CUDA_VERSION=$CUDA_VERSION")
|
||||
build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
|
||||
build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
|
||||
build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
|
||||
|
||||
# Add proxy settings if provided
|
||||
if [ -n "$HTTP_PROXY" ]; then
|
||||
build_args+=("--build-arg" "HTTP_PROXY=$HTTP_PROXY")
|
||||
fi
|
||||
if [ -n "$HTTPS_PROXY" ]; then
|
||||
build_args+=("--build-arg" "HTTPS_PROXY=$HTTPS_PROXY")
|
||||
fi
|
||||
|
||||
# Add extra build args
|
||||
build_args+=("${EXTRA_BUILD_ARGS[@]}")
|
||||
|
||||
# Add network host
|
||||
build_args+=("--network" "host")
|
||||
|
||||
# Build command
|
||||
local build_cmd=(
|
||||
docker build
|
||||
-f "$DOCKERFILE"
|
||||
"${build_args[@]}"
|
||||
-t "$temp_tag"
|
||||
"$CONTEXT_DIR"
|
||||
)
|
||||
|
||||
# Display build command
|
||||
{
|
||||
log_info "Build command:"
|
||||
printf ' %s \\\n' "${build_cmd[@]:0:${#build_cmd[@]}-1}"
|
||||
printf ' %s\n' "${build_cmd[-1]}"
|
||||
} >&2
|
||||
|
||||
if [ "$DRY_RUN" = true ]; then
|
||||
log_warning "DRY RUN: Skipping actual build" >&2
|
||||
echo "$temp_tag"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Execute build
|
||||
log_info "Starting Docker build (this may take 30-60 minutes)..." >&2
|
||||
if "${build_cmd[@]}" >&2; then
|
||||
log_success "Docker image built successfully" >&2
|
||||
echo "$temp_tag"
|
||||
else
|
||||
log_error "Docker build failed" >&2
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Extract Versions and Generate Name
|
||||
################################################################################
|
||||
|
||||
generate_tar_name() {
|
||||
local image_tag="$1"
|
||||
local timestamp="$2"
|
||||
|
||||
if [ "$DRY_RUN" = true ]; then
|
||||
log_warning "DRY RUN: Using placeholder versions"
|
||||
# Use placeholder versions for dry run
|
||||
local versions="SGLANG_VERSION=0.5.6
|
||||
KTRANSFORMERS_VERSION=0.4.3
|
||||
LLAMAFACTORY_VERSION=0.9.3"
|
||||
else
|
||||
# Extract versions from image
|
||||
local versions
|
||||
versions=$(extract_versions_from_image "$image_tag")
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
log_error "Failed to extract versions from image"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Validate versions
|
||||
if ! validate_versions "$versions"; then
|
||||
log_error "Version validation failed"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Generate standardized image name
|
||||
local tar_name
|
||||
tar_name=$(generate_image_name "$versions" "$CUDA_VERSION" "$CPU_VARIANT" "$FUNCTIONALITY" "$timestamp")
|
||||
|
||||
if [ -z "$tar_name" ]; then
|
||||
log_error "Failed to generate image name"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "$tar_name"
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Export to Tar
|
||||
################################################################################
|
||||
|
||||
export_to_tar() {
|
||||
local image_tag="$1"
|
||||
local tar_name="$2"
|
||||
local tar_path="$OUTPUT_DIR/${tar_name}.tar"
|
||||
|
||||
log_step "Exporting image to tar file" >&2
|
||||
log_info "Output: $tar_path" >&2
|
||||
|
||||
if [ "$DRY_RUN" = true ]; then
|
||||
log_warning "DRY RUN: Skipping actual export" >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Check if tar file already exists
|
||||
if [ -f "$tar_path" ]; then
|
||||
log_warning "Tar file already exists: $tar_path" >&2
|
||||
read -p "Overwrite? (y/N) " -n 1 -r
|
||||
echo
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
log_error "Export cancelled by user" >&2
|
||||
exit 1
|
||||
fi
|
||||
rm -f "$tar_path"
|
||||
fi
|
||||
|
||||
# Tag image with the standardized name before saving
|
||||
log_info "Tagging image with standardized name: $tar_name" >&2
|
||||
if ! docker tag "$image_tag" "$tar_name"; then
|
||||
log_error "Failed to tag image" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Export image with the standardized tag
|
||||
log_info "Exporting image (this may take several minutes)..." >&2
|
||||
if docker save -o "$tar_path" "$tar_name"; then
|
||||
log_success "Image exported successfully" >&2
|
||||
|
||||
# Get file size
|
||||
local size
|
||||
size=$(du -h "$tar_path" | cut -f1)
|
||||
log_info "Tar file size: $size" >&2
|
||||
else
|
||||
log_error "Failed to export image" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "$tar_path"
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Cleanup
|
||||
################################################################################
|
||||
|
||||
cleanup() {
|
||||
local image_tag="$1"
|
||||
|
||||
if [ "$KEEP_IMAGE" = true ]; then
|
||||
log_info "Keeping Docker image as requested: $image_tag"
|
||||
else
|
||||
cleanup_temp_images "$image_tag"
|
||||
fi
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Main
|
||||
################################################################################
|
||||
|
||||
main() {
|
||||
log_step "KTransformers Docker Image Build and Export"
|
||||
|
||||
# Parse arguments
|
||||
parse_args "$@"
|
||||
|
||||
# Validate configuration
|
||||
validate_config
|
||||
|
||||
# Generate timestamp
|
||||
TIMESTAMP=$(get_beijing_timestamp)
|
||||
log_info "Build timestamp: $TIMESTAMP"
|
||||
|
||||
# Display configuration
|
||||
display_summary "Build Configuration" \
|
||||
"CUDA Version: $CUDA_VERSION" \
|
||||
"Ubuntu Mirror: $UBUNTU_MIRROR" \
|
||||
"CPU Variant: $CPU_VARIANT" \
|
||||
"Functionality: $FUNCTIONALITY" \
|
||||
"HTTP Proxy: ${HTTP_PROXY:-<not set>}" \
|
||||
"HTTPS Proxy: ${HTTPS_PROXY:-<not set>}" \
|
||||
"Dockerfile: $DOCKERFILE" \
|
||||
"Context Dir: $CONTEXT_DIR" \
|
||||
"Output Dir: $OUTPUT_DIR" \
|
||||
"Timestamp: $TIMESTAMP" \
|
||||
"Dry Run: $DRY_RUN"
|
||||
|
||||
# Build image
|
||||
TEMP_TAG=$(build_image)
|
||||
|
||||
# Generate tar name
|
||||
TAR_NAME=$(generate_tar_name "$TEMP_TAG" "$TIMESTAMP")
|
||||
log_info "Generated tar name: $TAR_NAME.tar"
|
||||
|
||||
if [ "$DRY_RUN" = true ]; then
|
||||
# Display dry-run summary
|
||||
display_summary "DRY RUN Preview" \
|
||||
"This is what would be built:" \
|
||||
"" \
|
||||
"Temporary Docker tag: $TEMP_TAG" \
|
||||
"Tar filename: $TAR_NAME.tar" \
|
||||
"Output path: $OUTPUT_DIR/$TAR_NAME.tar" \
|
||||
"" \
|
||||
"After build, you would run:" \
|
||||
" docker load -i $OUTPUT_DIR/$TAR_NAME.tar" \
|
||||
" docker run -it --rm ${TAR_NAME} /bin/bash"
|
||||
|
||||
log_success "DRY RUN: Preview complete. Remove --dry-run to build."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Export to tar
|
||||
TAR_PATH=$(export_to_tar "$TEMP_TAG" "$TAR_NAME")
|
||||
|
||||
# Cleanup
|
||||
cleanup "$TEMP_TAG"
|
||||
|
||||
# Display summary
|
||||
display_summary "Build Complete" \
|
||||
"Docker Image: $TEMP_TAG ($([ "$KEEP_IMAGE" = true ] && echo "kept" || echo "removed"))" \
|
||||
"Tar File: $TAR_PATH" \
|
||||
"" \
|
||||
"To load the image:" \
|
||||
" docker load -i $TAR_PATH" \
|
||||
"" \
|
||||
"To run the container:" \
|
||||
" docker run -it --rm ${TAR_NAME} /bin/bash"
|
||||
|
||||
log_success "All done!"
|
||||
}
|
||||
|
||||
# Run main function
|
||||
main "$@"
|
||||
372
docker/docker-utils.sh
Executable file
372
docker/docker-utils.sh
Executable file
@@ -0,0 +1,372 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# docker-utils.sh - Shared utility functions for Docker image build and publish scripts
|
||||
#
|
||||
# This script provides common functions for:
|
||||
# - Timestamp generation (Beijing timezone)
|
||||
# - Version extraction from Docker images
|
||||
# - Image name generation following naming conventions
|
||||
# - Colored logging
|
||||
# - Validation and error handling
|
||||
#
|
||||
# Usage: source docker-utils.sh
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Color codes for logging
|
||||
COLOR_RED='\033[0;31m'
|
||||
COLOR_GREEN='\033[0;32m'
|
||||
COLOR_YELLOW='\033[1;33m'
|
||||
COLOR_BLUE='\033[0;34m'
|
||||
COLOR_CYAN='\033[0;36m'
|
||||
COLOR_RESET='\033[0m'
|
||||
|
||||
################################################################################
|
||||
# Logging Functions
|
||||
################################################################################
|
||||
|
||||
log_info() {
|
||||
echo -e "${COLOR_BLUE}[INFO]${COLOR_RESET} $*"
|
||||
}
|
||||
|
||||
log_success() {
|
||||
echo -e "${COLOR_GREEN}[SUCCESS]${COLOR_RESET} $*"
|
||||
}
|
||||
|
||||
log_warning() {
|
||||
echo -e "${COLOR_YELLOW}[WARNING]${COLOR_RESET} $*"
|
||||
}
|
||||
|
||||
log_error() {
|
||||
echo -e "${COLOR_RED}[ERROR]${COLOR_RESET} $*" >&2
|
||||
}
|
||||
|
||||
log_step() {
|
||||
echo -e "\n${COLOR_CYAN}==>${COLOR_RESET} $*"
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Timestamp Functions
|
||||
################################################################################
|
||||
|
||||
# Generate timestamp in Beijing timezone (UTC+8)
|
||||
# Format: YYYYMMDDHHMMSS
|
||||
# Example: 20241212143022
|
||||
get_beijing_timestamp() {
|
||||
# Try to use TZ environment variable approach
|
||||
if date --version &>/dev/null 2>&1; then
|
||||
# GNU date (Linux)
|
||||
TZ='Asia/Shanghai' date '+%Y%m%d%H%M%S'
|
||||
else
|
||||
# BSD date (macOS)
|
||||
TZ='Asia/Shanghai' date '+%Y%m%d%H%M%S'
|
||||
fi
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# CUDA Version Parsing
|
||||
################################################################################
|
||||
|
||||
# Parse CUDA version to short format
|
||||
# Input: 12.8.1 or 12.8 or 13.0.1
|
||||
# Output: cu128 or cu130
|
||||
parse_cuda_short_version() {
|
||||
local cuda_version="$1"
|
||||
|
||||
# Extract major and minor version
|
||||
local major minor
|
||||
major=$(echo "$cuda_version" | cut -d. -f1)
|
||||
minor=$(echo "$cuda_version" | cut -d. -f2)
|
||||
|
||||
# Validate
|
||||
if [[ ! "$major" =~ ^[0-9]+$ ]] || [[ ! "$minor" =~ ^[0-9]+$ ]]; then
|
||||
log_error "Invalid CUDA version format: $cuda_version"
|
||||
log_error "Expected format: X.Y.Z (e.g., 12.8.1)"
|
||||
return 1
|
||||
fi
|
||||
|
||||
echo "cu${major}${minor}"
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Version Extraction
|
||||
################################################################################
|
||||
|
||||
# Extract versions from built Docker image
|
||||
# Input: image tag (e.g., ktransformers:temp-build-20241212)
|
||||
# Output: Sets environment variables or prints to stdout
|
||||
# SGLANG_VERSION=x.y.z
|
||||
# KTRANSFORMERS_VERSION=x.y.z
|
||||
# LLAMAFACTORY_VERSION=x.y.z
|
||||
extract_versions_from_image() {
|
||||
local image_tag="$1"
|
||||
|
||||
log_step "Extracting versions from image: $image_tag"
|
||||
|
||||
# Check if image exists
|
||||
if ! docker image inspect "$image_tag" &>/dev/null; then
|
||||
log_error "Image not found: $image_tag"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Extract versions.env file from the image
|
||||
local versions_content
|
||||
versions_content=$(docker run --rm "$image_tag" cat /workspace/versions.env 2>/dev/null)
|
||||
|
||||
if [ -z "$versions_content" ]; then
|
||||
log_error "Failed to extract versions from image"
|
||||
log_error "The /workspace/versions.env file may not exist in the image"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Parse and display versions
|
||||
log_info "Extracted versions:"
|
||||
echo "$versions_content" | while IFS= read -r line; do
|
||||
log_info " $line"
|
||||
done
|
||||
|
||||
# Output the content (caller can parse this or eval it)
|
||||
echo "$versions_content"
|
||||
}
|
||||
|
||||
# Validate that all required versions were extracted
|
||||
# Input: versions string (output from extract_versions_from_image)
|
||||
validate_versions() {
|
||||
local versions="$1"
|
||||
local all_valid=true
|
||||
|
||||
# Check each required version
|
||||
for var in SGLANG_VERSION KTRANSFORMERS_VERSION LLAMAFACTORY_VERSION; do
|
||||
local value
|
||||
value=$(echo "$versions" | grep "^${var}=" | cut -d= -f2)
|
||||
|
||||
if [ -z "$value" ]; then
|
||||
log_error "Missing version: $var"
|
||||
all_valid=false
|
||||
elif [ "$value" = "unknown" ]; then
|
||||
log_warning "Version is 'unknown': $var"
|
||||
# Don't fail, but warn user
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$all_valid" = false ]; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Image Naming
|
||||
################################################################################
|
||||
|
||||
# Generate standardized image name
|
||||
# Input:
|
||||
# $1: versions string (from extract_versions_from_image)
|
||||
# $2: cuda_version (e.g., 12.8.1)
|
||||
# $3: cpu_variant (e.g., x86-intel-multi)
|
||||
# $4: functionality (e.g., sft_llamafactory or infer)
|
||||
# $5: timestamp (optional, will generate if not provided)
|
||||
# Output: Standardized image name
|
||||
# Format: sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}
|
||||
generate_image_name() {
|
||||
local versions="$1"
|
||||
local cuda_version="$2"
|
||||
local cpu_variant="$3"
|
||||
local functionality="$4"
|
||||
local timestamp="${5:-$(get_beijing_timestamp)}"
|
||||
|
||||
# Parse versions from the versions string
|
||||
local sglang_ver ktrans_ver llama_ver
|
||||
sglang_ver=$(echo "$versions" | grep "^SGLANG_VERSION=" | cut -d= -f2)
|
||||
ktrans_ver=$(echo "$versions" | grep "^KTRANSFORMERS_VERSION=" | cut -d= -f2)
|
||||
llama_ver=$(echo "$versions" | grep "^LLAMAFACTORY_VERSION=" | cut -d= -f2)
|
||||
|
||||
# Validate versions were extracted
|
||||
if [ -z "$sglang_ver" ] || [ -z "$ktrans_ver" ] || [ -z "$llama_ver" ]; then
|
||||
log_error "Failed to parse versions from input"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Parse CUDA short version
|
||||
local cuda_short
|
||||
cuda_short=$(parse_cuda_short_version "$cuda_version")
|
||||
|
||||
# Build functionality string
|
||||
local func_str
|
||||
if [ "$functionality" = "sft" ]; then
|
||||
func_str="sft_llamafactory-v${llama_ver}"
|
||||
else
|
||||
func_str="infer"
|
||||
fi
|
||||
|
||||
# Generate full image name
|
||||
# Format: sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}
|
||||
local image_name
|
||||
image_name="sglang-v${sglang_ver}_ktransformers-v${ktrans_ver}_${cpu_variant}_${cuda_short}_${func_str}_${timestamp}"
|
||||
|
||||
echo "$image_name"
|
||||
}
|
||||
|
||||
# Generate simplified tag for DockerHub
|
||||
# Input:
|
||||
# $1: ktransformers_version (e.g., 0.4.3)
|
||||
# $2: cuda_version (e.g., 12.8.1)
|
||||
# Output: Simplified tag (e.g., v0.4.3-cu128)
|
||||
generate_simplified_tag() {
|
||||
local ktrans_ver="$1"
|
||||
local cuda_version="$2"
|
||||
|
||||
local cuda_short
|
||||
cuda_short=$(parse_cuda_short_version "$cuda_version")
|
||||
|
||||
echo "v${ktrans_ver}-${cuda_short}"
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Validation Functions
|
||||
################################################################################
|
||||
|
||||
# Check if Docker daemon is running
|
||||
check_docker_running() {
|
||||
if ! docker info &>/dev/null; then
|
||||
log_error "Docker daemon is not running"
|
||||
log_error "Please start Docker and try again"
|
||||
return 1
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
# Check if user is logged into Docker registry
|
||||
# Input: registry (optional, default: docker.io)
|
||||
check_docker_login() {
|
||||
local registry="${1:-docker.io}"
|
||||
|
||||
# Try to check auth by attempting a trivial operation
|
||||
if ! docker login --help &>/dev/null; then
|
||||
log_error "Docker CLI is not available"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Note: This is a best-effort check
|
||||
# docker login status is not always easy to check programmatically
|
||||
log_info "Assuming Docker login is configured"
|
||||
log_info "If push fails, please run: docker login $registry"
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
# Validate CUDA version format
|
||||
validate_cuda_version() {
|
||||
local cuda_version="$1"
|
||||
|
||||
if [[ ! "$cuda_version" =~ ^[0-9]+\.[0-9]+(\.[0-9]+)?$ ]]; then
|
||||
log_error "Invalid CUDA version format: $cuda_version"
|
||||
log_error "Expected format: X.Y or X.Y.Z (e.g., 12.8 or 12.8.1)"
|
||||
return 1
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
# Check available disk space
|
||||
# Input: required space in GB
|
||||
check_disk_space() {
|
||||
local required_gb="$1"
|
||||
local output_dir="${2:-.}"
|
||||
|
||||
# Get available space in GB (works on Linux and macOS)
|
||||
local available_kb
|
||||
if df -k "$output_dir" &>/dev/null; then
|
||||
available_kb=$(df -k "$output_dir" | tail -1 | awk '{print $4}')
|
||||
local available_gb=$((available_kb / 1024 / 1024))
|
||||
|
||||
log_info "Available disk space: ${available_gb}GB"
|
||||
|
||||
if [ "$available_gb" -lt "$required_gb" ]; then
|
||||
log_warning "Low disk space: ${available_gb}GB available, ${required_gb}GB recommended"
|
||||
return 1
|
||||
fi
|
||||
else
|
||||
log_warning "Unable to check disk space"
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
# Check if file/directory exists and is writable
|
||||
check_writable() {
|
||||
local path="$1"
|
||||
|
||||
if [ -e "$path" ]; then
|
||||
if [ ! -w "$path" ]; then
|
||||
log_error "Path exists but is not writable: $path"
|
||||
return 1
|
||||
fi
|
||||
else
|
||||
# Try to create parent directory to test writability
|
||||
local parent_dir
|
||||
parent_dir=$(dirname "$path")
|
||||
if [ ! -w "$parent_dir" ]; then
|
||||
log_error "Parent directory is not writable: $parent_dir"
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Cleanup Functions
|
||||
################################################################################
|
||||
|
||||
# Remove intermediate Docker images
|
||||
cleanup_temp_images() {
|
||||
local image_tag="$1"
|
||||
|
||||
log_step "Cleaning up temporary image: $image_tag"
|
||||
|
||||
if docker image inspect "$image_tag" &>/dev/null; then
|
||||
docker rmi "$image_tag" &>/dev/null || true
|
||||
log_success "Cleaned up temporary image"
|
||||
fi
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Display Functions
|
||||
################################################################################
|
||||
|
||||
# Display a summary box
|
||||
display_summary() {
|
||||
local title="$1"
|
||||
shift
|
||||
local lines=("$@")
|
||||
|
||||
local width=80
|
||||
local border=$(printf '=%.0s' $(seq 1 $width))
|
||||
|
||||
echo ""
|
||||
echo "$border"
|
||||
echo " $title"
|
||||
echo "$border"
|
||||
for line in "${lines[@]}"; do
|
||||
echo " $line"
|
||||
done
|
||||
echo "$border"
|
||||
echo ""
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Export functions
|
||||
################################################################################
|
||||
|
||||
# Export all functions so they can be used by scripts that source this file
|
||||
export -f log_info log_success log_warning log_error log_step
|
||||
export -f get_beijing_timestamp
|
||||
export -f parse_cuda_short_version
|
||||
export -f extract_versions_from_image validate_versions
|
||||
export -f generate_image_name generate_simplified_tag
|
||||
export -f check_docker_running check_docker_login validate_cuda_version
|
||||
export -f check_disk_space check_writable
|
||||
export -f cleanup_temp_images
|
||||
export -f display_summary
|
||||
1142
docker/push-to-dockerhub.sh
Executable file
1142
docker/push-to-dockerhub.sh
Executable file
File diff suppressed because it is too large
Load Diff
@@ -28,7 +28,7 @@ option(KTRANSFORMERS_CPU_MOE_AMD "ktransformers: CPU use moe kernel for amd" OFF
|
||||
# LTO control
|
||||
option(CPUINFER_ENABLE_LTO "Enable link time optimization (IPO)" OFF)
|
||||
|
||||
project(kt_kernel_ext VERSION 0.1.0)
|
||||
project(kt_kernel_ext VERSION 0.4.2)
|
||||
# Choose compilers BEFORE project() so CMake honors them
|
||||
if(USE_CONDA_TOOLCHAIN)
|
||||
if(NOT DEFINED ENV{CONDA_PREFIX} OR NOT EXISTS "$ENV{CONDA_PREFIX}")
|
||||
|
||||
37
kt-kernel/MANIFEST.in
Normal file
37
kt-kernel/MANIFEST.in
Normal file
@@ -0,0 +1,37 @@
|
||||
# MANIFEST.in for kt-kernel
|
||||
# Ensures source distribution includes all necessary files for building from source
|
||||
|
||||
# Core build files
|
||||
include CMakeLists.txt
|
||||
include CMakePresets.json
|
||||
include setup.py
|
||||
include pyproject.toml
|
||||
include requirements.txt
|
||||
include README.md
|
||||
include LICENSE
|
||||
|
||||
# CMake modules and configuration
|
||||
recursive-include cmake *.cmake *.in
|
||||
|
||||
# C++ source files
|
||||
recursive-include cpu_backend *.h *.hpp *.cpp *.c *.cc
|
||||
recursive-include operators *.h *.hpp *.cpp *.c *.cc
|
||||
include ext_bindings.cpp
|
||||
|
||||
# Python package
|
||||
recursive-include python *.py
|
||||
|
||||
# Third-party dependencies (vendored)
|
||||
recursive-include third_party *
|
||||
|
||||
# Exclude compiled and cache files
|
||||
global-exclude *.pyc
|
||||
global-exclude *.pyo
|
||||
global-exclude __pycache__
|
||||
global-exclude .git*
|
||||
global-exclude *.so
|
||||
global-exclude *.o
|
||||
global-exclude *.a
|
||||
global-exclude build
|
||||
global-exclude dist
|
||||
global-exclude *.egg-info
|
||||
@@ -47,14 +47,75 @@ High-performance kernel operations for KTransformers, featuring CPU-optimized Mo
|
||||
|
||||
## Installation
|
||||
|
||||
### Prerequisites
|
||||
### Option 1: Install from PyPI (Recommended for Most Users)
|
||||
|
||||
Choose the version matching your CUDA installation:
|
||||
|
||||
```bash
|
||||
# For CUDA 11.8
|
||||
pip install kt-kernel==0.4.2.cu118
|
||||
|
||||
# For CUDA 12.1
|
||||
pip install kt-kernel==0.4.2.cu121
|
||||
|
||||
# For CUDA 12.4
|
||||
pip install kt-kernel==0.4.2.cu124
|
||||
|
||||
# For CUDA 12.6
|
||||
pip install kt-kernel==0.4.2.cu126
|
||||
```
|
||||
|
||||
> **Note**: Replace `0.4.2` with the [latest version](https://pypi.org/project/kt-kernel/#history) if available.
|
||||
|
||||
**Features:**
|
||||
- ✅ **Automatic CPU detection**: Detects your CPU and loads the optimal kernel variant
|
||||
- ✅ **Multi-variant wheel**: Includes AMX, AVX512, and AVX2 variants in a single package
|
||||
- ✅ **No compilation needed**: Pre-built wheels for Python 3.10, 3.11, 3.12
|
||||
- ✅ **Multiple CUDA versions**: Choose the version matching your environment
|
||||
|
||||
**Requirements:**
|
||||
- CUDA 11.8+ or 12.x runtime (must match the package version you install)
|
||||
- PyTorch 2.0+ (install separately, must match CUDA version)
|
||||
- Linux x86-64
|
||||
|
||||
**CPU Variants Included:**
|
||||
| Variant | CPU Support | Use Case |
|
||||
|---------|-------------|----------|
|
||||
| **AMX** | Intel Sapphire Rapids+ | Best performance on latest Intel CPUs |
|
||||
| **AVX512** | Intel Skylake-X/Ice Lake/Cascade Lake | AVX512-capable CPUs without AMX |
|
||||
| **AVX2** | Intel Haswell+, AMD Zen+ | Maximum compatibility |
|
||||
|
||||
**Check which variant is loaded:**
|
||||
```python
|
||||
import kt_kernel
|
||||
print(f"CPU variant: {kt_kernel.__cpu_variant__}") # 'amx', 'avx512', or 'avx2'
|
||||
print(f"Version: {kt_kernel.__version__}")
|
||||
```
|
||||
|
||||
**Environment Variables:**
|
||||
```bash
|
||||
# Override automatic CPU detection
|
||||
export KT_KERNEL_CPU_VARIANT=avx2 # or 'avx512', 'amx'
|
||||
|
||||
# Enable debug output
|
||||
export KT_KERNEL_DEBUG=1
|
||||
python -c "import kt_kernel"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Option 2: Install from Source (For AMD, ARM, or Custom Builds)
|
||||
|
||||
If you need AMD (BLIS), ARM (KML), or custom CUDA versions, build from source:
|
||||
|
||||
#### Prerequisites
|
||||
|
||||
First, initialize git submodules:
|
||||
```bash
|
||||
git submodule update --init --recursive
|
||||
```
|
||||
|
||||
### Quick Installation (Recommended)
|
||||
#### Quick Installation
|
||||
|
||||
Step 0: Create and activate a conda environment (recommended):
|
||||
|
||||
@@ -65,7 +126,7 @@ conda activate kt-kernel
|
||||
|
||||
You can now install in two clear steps using the same script.
|
||||
|
||||
Option A: Two-step (specify dependencies installation and build separately)
|
||||
**Option A: Two-step** (specify dependencies installation and build separately)
|
||||
|
||||
```bash
|
||||
# 1) Install system prerequisites (cmake, hwloc, pkg-config)
|
||||
@@ -76,7 +137,7 @@ Option A: Two-step (specify dependencies installation and build separately)
|
||||
./install.sh build
|
||||
```
|
||||
|
||||
Option B: One-step
|
||||
**Option B: One-step**
|
||||
|
||||
```bash
|
||||
./install.sh
|
||||
|
||||
@@ -161,6 +161,34 @@ build_step() {
|
||||
echo "Skipping clean of $REPO_ROOT/build (requested by --no-clean)"
|
||||
fi
|
||||
|
||||
# Check for multi-variant build mode (Docker environment)
|
||||
if [ "${CPUINFER_BUILD_ALL_VARIANTS:-0}" = "1" ]; then
|
||||
echo "=========================================="
|
||||
echo "Building ALL CPU variants (AMX/AVX512/AVX2)"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo "This will build three variants in a single wheel:"
|
||||
echo " - AMX variant (Intel Sapphire Rapids+)"
|
||||
echo " - AVX512 variant (Intel Skylake-X/Ice Lake+)"
|
||||
echo " - AVX2 variant (maximum compatibility)"
|
||||
echo ""
|
||||
echo "Runtime CPU detection will automatically select the best variant."
|
||||
echo ""
|
||||
|
||||
export CPUINFER_FORCE_REBUILD=1
|
||||
export CPUINFER_BUILD_TYPE=${CPUINFER_BUILD_TYPE:-Release}
|
||||
export CPUINFER_PARALLEL=${CPUINFER_PARALLEL:-8}
|
||||
|
||||
echo "Building with:"
|
||||
echo " CPUINFER_BUILD_ALL_VARIANTS=1"
|
||||
echo " CPUINFER_BUILD_TYPE=$CPUINFER_BUILD_TYPE"
|
||||
echo " CPUINFER_PARALLEL=$CPUINFER_PARALLEL"
|
||||
echo ""
|
||||
|
||||
pip install . -v
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [ "$MANUAL_MODE" = "0" ]; then
|
||||
# Auto-detection mode
|
||||
echo "=========================================="
|
||||
|
||||
@@ -5,7 +5,8 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "kt-kernel"
|
||||
version = "0.1.0"
|
||||
# Version is dynamically read from ../version.py via setup.py
|
||||
dynamic = ["version"]
|
||||
description = "KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)"
|
||||
readme = "README.md"
|
||||
authors = [{ name = "kvcache-ai" }]
|
||||
|
||||
@@ -5,6 +5,9 @@
|
||||
KT-Kernel provides high-performance kernel operations for KTransformers,
|
||||
including CPU-optimized MoE inference with AMX, AVX, and KML support.
|
||||
|
||||
The package automatically detects your CPU capabilities and loads the optimal
|
||||
kernel variant (AMX, AVX512, or AVX2) at runtime.
|
||||
|
||||
Example usage:
|
||||
>>> from kt_kernel import KTMoEWrapper
|
||||
>>> wrapper = KTMoEWrapper(
|
||||
@@ -20,11 +23,41 @@ Example usage:
|
||||
... chunked_prefill_size=512,
|
||||
... method="AMXINT4"
|
||||
... )
|
||||
|
||||
Check which CPU variant is loaded:
|
||||
>>> import kt_kernel
|
||||
>>> print(kt_kernel.__cpu_variant__) # 'amx', 'avx512', or 'avx2'
|
||||
|
||||
Environment Variables:
|
||||
KT_KERNEL_CPU_VARIANT: Override automatic detection ('amx', 'avx512', 'avx2')
|
||||
KT_KERNEL_DEBUG: Enable debug output ('1' to enable)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# Detect CPU and load optimal extension variant
|
||||
from ._cpu_detect import initialize as _initialize_cpu
|
||||
_kt_kernel_ext, __cpu_variant__ = _initialize_cpu()
|
||||
|
||||
# Make the extension module available to other modules in this package
|
||||
import sys
|
||||
sys.modules['kt_kernel_ext'] = _kt_kernel_ext
|
||||
|
||||
# Also expose kt_kernel_ext as an attribute for backward compatibility
|
||||
kt_kernel_ext = _kt_kernel_ext
|
||||
|
||||
# Import main API
|
||||
from .experts import KTMoEWrapper
|
||||
|
||||
__version__ = "0.1.0"
|
||||
__all__ = ["KTMoEWrapper"]
|
||||
# Read version from project root version.py
|
||||
import os
|
||||
_root_version_file = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'version.py')
|
||||
if os.path.exists(_root_version_file):
|
||||
_version_ns = {}
|
||||
with open(_root_version_file, 'r', encoding='utf-8') as f:
|
||||
exec(f.read(), _version_ns)
|
||||
__version__ = _version_ns.get('__version__', '0.4.2')
|
||||
else:
|
||||
__version__ = "0.4.2"
|
||||
|
||||
__all__ = ["KTMoEWrapper", "kt_kernel_ext", "__cpu_variant__", "__version__"]
|
||||
|
||||
233
kt-kernel/python/_cpu_detect.py
Normal file
233
kt-kernel/python/_cpu_detect.py
Normal file
@@ -0,0 +1,233 @@
|
||||
"""
|
||||
CPU feature detection and optimal kernel loader for kt-kernel.
|
||||
|
||||
This module automatically detects CPU capabilities and loads the best available
|
||||
kernel variant (AMX, AVX512, or AVX2) at runtime.
|
||||
|
||||
Environment Variables:
|
||||
KT_KERNEL_CPU_VARIANT: Override automatic detection ('amx', 'avx512', 'avx2')
|
||||
KT_KERNEL_DEBUG: Enable debug output ('1' to enable)
|
||||
|
||||
Example:
|
||||
>>> import kt_kernel
|
||||
>>> print(kt_kernel.__cpu_variant__) # Shows detected variant
|
||||
|
||||
# Override detection
|
||||
>>> import os
|
||||
>>> os.environ['KT_KERNEL_CPU_VARIANT'] = 'avx2'
|
||||
>>> import kt_kernel # Will use AVX2 variant
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def detect_cpu_features():
    """
    Detect CPU features and pick the best kernel variant.

    Detection hierarchy:
      1. 'amx'    - Intel Sapphire Rapids+ (amx_tile/amx_int8/amx_bf16 flags)
      2. 'avx512' - CPUs advertising AVX512F
      3. 'avx2'   - safe fallback for maximum compatibility

    The KT_KERNEL_CPU_VARIANT environment variable overrides all probing;
    KT_KERNEL_DEBUG=1 enables diagnostic output.

    Returns:
        str: 'amx', 'avx512', or 'avx2'
    """
    dbg = os.environ.get('KT_KERNEL_DEBUG') == '1'

    # An explicit user override always wins over hardware probing.
    override = os.environ.get('KT_KERNEL_CPU_VARIANT', '').lower()
    if override in ('amx', 'avx512', 'avx2'):
        if dbg:
            print(f"[kt-kernel] Using environment override: {override}")
        return override

    try:
        # Linux exposes the CPU feature flags in /proc/cpuinfo.
        with open('/proc/cpuinfo', 'r') as fh:
            flags_text = fh.read().lower()

        # AMX requires the full trio of tile/int8/bf16 extensions.
        if all(flag in flags_text for flag in ('amx_tile', 'amx_int8', 'amx_bf16')):
            if dbg:
                print("[kt-kernel] Detected AMX support via /proc/cpuinfo")
            return 'amx'

        # AVX512F is the foundation flag shared by all AVX512 variants.
        if 'avx512f' in flags_text:
            if dbg:
                print("[kt-kernel] Detected AVX512 support via /proc/cpuinfo")
            return 'avx512'

        if 'avx2' in flags_text:
            if dbg:
                print("[kt-kernel] Detected AVX2 support via /proc/cpuinfo")
            return 'avx2'

        # Nothing recognized (rare on modern hardware): assume AVX2 anyway.
        if dbg:
            print("[kt-kernel] No AVX2/AVX512/AMX detected, using AVX2 fallback")
        return 'avx2'

    except FileNotFoundError:
        # Not Linux (or a stripped-down container): /proc/cpuinfo is absent,
        # so try the optional cpufeature package instead.
        if dbg:
            print("[kt-kernel] /proc/cpuinfo not found, trying cpufeature package")

        try:
            import cpufeature

            if cpufeature.CPUFeature.get('AMX_TILE', False):
                if dbg:
                    print("[kt-kernel] Detected AMX support via cpufeature")
                return 'amx'

            if cpufeature.CPUFeature.get('AVX512F', False):
                if dbg:
                    print("[kt-kernel] Detected AVX512 support via cpufeature")
                return 'avx512'

            if dbg:
                print("[kt-kernel] Using AVX2 fallback via cpufeature")
            return 'avx2'

        except ImportError:
            # cpufeature is not installed either: last-resort fallback.
            if dbg:
                print("[kt-kernel] cpufeature not available, using AVX2 fallback")
            return 'avx2'

    except Exception as e:
        # Any unexpected probing error: fail safe rather than crash the import.
        if dbg:
            print(f"[kt-kernel] Error during CPU detection: {e}, using AVX2 fallback")
        return 'avx2'
|
||||
|
||||
|
||||
def load_extension(variant):
    """
    Load the kt_kernel_ext shared library for *variant*.

    Looks for a multi-variant build (_kt_kernel_ext_<variant>.*.so) first and
    falls back to a single-variant build (kt_kernel_ext.*.so). Every build
    exports PyInit_kt_kernel_ext, so the module is always loaded under the
    name 'kt_kernel_ext' regardless of the on-disk file name.

    On failure the next-slower variant is tried automatically:
    amx -> avx512 -> avx2.

    Args:
        variant (str): 'amx', 'avx512', or 'avx2'

    Returns:
        module: the loaded extension module

    Raises:
        ImportError: if every variant (down to avx2) fails to load
    """
    import importlib.util
    import glob

    dbg = os.environ.get('KT_KERNEL_DEBUG') == '1'

    try:
        # The .so files live next to this file inside the kt_kernel package.
        # (Cannot import kt_kernel here - circular import - so use __file__.)
        pkg_dir = os.path.dirname(os.path.abspath(__file__))

        # Prefer the multi-variant naming produced by CPUINFER_BUILD_ALL_VARIANTS.
        candidates = glob.glob(os.path.join(pkg_dir, f'_kt_kernel_ext_{variant}.*.so'))
        if not candidates:
            # Fall back to the plain single-variant build name.
            candidates = glob.glob(os.path.join(pkg_dir, 'kt_kernel_ext.*.so'))
            if not candidates:
                raise ImportError(f"No .so file found for variant {variant} (tried patterns: {pkg_dir}/_kt_kernel_ext_{variant}.*.so and {pkg_dir}/kt_kernel_ext.*.so)")
            if dbg:
                print(f"[kt-kernel] Multi-variant {variant} not found, using single-variant build")

        shared_obj = candidates[0]
        if dbg:
            print(f"[kt-kernel] Loading {variant} from: {shared_obj}")

        # Manually load the shared object under the canonical module name.
        spec = importlib.util.spec_from_file_location('kt_kernel_ext', shared_obj)
        if spec is None or spec.loader is None:
            raise ImportError(f"Failed to create spec for {shared_obj}")

        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        if dbg:
            print(f"[kt-kernel] Successfully loaded {variant.upper()} variant")
        return module

    except (ImportError, ModuleNotFoundError, FileNotFoundError) as e:
        if dbg:
            print(f"[kt-kernel] Failed to load {variant} variant: {e}")

        # Degrade gracefully through the fallback chain; avx2 is terminal.
        if variant == 'amx':
            if dbg:
                print("[kt-kernel] Falling back from AMX to AVX512")
            return load_extension('avx512')
        if variant == 'avx512':
            if dbg:
                print("[kt-kernel] Falling back from AVX512 to AVX2")
            return load_extension('avx2')

        raise ImportError(
            f"Failed to load kt_kernel extension (variant: {variant}). "
            f"Original error: {e}\n"
            f"This usually means the kt_kernel package is not properly installed."
        )
|
||||
|
||||
|
||||
def initialize():
    """
    Detect CPU capabilities and load the optimal extension variant.

    Main entry point used by kt_kernel.__init__.

    Returns:
        tuple: (extension_module, variant_name)
            - extension_module: the loaded C++ extension module
            - variant_name: 'amx', 'avx512', or 'avx2'

    Example:
        >>> ext, variant = initialize()
        >>> print(f"Loaded {variant} variant")
    """
    dbg = os.environ.get('KT_KERNEL_DEBUG') == '1'

    # Step 1: pick the best variant for this machine (or honor the override).
    chosen = detect_cpu_features()
    if dbg:
        print(f"[kt-kernel] Selected CPU variant: {chosen}")

    # Step 2: load the matching shared object (with automatic fallback).
    module = load_extension(chosen)
    if dbg:
        print(f"[kt-kernel] Extension module loaded: {module.__name__}")

    return module, chosen
|
||||
@@ -229,6 +229,133 @@ class CMakeBuild(build_ext):
|
||||
return info
|
||||
|
||||
def build_extension(self, ext: CMakeExtension):
|
||||
"""
|
||||
Main entry point for building the extension.
|
||||
|
||||
Checks if multi-variant build is requested (CPUINFER_BUILD_ALL_VARIANTS=1)
|
||||
and routes to the appropriate build method.
|
||||
"""
|
||||
if _env_get_bool("CPUINFER_BUILD_ALL_VARIANTS", False):
|
||||
# Build all 3 variants (AMX, AVX512, AVX2)
|
||||
self.build_multi_variants(ext)
|
||||
else:
|
||||
# Build single variant (original behavior)
|
||||
self._build_single_variant(ext)
|
||||
|
||||
    def build_multi_variants(self, ext: CMakeExtension):
        """
        Build all 3 CPU variants (AMX, AVX512, AVX2) in a single wheel.

        This method is called when CPUINFER_BUILD_ALL_VARIANTS=1 is set.
        It builds three separate extensions with different CPU instruction sets
        and renames the output .so files with variant suffixes so the runtime
        loader (_cpu_detect) can pick one by name.
        """
        print("=" * 80)
        print("Building kt-kernel with ALL CPU variants (AMX, AVX512, AVX2)")
        print("=" * 80)

        # Define the 3 variants to build. Each entry carries the environment
        # overrides consumed by the single-variant build step.
        variants = [
            {
                'name': 'amx',
                'env': {
                    'CPUINFER_CPU_INSTRUCT': 'NATIVE',
                    'CPUINFER_ENABLE_AMX': 'ON',
                },
                'description': 'AMX variant (Intel Sapphire Rapids+)'
            },
            {
                'name': 'avx512',
                'env': {
                    'CPUINFER_CPU_INSTRUCT': 'AVX512',
                    'CPUINFER_ENABLE_AMX': 'OFF',
                },
                'description': 'AVX512 variant (Intel Skylake-X/Ice Lake/Cascade Lake)'
            },
            {
                'name': 'avx2',
                'env': {
                    'CPUINFER_CPU_INSTRUCT': 'AVX2',
                    'CPUINFER_ENABLE_AMX': 'OFF',
                },
                'description': 'AVX2 variant (maximum compatibility)'
            }
        ]

        # Save original environment; it is restored only after ALL variants
        # finish, so each iteration's env overlays the previous one. That is
        # safe here because every variant sets the same two keys.
        original_env = os.environ.copy()

        extdir = Path(self.get_ext_fullpath(ext.name)).parent.resolve()

        for i, variant in enumerate(variants, 1):
            print(f"\n{'=' * 80}")
            print(f"Building variant {i}/3: {variant['description']}")
            print(f"{'=' * 80}\n")

            # Set variant-specific environment variables
            os.environ.update(variant['env'])

            # Use a unique build directory for this variant so CMake caches
            # from one instruction set do not contaminate the next build.
            original_build_temp = self.build_temp
            self.build_temp = str(Path(self.build_temp) / f"variant_{variant['name']}")

            try:
                # Build this variant (calls the single-variant build logic)
                self._build_single_variant(ext)

                # Rename the generated .so file to include variant suffix
                # Original: kt_kernel_ext.cpython-311-x86_64-linux-gnu.so
                # Renamed: _kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so

                # Extract the base extension name (without package prefix)
                # ext.name is "kt_kernel.kt_kernel_ext", we want "kt_kernel_ext"
                base_ext_name = ext.name.split('.')[-1]

                # Find the newly built .so file
                import time
                time.sleep(0.5)  # Give filesystem time to sync
                # NOTE(review): the sleep is presumably a workaround for
                # delayed visibility of the copied artifact - confirm whether
                # it is still needed.

                # Only un-suffixed outputs: skip files already renamed to
                # _<base>_<variant> form by earlier iterations.
                built_candidates = [
                    f for f in Path(extdir).glob("*.so")
                    if f.name.startswith(base_ext_name) and not f.name.startswith(f"_{base_ext_name}_")
                ]

                if not built_candidates:
                    # Non-fatal: report and continue so the remaining variants
                    # still build (the wheel may end up incomplete, though).
                    print(f"WARNING: No .so file found for {base_ext_name} in {extdir}")
                    print(f"Files in {extdir}:")
                    for f in Path(extdir).glob("*.so"):
                        print(f" {f.name}")

                for so_file in built_candidates:
                    # Extract the python tag part (e.g., ".cpython-311-x86_64-linux-gnu.so")
                    suffix = so_file.name.replace(base_ext_name, "")
                    new_name = f"_{base_ext_name}_{variant['name']}{suffix}"
                    new_path = extdir / new_name

                    print(f"-- Renaming {so_file.name} -> {new_name}")
                    if new_path.exists():
                        # Stale artifact from a previous run: replace it.
                        print(f" WARNING: Target file already exists, removing: {new_path}")
                        new_path.unlink()
                    so_file.rename(new_path)
                    print(f" ✓ Successfully renamed to {new_name}")

            finally:
                # Restore build_temp for next iteration
                self.build_temp = original_build_temp

        # Restore original environment
        os.environ.clear()
        os.environ.update(original_env)

        print(f"\n{'=' * 80}")
        print("✓ Successfully built all 3 CPU variants")
        print(f"{'=' * 80}\n")
|
||||
|
||||
def _build_single_variant(self, ext: CMakeExtension):
|
||||
"""
|
||||
Build a single CPU variant. This contains the core build logic
|
||||
extracted from the original build_extension method.
|
||||
"""
|
||||
# Auto-detect CUDA toolkit if user did not explicitly set CPUINFER_USE_CUDA
|
||||
def detect_cuda_toolkit() -> bool:
|
||||
# Respect CUDA_HOME
|
||||
@@ -276,6 +403,10 @@ class CMakeBuild(build_ext):
|
||||
auto_cuda = detect_cuda_toolkit()
|
||||
os.environ["CPUINFER_USE_CUDA"] = "1" if auto_cuda else "0"
|
||||
print(f"-- CPUINFER_USE_CUDA not set; auto-detected CUDA toolkit: {'YES' if auto_cuda else 'NO'}")
|
||||
elif cuda_env:
|
||||
print("-- CPUINFER_USE_CUDA explicitly enabled")
|
||||
else:
|
||||
print("-- CPUINFER_USE_CUDA explicitly disabled")
|
||||
|
||||
extdir = Path(self.get_ext_fullpath(ext.name)).parent.resolve()
|
||||
cfg = default_build_type()
|
||||
@@ -431,7 +562,15 @@ class CMakeBuild(build_ext):
|
||||
# Version (simple). If you later add a python package dir, you can read from it.
|
||||
################################################################################
|
||||
|
||||
VERSION = os.environ.get("CPUINFER_VERSION", "0.1.0")
|
||||
# Import version from shared version.py at project root
|
||||
_version_file = Path(__file__).resolve().parent.parent / "version.py"
|
||||
if _version_file.exists():
|
||||
_version_ns = {}
|
||||
with open(_version_file, "r", encoding="utf-8") as f:
|
||||
exec(f.read(), _version_ns)
|
||||
VERSION = os.environ.get("CPUINFER_VERSION", _version_ns.get("__version__", "0.4.2"))
|
||||
else:
|
||||
VERSION = os.environ.get("CPUINFER_VERSION", "0.4.2")
|
||||
|
||||
################################################################################
|
||||
# Setup
|
||||
@@ -449,7 +588,7 @@ setup(
|
||||
"kt_kernel": "python",
|
||||
"kt_kernel.utils": "python/utils",
|
||||
},
|
||||
ext_modules=[CMakeExtension("kt_kernel_ext", str(REPO_ROOT))],
|
||||
ext_modules=[CMakeExtension("kt_kernel.kt_kernel_ext", str(REPO_ROOT))],
|
||||
cmdclass={"build_ext": CMakeBuild},
|
||||
zip_safe=False,
|
||||
classifiers=[
|
||||
|
||||
@@ -16,7 +16,8 @@ register_cpu_ci(est_time=30, suite="default")
|
||||
|
||||
# Check if kt_kernel_ext is available
|
||||
try:
|
||||
import kt_kernel_ext
|
||||
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
|
||||
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
|
||||
HAS_KT_KERNEL = True
|
||||
except ImportError:
|
||||
HAS_KT_KERNEL = False
|
||||
|
||||
@@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default")
|
||||
# Check if dependencies are available
|
||||
try:
|
||||
import torch
|
||||
import kt_kernel_ext
|
||||
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
|
||||
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
|
||||
HAS_DEPS = True
|
||||
except ImportError as e:
|
||||
HAS_DEPS = False
|
||||
|
||||
@@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default")
|
||||
# Check if dependencies are available
|
||||
try:
|
||||
import torch
|
||||
import kt_kernel_ext
|
||||
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
|
||||
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
|
||||
HAS_DEPS = True
|
||||
except ImportError as e:
|
||||
HAS_DEPS = False
|
||||
|
||||
@@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default")
|
||||
# Check if dependencies are available
|
||||
try:
|
||||
import torch
|
||||
import kt_kernel_ext
|
||||
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
|
||||
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
|
||||
HAS_DEPS = True
|
||||
except ImportError as e:
|
||||
HAS_DEPS = False
|
||||
|
||||
@@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default")
|
||||
# Check if dependencies are available
|
||||
try:
|
||||
import torch
|
||||
import kt_kernel_ext
|
||||
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
|
||||
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
|
||||
HAS_DEPS = True
|
||||
except ImportError as e:
|
||||
HAS_DEPS = False
|
||||
|
||||
@@ -23,7 +23,8 @@ register_cpu_ci(est_time=300, suite="default")
|
||||
# Check if dependencies are available
|
||||
try:
|
||||
import torch
|
||||
import kt_kernel_ext
|
||||
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
|
||||
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
|
||||
from tqdm import tqdm
|
||||
HAS_DEPS = True
|
||||
except ImportError as e:
|
||||
|
||||
@@ -23,7 +23,8 @@ register_cpu_ci(est_time=300, suite="default")
|
||||
# Check if dependencies are available
|
||||
try:
|
||||
import torch
|
||||
import kt_kernel_ext
|
||||
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
|
||||
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
|
||||
from tqdm import tqdm
|
||||
|
||||
HAS_DEPS = True
|
||||
|
||||
@@ -24,7 +24,8 @@ register_cpu_ci(est_time=300, suite="default")
|
||||
# Check if dependencies are available
|
||||
try:
|
||||
import torch
|
||||
import kt_kernel_ext
|
||||
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
|
||||
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
|
||||
from tqdm import tqdm
|
||||
HAS_DEPS = True
|
||||
except ImportError as e:
|
||||
|
||||
@@ -23,7 +23,8 @@ register_cpu_ci(est_time=300, suite="default")
|
||||
# Check if dependencies are available
|
||||
try:
|
||||
import torch
|
||||
import kt_kernel_ext
|
||||
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
|
||||
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
|
||||
from tqdm import tqdm
|
||||
HAS_DEPS = True
|
||||
except ImportError as e:
|
||||
|
||||
@@ -1,11 +1,20 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
Description :
|
||||
Description :
|
||||
Author : kkk1nak0
|
||||
Date : 2024-08-15 07:34:46
|
||||
Version : 1.0.0
|
||||
LastEditors : chenxl
|
||||
LastEditors : chenxl
|
||||
LastEditTime : 2025-02-15 03:53:02
|
||||
'''
|
||||
__version__ = "0.4.1"
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Import version from shared version.py at project root
|
||||
_root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
sys.path.insert(0, _root_dir)
|
||||
try:
|
||||
from version import __version__
|
||||
finally:
|
||||
sys.path.pop(0)
|
||||
|
||||
6
version.py
Normal file
6
version.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
KTransformers version information.
|
||||
Shared across kt-kernel and kt-sft modules.
|
||||
"""
|
||||
|
||||
__version__ = "0.4.3"
|
||||
Reference in New Issue
Block a user