[feat](kt-kernel): Add automatic deployment workflow (#1719)

Author: Jianwei Dong, 2025-12-16 15:20:06 +08:00 (committed by GitHub)
Parent: f25e58ad69
Commit: 1f79f6da92
31 changed files with 3691 additions and 552 deletions


@@ -5,9 +5,24 @@ on:
types: [published]
workflow_dispatch:
inputs:
choose:
description: 'Will you push the image to DockerHub? 0 for No, 1 for Yes'
push_to_dockerhub:
description: 'Push image to DockerHub? (true/false)'
required: true
default: 'false'
type: boolean
cuda_version:
description: 'CUDA version (e.g., 12.8.1)'
required: false
default: '12.8.1'
type: string
push_simplified_tag:
description: 'Also push simplified tag? (true/false)'
required: false
default: 'true'
type: boolean
ubuntu_mirror:
description: 'Use Tsinghua Ubuntu mirror? (0/1)'
required: false
default: '0'
type: string
@@ -20,79 +35,108 @@ jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Run tests
run: |
if [ -f docker-compose.test.yml ]; then
docker-compose --file docker-compose.test.yml build
docker-compose --file docker-compose.test.yml run sut
else
docker build . --file Dockerfile
docker build . --file docker/Dockerfile
fi
docker_task:
build-and-push:
needs: test
name: ${{ matrix.instruct }}
name: Build and Push Multi-Variant Docker Image
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
# for amd64
- {instruct: "FANCY", platform: "linux/amd64"}
- {instruct: "AVX512", platform: "linux/amd64"}
- {instruct: "AVX2", platform: "linux/amd64"}
- {instruct: "NATIVE", platform: "linux/amd64"}
# for arm64
- {instruct: "NATIVE", platform: "linux/arm64"}
steps:
- name: Move Docker data directory
run: |
sudo systemctl stop docker
sudo mkdir -p /mnt/docker
sudo rsync -avz /var/lib/docker/ /mnt/docker
sudo rm -rf /var/lib/docker
sudo ln -s /mnt/docker /var/lib/docker
sudo systemctl start docker
- name: Checkout repository
uses: actions/checkout@v4
-
name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Move Docker data directory
run: |
sudo systemctl stop docker
sudo mkdir -p /mnt/docker
sudo rsync -avz /var/lib/docker/ /mnt/docker
sudo rm -rf /var/lib/docker
sudo ln -s /mnt/docker /var/lib/docker
sudo systemctl start docker
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Build and push for amd64
if: matrix.platform == 'linux/amd64'
uses: docker/build-push-action@v6
with:
push: true
platforms: |
linux/amd64
tags: |
${{ env.DOCKERHUB_REPO }}:latest-${{ matrix.instruct }}
${{ env.DOCKERHUB_REPO }}:${{ github.event.release.tag_name }}-${{ matrix.instruct }}
build-args: |
CPU_INSTRUCT=${{ matrix.instruct }}
-
name: Build and push for arm64
if: matrix.platform == 'linux/arm64'
uses: docker/build-push-action@v6
with:
push: true
platforms: |
linux/arm64
tags: |
${{ env.DOCKERHUB_REPO }}:latest-${{ matrix.instruct }}
${{ env.DOCKERHUB_REPO }}:${{ github.event.release.tag_name }}-${{ matrix.instruct }}
build-args: |
CPU_INSTRUCT=${{ matrix.instruct }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Determine build parameters
id: params
run: |
# Determine if we should push
if [ "${{ github.event_name }}" = "release" ]; then
echo "should_push=true" >> $GITHUB_OUTPUT
echo "push_simplified=true" >> $GITHUB_OUTPUT
elif [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "should_push=${{ inputs.push_to_dockerhub }}" >> $GITHUB_OUTPUT
echo "push_simplified=${{ inputs.push_simplified_tag }}" >> $GITHUB_OUTPUT
else
echo "should_push=false" >> $GITHUB_OUTPUT
echo "push_simplified=false" >> $GITHUB_OUTPUT
fi
# Determine CUDA version
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ -n "${{ inputs.cuda_version }}" ]; then
echo "cuda_version=${{ inputs.cuda_version }}" >> $GITHUB_OUTPUT
else
echo "cuda_version=12.8.1" >> $GITHUB_OUTPUT
fi
# Determine Ubuntu mirror setting
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ -n "${{ inputs.ubuntu_mirror }}" ]; then
echo "ubuntu_mirror=${{ inputs.ubuntu_mirror }}" >> $GITHUB_OUTPUT
else
echo "ubuntu_mirror=0" >> $GITHUB_OUTPUT
fi
- name: Build and push Docker image
run: |
cd docker
# Build command arguments
BUILD_ARGS=(
--cuda-version "${{ steps.params.outputs.cuda_version }}"
--ubuntu-mirror "${{ steps.params.outputs.ubuntu_mirror }}"
--repository "${{ env.DOCKERHUB_REPO }}"
)
# Add simplified tag option if enabled
if [ "${{ steps.params.outputs.push_simplified }}" = "true" ]; then
BUILD_ARGS+=(--also-push-simplified)
fi
# Add HTTP proxy if available
if [ -n "${{ secrets.HTTP_PROXY }}" ]; then
BUILD_ARGS+=(--http-proxy "${{ secrets.HTTP_PROXY }}")
fi
# Add HTTPS proxy if available
if [ -n "${{ secrets.HTTPS_PROXY }}" ]; then
BUILD_ARGS+=(--https-proxy "${{ secrets.HTTPS_PROXY }}")
fi
# Dry run if not pushing
if [ "${{ steps.params.outputs.should_push }}" != "true" ]; then
BUILD_ARGS+=(--dry-run)
fi
# Execute build script
./push-to-dockerhub.sh "${BUILD_ARGS[@]}"
- name: Display image information
if: steps.params.outputs.should_push == 'true'
run: |
echo "::notice title=Docker Image::Image pushed successfully to ${{ env.DOCKERHUB_REPO }}"
echo "Pull command: docker pull ${{ env.DOCKERHUB_REPO }}:v\$(VERSION)-cu\$(CUDA_SHORT)"


@@ -1,71 +0,0 @@
name: Install / Test KTransformers
run-name: Install / Test KTransformers
on:
workflow_dispatch:
inputs:
job_to_run:
description: "Which job to run?"
required: true
default: "test"
type: choice
options:
- create-install-test
- install-test
- test
jobs:
Install-Test-KTransformers:
runs-on: self-hosted
steps:
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
- name: Check out repository code
uses: actions/checkout@v4
- run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
- name: Remove old conda environment
continue-on-error: true
if: contains(inputs.job_to_run, 'create')
run: |
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
conda env remove --name ktransformers-dev -y
- name: Create conda environment
if: contains(inputs.job_to_run, 'create')
run: |
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
conda create --name ktransformers-dev python=3.11
conda activate ktransformers-dev
conda install -c conda-forge libstdcxx-ng -y
- name: Install dependencies
if: contains(inputs.job_to_run, 'create')
run: |
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
conda activate ktransformers-dev
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
pip3 install packaging ninja cpufeature numpy
pip install ~/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp311-cp311-linux_x86_64.whl
- name: Install KTransformers
if: contains(inputs.job_to_run, 'install')
run: |
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
conda activate ktransformers-dev
pip3 uninstall ktransformers -y
cd ${{ github.workspace }}
git submodule init
git submodule update
bash install.sh
- name: Test Local Chat 1
run: |
set -e
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
conda activate ktransformers-dev
export PATH=/usr/local/cuda-12.4/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH
export CUDA_HOME=/usr/local/cuda-12.4
cd ${{ github.workspace }}
echo "Running Local Chat 1 (book.txt) ..."
python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt
sed -n '/Prompt:/,$p' log1.txt
echo "Running Local Chat 2 [force think] (chinese.txt) ..."
python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt -f > log2.txt
sed -n '/Prompt:/,$p' log2.txt
- run: echo "This job's status is ${{ job.status }}."


@@ -1,231 +0,0 @@
name: Build Wheels
on:
workflow_dispatch:
inputs:
release:
description: 'Release? 1 = yes, 0 = no'
default: '0'
required: true
type: string
jobs:
build_wheels:
name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
# Ubuntu
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
# Windows
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
defaults:
run:
shell: pwsh
steps:
- uses: actions/checkout@v3
- name: Free Disk Space
uses: jlumbroso/free-disk-space@v1.3.1
if: runner.os == 'Linux'
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: false
swap-storage: true
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.pyver }}
- name: check_space
run: |
if($IsLinux) {df -h}
if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'}
- uses: actions/setup-node@v4
with:
node-version: 20
- name: Setup Mamba
if: matrix.cuda != ''
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: "ktransformers"
python-version: ${{ matrix.pyver }}
miniforge-variant: Miniforge3
miniforge-version: latest
use-mamba: true
add-pip-as-python-dependency: true
auto-activate-base: false
- name: build web
run: |
cd ktransformers/website/
npm install
npm run build
cd ../../
- name: build for cuda
if: matrix.cuda != ''
env:
USE_BALANCE_SERVE: "1"
run: |
git submodule init
git submodule update
if($IsWindows){
$originalPath = Get-Location
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64'
$env:DISTUTILS_USE_SDK=1
Set-Location $originalPath
}
$cudaVersion = '${{ matrix.cuda }}'
$env:MAMBA_NO_LOW_SPEED_LIMIT = 1
mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX
if ($IsLinux) {
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH
if (!(Test-Path $env:CUDA_HOME/lib64)) {
New-Item -ItemType SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib
}
}
if ($IsWindows) {
if (Test-Path -Path "$env:CUDA_PATH/Library/bin/nvcc.exe"){
$env:CUDA_PATH = "$env:CUDA_PATH/Library"
$env:CUDA_HOME = $env:CUDA_PATH
}
$env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH
$directory = "$env:CUDA_PATH/lib/x64/"
if (-not (Test-Path -Path $directory)) {
New-Item -ItemType Directory -Path $directory
Write-Output "Directory '$directory' created."
}
cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/
$env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE
$env:INCLUDE =$env:CONDA_PREFIX + "/include;" + $env:INCLUDE
}
python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }}
python -m pip install cpufeature build wheel ninja packaging setuptools
$env:KTRANSFORMERS_FORCE_BUILD = "TRUE"
$env:CPU_INSTRUCT = '${{ matrix.instruct }}'
$env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
python -m build --no-isolation --verbose
- name: create Release dir
run: |
if ($IsWindows) {
$env:date = $(Get-Date -Format "yyyy-MM-dd")
New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh"
$Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa"
Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}"
(Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH
chmod 600 $Env:SSH_PATH
}
if ($IsLinux) {
$env:date = $(date +%Y-%m-%d)
mkdir -p ~/.ssh/
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
chmod 600 ~/.ssh/id_rsa
}
ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date"
scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/


@@ -1,141 +0,0 @@
name: Build Wheels Tests
on:
workflow_dispatch:
inputs:
release:
description: 'Release? 1 = yes, 0 = no'
default: '0'
required: true
type: string
jobs:
build_wheels:
name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
# Ubuntu
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
defaults:
run:
shell: pwsh
steps:
- uses: actions/checkout@v3
- name: Free Disk Space
uses: jlumbroso/free-disk-space@v1.3.1
if: runner.os == 'Linux'
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: false
swap-storage: true
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.pyver }}
- name: check_space
run: |
if($IsLinux) {df -h}
if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'}
- uses: actions/setup-node@v4
with:
node-version: 20
- name: Setup Mamba
if: matrix.cuda != ''
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: "ktransformers"
python-version: ${{ matrix.pyver }}
miniforge-variant: Miniforge3
miniforge-version: latest
use-mamba: true
add-pip-as-python-dependency: true
auto-activate-base: false
- name: build web
run: |
cd ktransformers/website/
npm install
npm run build
cd ../../
- name: build for cuda
if: matrix.cuda != ''
run: |
git submodule init
git submodule update
if($IsWindows){
$originalPath = Get-Location
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64'
$env:DISTUTILS_USE_SDK=1
Set-Location $originalPath
}
$cudaVersion = '${{ matrix.cuda }}'
$env:MAMBA_NO_LOW_SPEED_LIMIT = 1
mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX
if ($IsLinux) {
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH
if (!(Test-Path $env:CUDA_HOME/lib64)) {
New-Item -ItemType SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib
}
}
if ($IsWindows) {
if (Test-Path -Path "$env:CUDA_PATH/Library/bin/nvcc.exe"){
$env:CUDA_PATH = "$env:CUDA_PATH/Library"
$env:CUDA_HOME = $env:CUDA_PATH
}
$env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH
$directory = "$env:CUDA_PATH/lib/x64/"
if (-not (Test-Path -Path $directory)) {
New-Item -ItemType Directory -Path $directory
Write-Output "Directory '$directory' created."
}
cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/
$env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE
$env:INCLUDE =$env:CONDA_PREFIX + "/include;" + $env:INCLUDE
}
python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }}
python -m pip install cpufeature build wheel ninja packaging setuptools
$env:KTRANSFORMERS_FORCE_BUILD = "TRUE"
$env:CPU_INSTRUCT = '${{ matrix.instruct }}'
$env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
python -m build --no-isolation --verbose
- name: create Release dir
run: |
if ($IsWindows) {
$env:date = $(Get-Date -Format "yyyy-MM-dd")
New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh"
$Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa"
Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}"
(Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH
chmod 600 $Env:SSH_PATH
}
if ($IsLinux) {
$env:date = $(date +%Y-%m-%d)
mkdir -p ~/.ssh/
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
chmod 600 ~/.ssh/id_rsa
}
ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date"
scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/

.github/workflows/release-fake-tag.yml (new file, 36 lines)

@@ -0,0 +1,36 @@
name: Release Fake Tag
on:
push:
branches:
- main
paths:
- "version.py"
workflow_dispatch:
permissions:
contents: write
jobs:
publish:
if: github.repository == 'kvcache-ai/ktransformers'
runs-on: ubuntu-latest
environment: 'prod'
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: Get version
id: get_version
run: |
version=$(cat version.py | grep '__version__' | cut -d'"' -f2)
echo "TAG=v$version" >> $GITHUB_OUTPUT
- name: Create and push tag
run: |
git config user.name "ktransformers-bot"
git config user.email "ktransformers-bot@users.noreply.github.com"
git tag ${{ steps.get_version.outputs.TAG }}
git push origin ${{ steps.get_version.outputs.TAG }}
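The tag derivation in the `Get version` step can be sanity-checked locally. A minimal sketch, assuming `version.py` defines `__version__` as a double-quoted string, as the workflow expects:

```bash
# Reproduce the workflow's grep/cut extraction against a sample version.py.
echo '__version__ = "0.4.3"' > /tmp/version.py
version=$(grep '__version__' /tmp/version.py | cut -d'"' -f2)
echo "TAG=v$version"  # prints: TAG=v0.4.3
```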

.github/workflows/release-pypi.yml (new file, 163 lines)

@@ -0,0 +1,163 @@
name: Release to PyPI
on:
push:
branches:
- main
paths:
- "version.py"
workflow_dispatch:
inputs:
test_pypi:
description: 'Publish to TestPyPI instead of PyPI (for testing)'
required: false
default: 'false'
type: choice
options:
- 'true'
- 'false'
permissions:
contents: read
jobs:
build-kt-kernel:
name: Build kt-kernel CPU-only (Python ${{ matrix.python-version }})
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
python-version: ['3.10', '3.11', '3.12']
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y cmake libhwloc-dev pkg-config libnuma-dev
- name: Install Python build tools
run: |
python -m pip install --upgrade pip
pip install build wheel setuptools
- name: Build kt-kernel wheel (CPU-only, multi-variant)
working-directory: kt-kernel
env:
CPUINFER_BUILD_ALL_VARIANTS: '1'
CPUINFER_USE_CUDA: '0'
CPUINFER_BUILD_TYPE: 'Release'
CPUINFER_PARALLEL: '4'
CPUINFER_FORCE_REBUILD: '1'
run: |
echo "Building kt-kernel CPU-only with all CPU variants (AMX, AVX512, AVX2)"
python -m build --wheel --no-isolation -v
- name: List generated wheels
working-directory: kt-kernel
run: |
echo "Generated wheels:"
ls -lh dist/
- name: Test wheel import
working-directory: kt-kernel
run: |
pip install dist/*.whl
python -c "import kt_kernel; print('✓ Import successful'); print(f'CPU variant detected: {kt_kernel.__cpu_variant__}'); print(f'Version: {kt_kernel.__version__}')"
- name: Verify wheel contains all variants
working-directory: kt-kernel
run: |
echo "Checking wheel contents for CPU variants..."
python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_" || echo "ERROR: No variant .so files found!"
python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_amx.cpython" && echo "✓ AMX variant found" || echo "✗ AMX variant missing"
python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx512.cpython" && echo "✓ AVX512 variant found" || echo "✗ AVX512 variant missing"
python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx2.cpython" && echo "✓ AVX2 variant found" || echo "✗ AVX2 variant missing"
- name: Upload wheel artifact
uses: actions/upload-artifact@v3
with:
name: kt-kernel-wheels-py${{ matrix.python-version }}
path: kt-kernel/dist/*.whl
retention-days: 7
publish-pypi:
name: Publish to PyPI
needs: build-kt-kernel
runs-on: ubuntu-latest
if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
environment: prod
permissions:
id-token: write # For trusted publishing (OIDC)
contents: read
steps:
- name: Download all wheel artifacts
uses: actions/download-artifact@v3
with:
path: artifacts/
- name: Organize wheels into dist/
run: |
mkdir -p dist/
find artifacts/ -name "*.whl" -exec cp {} dist/ \;
echo "Wheels to publish:"
ls -lh dist/
- name: Get version from wheel
id: get_version
run: |
# Extract version from first wheel filename
wheel_name=$(ls dist/*.whl | head -1 | xargs basename)
# Extract version (format: kt_kernel-X.Y.Z-...)
version=$(echo "$wheel_name" | sed 's/kt_kernel-\([0-9.]*\)-.*/\1/')
echo "VERSION=$version" >> $GITHUB_OUTPUT
echo "Publishing version: $version"
- name: Publish to TestPyPI (if requested)
if: github.event.inputs.test_pypi == 'true'
uses: pypa/gh-action-pypi-publish@release/v1
with:
repository-url: https://test.pypi.org/legacy/
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
skip-existing: true
print-hash: true
- name: Publish to PyPI
if: github.event.inputs.test_pypi != 'true'
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_API_TOKEN }}
skip-existing: true
print-hash: true
- name: Create release summary
run: |
echo "## 🎉 kt-kernel v${{ steps.get_version.outputs.VERSION }} Published to PyPI" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Installation" >> $GITHUB_STEP_SUMMARY
echo '```bash' >> $GITHUB_STEP_SUMMARY
echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Published Wheels" >> $GITHUB_STEP_SUMMARY
echo "Total: $(ls -1 dist/*.whl | wc -l) wheels (3 Python versions: 3.10, 3.11, 3.12)" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Features" >> $GITHUB_STEP_SUMMARY
echo "**CPU-only build with multi-variant support:**" >> $GITHUB_STEP_SUMMARY
echo "- ✅ AMX (Intel Sapphire Rapids+)" >> $GITHUB_STEP_SUMMARY
echo "- ✅ AVX512 (Intel Skylake-X/Ice Lake/Cascade Lake)" >> $GITHUB_STEP_SUMMARY
echo "- ✅ AVX2 (Maximum compatibility)" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "**Runtime CPU detection:** Automatically selects the best variant for your CPU" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "PyPI link: https://pypi.org/project/kt-kernel/#history" >> $GITHUB_STEP_SUMMARY


@@ -1,24 +0,0 @@
name: Human Eval Score
run-name: Human Eval Score
on: workflow_dispatch
jobs:
Human-Eval-Score:
runs-on: self-hosted
steps:
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
- name: Check out repository code
uses: actions/checkout@v4
- run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
- name: Human Eval Run
run: |
set -e
source /home/qujing3/anaconda3/etc/profile.d/conda.sh
conda activate ktransformers-dev
export PATH=/usr/local/cuda-12.4/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH
export CUDA_HOME=/usr/local/cuda-12.4
cd ${{ github.workspace }}
python ktransformers/tests/score.py
- run: echo "This job's status is ${{ job.status }}."

docker/Dockerfile (new file, 408 lines)

@@ -0,0 +1,408 @@
ARG CUDA_VERSION=12.8.1
FROM docker.1ms.run/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS base
ARG TARGETARCH
ARG GRACE_BLACKWELL=0
ARG HOPPER_SBO=0
ARG CPU_VARIANT=x86-intel-multi
ARG BUILD_ALL_CPU_VARIANTS=1
# Proxy settings for build-time network access
ARG HTTP_PROXY
ARG HTTPS_PROXY
ARG http_proxy
ARG https_proxy
ENV HTTP_PROXY=${HTTP_PROXY} \
HTTPS_PROXY=${HTTPS_PROXY} \
http_proxy=${http_proxy} \
https_proxy=${https_proxy}
ARG GRACE_BLACKWELL_DEEPEP_BRANCH=gb200_blog_part_2
ARG HOPPER_SBO_DEEPEP_COMMIT=9f2fc4b3182a51044ae7ecb6610f7c9c3258c4d6
ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
ARG BUILD_AND_DOWNLOAD_PARALLEL=8
ARG SGL_KERNEL_VERSION=0.3.19
ARG SGL_VERSION=0.5.6.post1
ARG USE_LATEST_SGLANG=0
ARG GDRCOPY_VERSION=2.5.1
ARG UBUNTU_MIRROR
ARG GITHUB_ARTIFACTORY=github.com
ARG FLASHINFER_VERSION=0.5.3
# ktransformers wheel version (cu128torch28 for CUDA 12.8 + PyTorch 2.8)
ARG KTRANSFORMERS_VERSION=0.4.2
ARG KTRANSFORMERS_WHEEL=ktransformers-0.4.2+cu128torch28fancy-cp312-cp312-linux_x86_64.whl
# flash_attn wheel for fine-tune env
ARG FLASH_ATTN_WHEEL=flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
ENV DEBIAN_FRONTEND=noninteractive \
CUDA_HOME=/usr/local/cuda \
GDRCOPY_HOME=/usr/src/gdrdrv-${GDRCOPY_VERSION}/ \
FLASHINFER_VERSION=${FLASHINFER_VERSION}
# Add GKE default lib and bin locations
ENV PATH="${PATH}:/usr/local/nvidia/bin" \
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64"
# Replace Ubuntu sources with Tsinghua mirror for Ubuntu 24.04 (noble)
RUN if [ -n "$UBUNTU_MIRROR" ]; then \
echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ noble main restricted universe multiverse" > /etc/apt/sources.list && \
echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ noble-updates main restricted universe multiverse" >> /etc/apt/sources.list && \
echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ noble-backports main restricted universe multiverse" >> /etc/apt/sources.list && \
echo "deb http://security.ubuntu.com/ubuntu/ noble-security main restricted universe multiverse" >> /etc/apt/sources.list && \
rm -f /etc/apt/sources.list.d/ubuntu.sources; \
fi
# Install system dependencies (organized by category for better caching)
RUN --mount=type=cache,target=/var/cache/apt,id=base-apt \
echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
&& echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
&& apt-get update && apt-get install -y --no-install-recommends --allow-change-held-packages \
# Core system utilities
tzdata \
ca-certificates \
software-properties-common \
netcat-openbsd \
kmod \
unzip \
openssh-server \
curl \
wget \
lsof \
locales \
# Build essentials
build-essential \
cmake \
perl \
patchelf \
ccache \
git \
git-lfs \
# MPI and NUMA
libopenmpi-dev \
libnuma1 \
libnuma-dev \
numactl \
# transformers multimodal VLM
ffmpeg \
# InfiniBand/RDMA
libibverbs-dev \
libibverbs1 \
libibumad3 \
librdmacm1 \
libnl-3-200 \
libnl-route-3-200 \
libnl-route-3-dev \
libnl-3-dev \
ibverbs-providers \
infiniband-diags \
perftest \
# Development libraries
libgoogle-glog-dev \
libgtest-dev \
libjsoncpp-dev \
libunwind-dev \
libboost-all-dev \
libssl-dev \
libgrpc-dev \
libgrpc++-dev \
libprotobuf-dev \
protobuf-compiler \
protobuf-compiler-grpc \
pybind11-dev \
libhiredis-dev \
libcurl4-openssl-dev \
libczmq4 \
libczmq-dev \
libfabric-dev \
# Package building tools
devscripts \
debhelper \
fakeroot \
dkms \
check \
libsubunit0 \
libsubunit-dev \
# Development tools
gdb \
ninja-build \
vim \
tmux \
htop \
zsh \
tree \
less \
rdma-core \
# NCCL
libnccl2 \
libnccl-dev \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
# GDRCopy installation
RUN mkdir -p /tmp/gdrcopy && cd /tmp \
&& curl --retry 3 --retry-delay 2 -fsSL -o v${GDRCOPY_VERSION}.tar.gz \
https://${GITHUB_ARTIFACTORY}/NVIDIA/gdrcopy/archive/refs/tags/v${GDRCOPY_VERSION}.tar.gz \
&& tar -xzf v${GDRCOPY_VERSION}.tar.gz && rm v${GDRCOPY_VERSION}.tar.gz \
&& cd gdrcopy-${GDRCOPY_VERSION}/packages \
&& CUDA=/usr/local/cuda ./build-deb-packages.sh \
&& dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
&& cd / && rm -rf /tmp/gdrcopy
# Fix DeepEP IBGDA symlink
RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
# Set up locale
RUN locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8 \
LANGUAGE=en_US:en \
LC_ALL=en_US.UTF-8
########################################################
########## Install Miniconda ###########################
########################################################
RUN mkdir -p /opt/miniconda3 \
&& wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /opt/miniconda3/miniconda.sh \
&& bash /opt/miniconda3/miniconda.sh -b -u -p /opt/miniconda3 \
&& rm /opt/miniconda3/miniconda.sh
# Add conda to PATH
ENV PATH="/opt/miniconda3/bin:${PATH}"
# Accept conda TOS
RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main \
&& conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
# Configure conda to use Tsinghua mirror
RUN conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main \
&& conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free \
&& conda config --set show_channel_urls yes
########################################################
########## Dual Conda Environment Setup ################
########################################################
FROM base AS framework
ARG CUDA_VERSION
ARG BUILD_AND_DOWNLOAD_PARALLEL
ARG SGL_KERNEL_VERSION
ARG SGL_VERSION
ARG USE_LATEST_SGLANG
ARG FLASHINFER_VERSION
ARG GRACE_BLACKWELL
ARG GRACE_BLACKWELL_DEEPEP_BRANCH
ARG HOPPER_SBO
ARG HOPPER_SBO_DEEPEP_COMMIT
ARG DEEPEP_COMMIT
ARG GITHUB_ARTIFACTORY
ARG KTRANSFORMERS_VERSION
ARG KTRANSFORMERS_WHEEL
ARG FLASH_ATTN_WHEEL
WORKDIR /workspace
# Create two conda environments with Python 3.12
RUN conda create -n serve python=3.12 -y \
&& conda create -n fine-tune python=3.12 -y
# Set pip mirror for both conda envs
RUN /opt/miniconda3/envs/serve/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
&& /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# Clone repositories
# Use kvcache-ai/sglang fork with kimi_k2 branch
RUN git clone https://${GITHUB_ARTIFACTORY}/kvcache-ai/sglang.git /workspace/sglang \
&& cd /workspace/sglang && git checkout kimi_k2
RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory \
&& git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
&& cd /workspace/ktransformers && git submodule update --init --recursive
# Download ktransformers wheel and flash_attn wheel for fine-tune env
RUN curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \
https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
&& curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \
https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}
########################################################
# Environment 1: serve (sglang + kt-kernel)
########################################################
# Upgrade pip and install basic tools in serve env
RUN --mount=type=cache,target=/root/.cache/pip \
/opt/miniconda3/envs/serve/bin/pip install --upgrade pip setuptools wheel html5lib six
# Install sgl-kernel
RUN --mount=type=cache,target=/root/.cache/pip \
case "$CUDA_VERSION" in \
12.6.1) CUINDEX=126 ;; \
12.8.1) CUINDEX=128 ;; \
12.9.1) CUINDEX=129 ;; \
13.0.1) CUINDEX=130 ;; \
*) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
esac \
&& if [ "$CUDA_VERSION" = "12.6.1" ]; then \
/opt/miniconda3/envs/serve/bin/pip install https://${GITHUB_ARTIFACTORY}/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu124-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \
; \
elif [ "$CUDA_VERSION" = "12.8.1" ] || [ "$CUDA_VERSION" = "12.9.1" ]; then \
/opt/miniconda3/envs/serve/bin/pip install sgl-kernel==${SGL_KERNEL_VERSION} \
; \
elif [ "$CUDA_VERSION" = "13.0.1" ]; then \
/opt/miniconda3/envs/serve/bin/pip install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu130-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \
; \
fi
# Install SGLang in serve env
RUN --mount=type=cache,target=/root/.cache/pip \
case "$CUDA_VERSION" in \
12.6.1) CUINDEX=126 ;; \
12.8.1) CUINDEX=128 ;; \
12.9.1) CUINDEX=129 ;; \
13.0.1) CUINDEX=130 ;; \
esac \
&& cd /workspace/sglang \
&& /opt/miniconda3/envs/serve/bin/pip install -e "python[all]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}
# Download FlashInfer cubin for serve env
RUN --mount=type=cache,target=/root/.cache/pip \
FLASHINFER_CUBIN_DOWNLOAD_THREADS=${BUILD_AND_DOWNLOAD_PARALLEL} FLASHINFER_LOGGING_LEVEL=warning \
/opt/miniconda3/envs/serve/bin/python -m flashinfer --download-cubin
# Install DeepEP in serve env
RUN set -eux; \
if [ "$GRACE_BLACKWELL" = "1" ]; then \
git clone https://github.com/fzyzcjy/DeepEP.git /workspace/DeepEP && \
cd /workspace/DeepEP && \
git checkout ${GRACE_BLACKWELL_DEEPEP_BRANCH} && \
sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh; \
elif [ "$HOPPER_SBO" = "1" ]; then \
git clone https://github.com/deepseek-ai/DeepEP.git -b antgroup-opt /workspace/DeepEP && \
cd /workspace/DeepEP && \
git checkout ${HOPPER_SBO_DEEPEP_COMMIT} && \
sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh; \
else \
curl --retry 3 --retry-delay 2 -fsSL -o /tmp/${DEEPEP_COMMIT}.zip \
https://${GITHUB_ARTIFACTORY}/deepseek-ai/DeepEP/archive/${DEEPEP_COMMIT}.zip && \
unzip -q /tmp/${DEEPEP_COMMIT}.zip -d /tmp && rm /tmp/${DEEPEP_COMMIT}.zip && \
mv /tmp/DeepEP-${DEEPEP_COMMIT} /workspace/DeepEP && \
cd /workspace/DeepEP && \
sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh; \
fi
RUN --mount=type=cache,target=/root/.cache/pip \
cd /workspace/DeepEP && \
case "$CUDA_VERSION" in \
12.6.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0' ;; \
12.8.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0' ;; \
12.9.1|13.0.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0;10.3' ;; \
*) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
esac && \
. /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve && \
TORCH_CUDA_ARCH_LIST="${CHOSEN_TORCH_CUDA_ARCH_LIST}" MAX_JOBS=${BUILD_AND_DOWNLOAD_PARALLEL} \
pip install --no-build-isolation .
# Install NCCL for serve env
RUN --mount=type=cache,target=/root/.cache/pip \
if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
/opt/miniconda3/envs/serve/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
/opt/miniconda3/envs/serve/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
fi
# Install kt-kernel in serve env with all CPU variants
RUN . /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve \
&& cd /workspace/ktransformers/kt-kernel \
&& CPUINFER_BUILD_ALL_VARIANTS=1 ./install.sh build
########################################################
# Environment 2: fine-tune (LLaMA-Factory + ktransformers)
########################################################
# Install dependency libraries for ktransformers (CUDA 11.8 runtime required)
RUN conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
&& conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime
# Install PyTorch 2.8 in fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
case "$CUDA_VERSION" in \
12.6.1) CUINDEX=126 ;; \
12.8.1) CUINDEX=128 ;; \
12.9.1) CUINDEX=129 ;; \
13.0.1) CUINDEX=130 ;; \
esac \
&& /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel \
&& /opt/miniconda3/envs/fine-tune/bin/pip install \
torch==2.8.0 \
torchvision \
torchaudio \
--extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}
# Install LLaMA-Factory in fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
cd /workspace/LLaMA-Factory \
&& /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation
# Install ktransformers wheel in fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
/opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}
# Install flash_attn wheel in fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
/opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}
# Install NCCL for fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
/opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
/opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
fi
########################################################
# Cleanup and final setup
########################################################
# Clean up downloaded wheels
RUN rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}
# Initialize conda for bash
RUN /opt/miniconda3/bin/conda init bash
# Create shell aliases for convenience
RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"\nalias finetune="conda activate fine-tune"' >> /root/.bashrc
########################################################
# Extract version information for image naming
########################################################
# Extract versions from each component and save to versions.env
RUN set -x && \
# SGLang version (from version.py file)
cd /workspace/sglang/python/sglang && \
SGLANG_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \
echo "SGLANG_VERSION=$SGLANG_VERSION" > /workspace/versions.env && \
echo "Extracted SGLang version: $SGLANG_VERSION" && \
\
# KTransformers version (from version.py in repo)
cd /workspace/ktransformers && \
KTRANSFORMERS_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \
echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \
echo "Extracted KTransformers version: $KTRANSFORMERS_VERSION" && \
\
# LLaMA-Factory version (from fine-tune environment)
. /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
cd /workspace/LLaMA-Factory && \
LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION" && \
\
# Display all versions
echo "=== Version Summary ===" && \
cat /workspace/versions.env
WORKDIR /workspace
CMD ["/bin/bash"]

docker/README-packaging.md (new file, 387 lines)

@@ -0,0 +1,387 @@
# KTransformers Docker Packaging Guide
This directory contains scripts for building and distributing KTransformers Docker images with standardized naming conventions.
## Overview
The packaging system provides:
- **Automated version detection** from sglang, ktransformers, and LLaMA-Factory
- **Multi-CPU variant support** (AMX, AVX512, AVX2) with runtime auto-detection
- **Standardized naming convention** for easy identification and management
- **Two distribution methods**:
- Local tar file export for offline distribution
- DockerHub publishing for online distribution
## Naming Convention
Docker images follow this naming pattern:
```
sglang-v{sglang_version}_ktransformers-v{ktransformers_version}_{cpu_info}_{gpu_info}_{functionality}_{timestamp}
```
### Example Names
**Tar file:**
```
sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
```
**DockerHub tags:**
```
Full tag:
kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
Simplified tag:
kvcache/ktransformers:v0.4.3-cu128
```
### Name Components
| Component | Description | Example |
|-----------|-------------|---------|
| sglang version | SGLang package version | `v0.5.6` |
| ktransformers version | KTransformers version | `v0.4.3` |
| cpu info | CPU instruction set support | `x86-intel-multi` (includes AMX/AVX512/AVX2) |
| gpu info | CUDA version | `cu128` (CUDA 12.8) |
| functionality | Feature mode | `sft_llamafactory-v0.9.3` or `infer` |
| timestamp | Build time (Beijing/UTC+8) | `20241212143022` |
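As a sanity check, the components can be recovered from a full name with standard shell tools. A minimal sketch, with field patterns assumed from the examples above:

```bash
# Parse an example image name back into its components.
name="sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022"
sglang=$(echo "$name" | sed -E 's/^sglang-(v[0-9.]+)_.*/\1/')           # v0.5.6
kt=$(echo "$name" | sed -E 's/.*_ktransformers-(v[0-9.]+)_.*/\1/')      # v0.4.3
cpu=$(echo "$name" | grep -oE 'x86-[a-z-]+')                            # x86-intel-multi
gpu=$(echo "$name" | grep -oE 'cu[0-9]+')                               # cu128
stamp=$(echo "$name" | grep -oE '[0-9]{14}$')                           # 20241212143022
echo "$sglang $kt $cpu $gpu $stamp"
```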
## Files
| File | Purpose |
|------|---------|
| `Dockerfile` | Main Dockerfile with multi-CPU build and version extraction |
| `docker-utils.sh` | Shared utility functions for both scripts |
| `build-docker-tar.sh` | Build and export Docker image to tar file |
| `push-to-dockerhub.sh` | Build and push Docker image to DockerHub |
## Prerequisites
- Docker installed and running
- For DockerHub push: Docker Hub account and login (`docker login`)
- Sufficient disk space (at least 20GB recommended)
- Internet access (or local mirrors configured)
## Quick Start
### Build Local Tar File
```bash
cd docker
# Basic build
./build-docker-tar.sh
# With specific CUDA version and mirror
./build-docker-tar.sh \
--cuda-version 12.8.1 \
--ubuntu-mirror 1
# With proxy
./build-docker-tar.sh \
--cuda-version 12.8.1 \
--ubuntu-mirror 1 \
--http-proxy "http://127.0.0.1:16981" \
--https-proxy "http://127.0.0.1:16981" \
--output-dir /path/to/output
```
### Push to DockerHub
```bash
cd docker
# Basic push (requires --repository)
./push-to-dockerhub.sh \
--repository kvcache/ktransformers
# With simplified tag
./push-to-dockerhub.sh \
--cuda-version 12.8.1 \
--repository kvcache/ktransformers \
--also-push-simplified
# Skip build if image exists
./push-to-dockerhub.sh \
--repository kvcache/ktransformers \
--skip-build
```
## Script Options
### build-docker-tar.sh
```
Build Configuration:
--cuda-version VERSION CUDA version (default: 12.8.1)
--ubuntu-mirror 0|1 Use Tsinghua mirror (default: 0)
--http-proxy URL HTTP proxy URL
--https-proxy URL HTTPS proxy URL
--cpu-variant VARIANT CPU variant (default: x86-intel-multi)
--functionality TYPE Mode: sft or infer (default: sft)
Paths:
--dockerfile PATH Path to Dockerfile (default: ./Dockerfile)
--context-dir PATH Build context directory (default: .)
--output-dir PATH Output directory for tar (default: .)
Options:
--dry-run Preview without building
--keep-image Keep Docker image after export
--build-arg KEY=VALUE Additional build arguments
-h, --help Show help message
```
### push-to-dockerhub.sh
```
All options from build-docker-tar.sh, plus:
Registry Settings:
--registry REGISTRY Docker registry (default: docker.io)
--repository REPO Repository name (REQUIRED)
Options:
--skip-build Skip build if image exists
--also-push-simplified Also push simplified tag
--max-retries N Max push retries (default: 3)
--retry-delay SECONDS Delay between retries (default: 5)
```
## Usage Examples
### Example 1: Local Development Build
For testing on your local machine:
```bash
./build-docker-tar.sh \
--cuda-version 12.8.1 \
--output-dir ./builds \
--keep-image
```
This will:
1. Build the Docker image
2. Export to tar in `./builds/` directory
3. Keep the Docker image for local testing
### Example 2: Production Build for Distribution
For creating a production build with mirrors and proxy:
```bash
./build-docker-tar.sh \
--cuda-version 12.8.1 \
--ubuntu-mirror 1 \
--http-proxy "http://127.0.0.1:16981" \
--https-proxy "http://127.0.0.1:16981" \
--output-dir /mnt/data/releases
```
### Example 3: Publish to DockerHub
For publishing to DockerHub:
```bash
# First, login to Docker Hub
docker login
# Then push
./push-to-dockerhub.sh \
--cuda-version 12.8.1 \
--repository kvcache/ktransformers \
--also-push-simplified
```
This creates two tags:
- Full: `kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022`
- Simplified: `kvcache/ktransformers:v0.4.3-cu128`
### Example 4: Dry Run
Preview the build without actually building:
```bash
./build-docker-tar.sh --cuda-version 12.8.1 --dry-run
```
### Example 5: Custom Build Arguments
Pass additional Docker build arguments:
```bash
./build-docker-tar.sh \
--cuda-version 12.8.1 \
--build-arg SGL_VERSION=0.5.7 \
--build-arg FLASHINFER_VERSION=0.5.4
```
## Using the Built Images
### Load from Tar File
```bash
# Load the image
docker load -i sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
# Run the container
docker run -it --rm \
--gpus all \
sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 \
/bin/bash
```
### Pull from DockerHub
```bash
# Pull with full tag
docker pull kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
# Or pull with simplified tag
docker pull kvcache/ktransformers:v0.4.3-cu128
# Run the container
docker run -it --rm \
--gpus all \
kvcache/ktransformers:v0.4.3-cu128 \
/bin/bash
```
### Inside the Container
The image contains two conda environments:
```bash
# Activate serve environment (for inference with sglang)
conda activate serve
# or use the alias:
serve
# Activate fine-tune environment (for training with LLaMA-Factory)
conda activate fine-tune
# or use the alias:
finetune
```
## Multi-CPU Variant Support
The Docker image includes all three CPU variants:
- **AMX** - For Intel Sapphire Rapids and newer (4th Gen Xeon+)
- **AVX512** - For Intel Skylake-X, Ice Lake, Cascade Lake
- **AVX2** - Maximum compatibility for older CPUs
The runtime automatically detects your CPU and loads the appropriate variant. To override:
```bash
# Force use of AVX2 variant
export KT_KERNEL_CPU_VARIANT=avx2
python your_script.py
# Enable debug output to see which variant is loaded
export KT_KERNEL_DEBUG=1
python your_script.py
```
## Version Extraction
Versions are automatically extracted during Docker build from:
- **SGLang**: From `sglang.__version__` in serve environment
- **KTransformers**: From `version.py` in ktransformers repository
- **LLaMA-Factory**: From `llamafactory.__version__` in fine-tune environment
The versions are saved to `/workspace/versions.env` in the image:
```bash
# View versions in running container
cat /workspace/versions.env
# Output:
SGLANG_VERSION=0.5.6
KTRANSFORMERS_VERSION=0.4.3
LLAMAFACTORY_VERSION=0.9.3
```
## Troubleshooting
### Build Fails with Out of Disk Space
Check available disk space:
```bash
df -h
```
The build requires approximately 15-20GB of disk space. Clean up Docker:
```bash
docker system prune -a
```
### Version Extraction Fails
If version extraction fails (shows "unknown"), check:
1. The cloned repositories have the correct branches
2. Python packages are properly installed in conda environments
3. Version files exist in expected locations
You can manually verify by running:
```bash
docker run --rm <image> /bin/bash -c "
source /opt/miniconda3/etc/profile.d/conda.sh &&
conda activate serve &&
python -c 'import sglang; print(sglang.__version__)'
"
```
### Push to DockerHub Fails
1. **Check login**: `docker login`
2. **Check repository name**: Must include namespace (e.g., `kvcache/ktransformers`, not just `ktransformers`)
3. **Network issues**: Use the `--max-retries` and `--retry-delay` options (see the example below)
4. **Rate limiting**: DockerHub has pull/push rate limits for free accounts
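For transient network failures, the retry options documented above can be combined, for example:
```bash
# Retry the push up to 5 times, waiting 30 seconds between attempts
./push-to-dockerhub.sh \
  --repository kvcache/ktransformers \
  --max-retries 5 \
  --retry-delay 30
```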
## Advanced Topics
### Custom Dockerfile Location
```bash
./build-docker-tar.sh \
--dockerfile /path/to/custom/Dockerfile \
--context-dir /path/to/build/context
```
### Building Only Inference Image (Future)
Currently, the image always includes both serve and fine-tune environments. To create an inference-only image, modify the Dockerfile to skip the fine-tune environment section.
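In the meantime, `build-docker-tar.sh` already accepts `--functionality infer`; per the current script this flag only affects the generated tag (it is not passed to the Docker build), so the resulting image still contains both environments:
```bash
# Tags the build as "infer" in the exported filename; image contents are unchanged for now
./build-docker-tar.sh \
  --cuda-version 12.8.1 \
  --functionality infer
```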
### Customizing CPU Variants
To build only specific CPU variants, modify `kt-kernel/install.sh` or set environment variables in the Dockerfile.
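As a sketch (the exact Dockerfile layout is an assumption), a single variant can be selected by exporting the same variables that the multi-variant build in `setup.py` sets per variant, instead of enabling all three:
```bash
# Sketch: build only the AVX512 variant instead of all three.
# CPUINFER_CPU_INSTRUCT / CPUINFER_ENABLE_AMX are the per-variant variables
# used by setup.py's multi-variant build; leaving CPUINFER_BUILD_ALL_VARIANTS
# unset (or 0) keeps the original single-variant behavior.
cd kt-kernel
export CPUINFER_BUILD_ALL_VARIANTS=0
export CPUINFER_CPU_INSTRUCT=AVX512
export CPUINFER_ENABLE_AMX=OFF
pip install . -v
```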
### CI/CD Integration
The scripts are designed for manual execution but can be integrated into CI/CD pipelines:
```yaml
# Example GitHub Actions workflow
- name: Build and push Docker image
run: |
cd docker
./push-to-dockerhub.sh \
--cuda-version ${{ matrix.cuda_version }} \
--repository ${{ secrets.DOCKER_REPOSITORY }} \
--also-push-simplified
```
## Support
For issues and questions:
- File an issue at: https://github.com/kvcache-ai/ktransformers/issues
- Check documentation: https://github.com/kvcache-ai/ktransformers
## License
This packaging system is part of KTransformers and follows the same license.

498
docker/build-docker-tar.sh Executable file
View File

@@ -0,0 +1,498 @@
#!/usr/bin/env bash
#
# build-docker-tar.sh - Build Docker image and export to tar file
#
# This script builds a Docker image for ktransformers with standardized naming
# and exports it to a tar file for distribution.
#
# Features:
# - Automatic version detection from built image
# - Standardized naming convention
# - Multi-CPU variant support (AMX/AVX512/AVX2)
# - Configurable build parameters
# - Comprehensive error handling
#
# Usage:
# ./build-docker-tar.sh [OPTIONS]
#
# Example:
# ./build-docker-tar.sh \
# --cuda-version 12.8.1 \
# --ubuntu-mirror 1 \
# --http-proxy "http://127.0.0.1:16981" \
# --output-dir /path/to/output
set -euo pipefail
# Get script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Source utility functions
# shellcheck source=docker-utils.sh
source "$SCRIPT_DIR/docker-utils.sh"
################################################################################
# Default Configuration
################################################################################
# Build parameters
CUDA_VERSION="12.8.1"
UBUNTU_MIRROR="0"
HTTP_PROXY=""
HTTPS_PROXY=""
CPU_VARIANT="x86-intel-multi"
FUNCTIONALITY="sft"
# Paths
DOCKERFILE="$SCRIPT_DIR/Dockerfile"
CONTEXT_DIR="$SCRIPT_DIR"
OUTPUT_DIR="."
# Options
DRY_RUN=false
KEEP_IMAGE=false
EXTRA_BUILD_ARGS=()
################################################################################
# Help Message
################################################################################
usage() {
cat <<EOF
Usage: $0 [OPTIONS]
Build Docker image and export to tar file with standardized naming.
OPTIONS:
Build Configuration:
--cuda-version VERSION CUDA version (default: 12.8.1)
Examples: 12.8.1, 12.6.1, 13.0.1
--ubuntu-mirror 0|1 Use Tsinghua mirror for Ubuntu packages
(default: 0)
--http-proxy URL HTTP proxy URL
Example: http://127.0.0.1:16981
--https-proxy URL HTTPS proxy URL
Example: http://127.0.0.1:16981
--cpu-variant VARIANT CPU variant identifier
(default: x86-intel-multi)
--functionality TYPE Functionality mode: sft or infer
(default: sft, includes LLaMA-Factory)
Paths:
--dockerfile PATH Path to Dockerfile
(default: ./Dockerfile)
--context-dir PATH Docker build context directory
(default: .)
--output-dir PATH Output directory for tar file
(default: current directory)
Options:
--dry-run Preview build command without executing
--keep-image Keep Docker image after exporting tar
--build-arg KEY=VALUE Additional build arguments (can be repeated)
-h, --help Show this help message
EXAMPLES:
# Basic build with default settings
$0
# Build with CUDA 12.8.1 and mirror
$0 --cuda-version 12.8.1 --ubuntu-mirror 1
# Build with proxy and custom output directory
$0 \\
--cuda-version 12.8.1 \\
--http-proxy "http://127.0.0.1:16981" \\
--https-proxy "http://127.0.0.1:16981" \\
--output-dir /mnt/data/docker-images
# Dry run to preview
$0 --cuda-version 12.8.1 --dry-run
OUTPUT:
The tar file will be named following the convention:
sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}.tar
Example: sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
EOF
exit 0
}
################################################################################
# Argument Parsing
################################################################################
parse_args() {
while [[ $# -gt 0 ]]; do
case "$1" in
--cuda-version)
CUDA_VERSION="$2"
shift 2
;;
--ubuntu-mirror)
UBUNTU_MIRROR="$2"
shift 2
;;
--http-proxy)
HTTP_PROXY="$2"
shift 2
;;
--https-proxy)
HTTPS_PROXY="$2"
shift 2
;;
--cpu-variant)
CPU_VARIANT="$2"
shift 2
;;
--functionality)
FUNCTIONALITY="$2"
shift 2
;;
--dockerfile)
DOCKERFILE="$2"
shift 2
;;
--context-dir)
CONTEXT_DIR="$2"
shift 2
;;
--output-dir)
OUTPUT_DIR="$2"
shift 2
;;
--dry-run)
DRY_RUN=true
shift
;;
--keep-image)
KEEP_IMAGE=true
shift
;;
--build-arg)
EXTRA_BUILD_ARGS+=("--build-arg" "$2")
shift 2
;;
-h|--help)
usage
;;
*)
log_error "Unknown option: $1"
echo "Use -h or --help for usage information"
exit 1
;;
esac
done
}
################################################################################
# Validation
################################################################################
validate_config() {
log_step "Validating configuration"
# Check Docker is running
check_docker_running || exit 1
# Validate CUDA version
validate_cuda_version "$CUDA_VERSION" || exit 1
# Check Dockerfile exists
if [ ! -f "$DOCKERFILE" ]; then
log_error "Dockerfile not found: $DOCKERFILE"
exit 1
fi
log_info "Using Dockerfile: $DOCKERFILE"
# Check context directory exists
if [ ! -d "$CONTEXT_DIR" ]; then
log_error "Context directory not found: $CONTEXT_DIR"
exit 1
fi
log_info "Using context directory: $CONTEXT_DIR"
# Create output directory if it doesn't exist
if [ ! -d "$OUTPUT_DIR" ]; then
log_info "Creating output directory: $OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR"
fi
# Check output directory is writable
check_writable "$OUTPUT_DIR" || exit 1
log_info "Output directory: $OUTPUT_DIR"
# Check disk space (recommend at least 20GB free)
check_disk_space 20 "$OUTPUT_DIR" || {
log_warning "Continuing despite low disk space warning..."
}
# Validate functionality mode
if [[ "$FUNCTIONALITY" != "sft" && "$FUNCTIONALITY" != "infer" ]]; then
log_error "Invalid functionality mode: $FUNCTIONALITY"
log_error "Must be 'sft' or 'infer'"
exit 1
fi
log_success "Configuration validated"
}
################################################################################
# Build Docker Image
################################################################################
build_image() {
local temp_tag="ktransformers:temp-build-$(get_beijing_timestamp)"
log_step "Building Docker image" >&2
log_info "Temporary tag: $temp_tag" >&2
# Prepare build arguments
local build_args=()
build_args+=("--build-arg" "CUDA_VERSION=$CUDA_VERSION")
build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
# Add proxy settings if provided
if [ -n "$HTTP_PROXY" ]; then
build_args+=("--build-arg" "HTTP_PROXY=$HTTP_PROXY")
fi
if [ -n "$HTTPS_PROXY" ]; then
build_args+=("--build-arg" "HTTPS_PROXY=$HTTPS_PROXY")
fi
    # Add extra build args (guarded so an empty array does not trip `set -u` on older bash)
    if [ ${#EXTRA_BUILD_ARGS[@]} -gt 0 ]; then
        build_args+=("${EXTRA_BUILD_ARGS[@]}")
    fi
# Add network host
build_args+=("--network" "host")
# Build command
local build_cmd=(
docker build
-f "$DOCKERFILE"
"${build_args[@]}"
-t "$temp_tag"
"$CONTEXT_DIR"
)
# Display build command
{
log_info "Build command:"
printf ' %s \\\n' "${build_cmd[@]:0:${#build_cmd[@]}-1}"
printf ' %s\n' "${build_cmd[-1]}"
} >&2
if [ "$DRY_RUN" = true ]; then
log_warning "DRY RUN: Skipping actual build" >&2
echo "$temp_tag"
return 0
fi
# Execute build
log_info "Starting Docker build (this may take 30-60 minutes)..." >&2
if "${build_cmd[@]}" >&2; then
log_success "Docker image built successfully" >&2
echo "$temp_tag"
else
log_error "Docker build failed" >&2
exit 1
fi
}
################################################################################
# Extract Versions and Generate Name
################################################################################
generate_tar_name() {
local image_tag="$1"
local timestamp="$2"
if [ "$DRY_RUN" = true ]; then
log_warning "DRY RUN: Using placeholder versions"
# Use placeholder versions for dry run
local versions="SGLANG_VERSION=0.5.6
KTRANSFORMERS_VERSION=0.4.3
LLAMAFACTORY_VERSION=0.9.3"
else
# Extract versions from image
local versions
versions=$(extract_versions_from_image "$image_tag")
if [ $? -ne 0 ]; then
log_error "Failed to extract versions from image"
exit 1
fi
# Validate versions
if ! validate_versions "$versions"; then
log_error "Version validation failed"
exit 1
fi
fi
# Generate standardized image name
local tar_name
tar_name=$(generate_image_name "$versions" "$CUDA_VERSION" "$CPU_VARIANT" "$FUNCTIONALITY" "$timestamp")
if [ -z "$tar_name" ]; then
log_error "Failed to generate image name"
exit 1
fi
echo "$tar_name"
}
################################################################################
# Export to Tar
################################################################################
export_to_tar() {
local image_tag="$1"
local tar_name="$2"
local tar_path="$OUTPUT_DIR/${tar_name}.tar"
log_step "Exporting image to tar file" >&2
log_info "Output: $tar_path" >&2
if [ "$DRY_RUN" = true ]; then
log_warning "DRY RUN: Skipping actual export" >&2
return 0
fi
# Check if tar file already exists
if [ -f "$tar_path" ]; then
log_warning "Tar file already exists: $tar_path" >&2
read -p "Overwrite? (y/N) " -n 1 -r
        echo >&2
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
log_error "Export cancelled by user" >&2
exit 1
fi
rm -f "$tar_path"
fi
# Tag image with the standardized name before saving
log_info "Tagging image with standardized name: $tar_name" >&2
if ! docker tag "$image_tag" "$tar_name"; then
log_error "Failed to tag image" >&2
exit 1
fi
# Export image with the standardized tag
log_info "Exporting image (this may take several minutes)..." >&2
if docker save -o "$tar_path" "$tar_name"; then
log_success "Image exported successfully" >&2
# Get file size
local size
size=$(du -h "$tar_path" | cut -f1)
log_info "Tar file size: $size" >&2
else
log_error "Failed to export image" >&2
exit 1
fi
echo "$tar_path"
}
################################################################################
# Cleanup
################################################################################
cleanup() {
local image_tag="$1"
if [ "$KEEP_IMAGE" = true ]; then
log_info "Keeping Docker image as requested: $image_tag"
else
cleanup_temp_images "$image_tag"
fi
}
################################################################################
# Main
################################################################################
main() {
log_step "KTransformers Docker Image Build and Export"
# Parse arguments
parse_args "$@"
# Validate configuration
validate_config
# Generate timestamp
TIMESTAMP=$(get_beijing_timestamp)
log_info "Build timestamp: $TIMESTAMP"
# Display configuration
display_summary "Build Configuration" \
"CUDA Version: $CUDA_VERSION" \
"Ubuntu Mirror: $UBUNTU_MIRROR" \
"CPU Variant: $CPU_VARIANT" \
"Functionality: $FUNCTIONALITY" \
"HTTP Proxy: ${HTTP_PROXY:-<not set>}" \
"HTTPS Proxy: ${HTTPS_PROXY:-<not set>}" \
"Dockerfile: $DOCKERFILE" \
"Context Dir: $CONTEXT_DIR" \
"Output Dir: $OUTPUT_DIR" \
"Timestamp: $TIMESTAMP" \
"Dry Run: $DRY_RUN"
# Build image
TEMP_TAG=$(build_image)
# Generate tar name
TAR_NAME=$(generate_tar_name "$TEMP_TAG" "$TIMESTAMP")
log_info "Generated tar name: $TAR_NAME.tar"
if [ "$DRY_RUN" = true ]; then
# Display dry-run summary
display_summary "DRY RUN Preview" \
"This is what would be built:" \
"" \
"Temporary Docker tag: $TEMP_TAG" \
"Tar filename: $TAR_NAME.tar" \
"Output path: $OUTPUT_DIR/$TAR_NAME.tar" \
"" \
"After build, you would run:" \
" docker load -i $OUTPUT_DIR/$TAR_NAME.tar" \
" docker run -it --rm ${TAR_NAME} /bin/bash"
log_success "DRY RUN: Preview complete. Remove --dry-run to build."
exit 0
fi
# Export to tar
TAR_PATH=$(export_to_tar "$TEMP_TAG" "$TAR_NAME")
# Cleanup
cleanup "$TEMP_TAG"
# Display summary
display_summary "Build Complete" \
"Docker Image: $TEMP_TAG ($([ "$KEEP_IMAGE" = true ] && echo "kept" || echo "removed"))" \
"Tar File: $TAR_PATH" \
"" \
"To load the image:" \
" docker load -i $TAR_PATH" \
"" \
"To run the container:" \
" docker run -it --rm ${TAR_NAME} /bin/bash"
log_success "All done!"
}
# Run main function
main "$@"

372
docker/docker-utils.sh Executable file
View File

@@ -0,0 +1,372 @@
#!/usr/bin/env bash
#
# docker-utils.sh - Shared utility functions for Docker image build and publish scripts
#
# This script provides common functions for:
# - Timestamp generation (Beijing timezone)
# - Version extraction from Docker images
# - Image name generation following naming conventions
# - Colored logging
# - Validation and error handling
#
# Usage: source docker-utils.sh
set -euo pipefail
# Color codes for logging
COLOR_RED='\033[0;31m'
COLOR_GREEN='\033[0;32m'
COLOR_YELLOW='\033[1;33m'
COLOR_BLUE='\033[0;34m'
COLOR_CYAN='\033[0;36m'
COLOR_RESET='\033[0m'
################################################################################
# Logging Functions
################################################################################
log_info() {
echo -e "${COLOR_BLUE}[INFO]${COLOR_RESET} $*"
}
log_success() {
echo -e "${COLOR_GREEN}[SUCCESS]${COLOR_RESET} $*"
}
log_warning() {
echo -e "${COLOR_YELLOW}[WARNING]${COLOR_RESET} $*"
}
log_error() {
echo -e "${COLOR_RED}[ERROR]${COLOR_RESET} $*" >&2
}
log_step() {
echo -e "\n${COLOR_CYAN}==>${COLOR_RESET} $*"
}
################################################################################
# Timestamp Functions
################################################################################
# Generate timestamp in Beijing timezone (UTC+8)
# Format: YYYYMMDDHHMMSS
# Example: 20241212143022
get_beijing_timestamp() {
    # Setting TZ works for both GNU date (Linux) and BSD date (macOS)
    TZ='Asia/Shanghai' date '+%Y%m%d%H%M%S'
}
################################################################################
# CUDA Version Parsing
################################################################################
# Parse CUDA version to short format
# Input: 12.8.1 or 12.8 or 13.0.1
# Output: cu128 or cu130
parse_cuda_short_version() {
local cuda_version="$1"
# Extract major and minor version
local major minor
major=$(echo "$cuda_version" | cut -d. -f1)
minor=$(echo "$cuda_version" | cut -d. -f2)
# Validate
if [[ ! "$major" =~ ^[0-9]+$ ]] || [[ ! "$minor" =~ ^[0-9]+$ ]]; then
log_error "Invalid CUDA version format: $cuda_version"
log_error "Expected format: X.Y.Z (e.g., 12.8.1)"
return 1
fi
echo "cu${major}${minor}"
}
################################################################################
# Version Extraction
################################################################################
# Extract versions from built Docker image
# Input: image tag (e.g., ktransformers:temp-build-20241212)
# Output: Prints the extracted versions to stdout, one KEY=value per line:
# SGLANG_VERSION=x.y.z
# KTRANSFORMERS_VERSION=x.y.z
# LLAMAFACTORY_VERSION=x.y.z
extract_versions_from_image() {
local image_tag="$1"
log_step "Extracting versions from image: $image_tag"
# Check if image exists
if ! docker image inspect "$image_tag" &>/dev/null; then
log_error "Image not found: $image_tag"
return 1
fi
# Extract versions.env file from the image
local versions_content
versions_content=$(docker run --rm "$image_tag" cat /workspace/versions.env 2>/dev/null)
if [ -z "$versions_content" ]; then
log_error "Failed to extract versions from image"
log_error "The /workspace/versions.env file may not exist in the image"
return 1
fi
# Parse and display versions
log_info "Extracted versions:"
echo "$versions_content" | while IFS= read -r line; do
log_info " $line"
done
# Output the content (caller can parse this or eval it)
echo "$versions_content"
}
# Validate that all required versions were extracted
# Input: versions string (output from extract_versions_from_image)
validate_versions() {
local versions="$1"
local all_valid=true
# Check each required version
for var in SGLANG_VERSION KTRANSFORMERS_VERSION LLAMAFACTORY_VERSION; do
local value
value=$(echo "$versions" | grep "^${var}=" | cut -d= -f2)
if [ -z "$value" ]; then
log_error "Missing version: $var"
all_valid=false
elif [ "$value" = "unknown" ]; then
log_warning "Version is 'unknown': $var"
# Don't fail, but warn user
fi
done
if [ "$all_valid" = false ]; then
return 1
fi
return 0
}
################################################################################
# Image Naming
################################################################################
# Generate standardized image name
# Input:
# $1: versions string (from extract_versions_from_image)
# $2: cuda_version (e.g., 12.8.1)
# $3: cpu_variant (e.g., x86-intel-multi)
# $4: functionality (e.g., sft_llamafactory or infer)
# $5: timestamp (optional, will generate if not provided)
# Output: Standardized image name
# Format: sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}
generate_image_name() {
local versions="$1"
local cuda_version="$2"
local cpu_variant="$3"
local functionality="$4"
local timestamp="${5:-$(get_beijing_timestamp)}"
# Parse versions from the versions string
local sglang_ver ktrans_ver llama_ver
sglang_ver=$(echo "$versions" | grep "^SGLANG_VERSION=" | cut -d= -f2)
ktrans_ver=$(echo "$versions" | grep "^KTRANSFORMERS_VERSION=" | cut -d= -f2)
llama_ver=$(echo "$versions" | grep "^LLAMAFACTORY_VERSION=" | cut -d= -f2)
# Validate versions were extracted
if [ -z "$sglang_ver" ] || [ -z "$ktrans_ver" ] || [ -z "$llama_ver" ]; then
log_error "Failed to parse versions from input"
return 1
fi
# Parse CUDA short version
local cuda_short
cuda_short=$(parse_cuda_short_version "$cuda_version")
# Build functionality string
local func_str
if [ "$functionality" = "sft" ]; then
func_str="sft_llamafactory-v${llama_ver}"
else
func_str="infer"
fi
# Generate full image name
# Format: sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}
local image_name
image_name="sglang-v${sglang_ver}_ktransformers-v${ktrans_ver}_${cpu_variant}_${cuda_short}_${func_str}_${timestamp}"
echo "$image_name"
}
# Generate simplified tag for DockerHub
# Input:
# $1: ktransformers_version (e.g., 0.4.3)
# $2: cuda_version (e.g., 12.8.1)
# Output: Simplified tag (e.g., v0.4.3-cu128)
generate_simplified_tag() {
local ktrans_ver="$1"
local cuda_version="$2"
local cuda_short
cuda_short=$(parse_cuda_short_version "$cuda_version")
echo "v${ktrans_ver}-${cuda_short}"
}
################################################################################
# Validation Functions
################################################################################
# Check if Docker daemon is running
check_docker_running() {
if ! docker info &>/dev/null; then
log_error "Docker daemon is not running"
log_error "Please start Docker and try again"
return 1
fi
return 0
}
# Check if user is logged into Docker registry
# Input: registry (optional, default: docker.io)
check_docker_login() {
local registry="${1:-docker.io}"
# Best-effort check: confirm the Docker CLI is available (actual auth is only verified at push time)
if ! docker login --help &>/dev/null; then
log_error "Docker CLI is not available"
return 1
fi
# Note: This is a best-effort check
# docker login status is not always easy to check programmatically
log_info "Assuming Docker login is configured"
log_info "If push fails, please run: docker login $registry"
return 0
}
# Validate CUDA version format
validate_cuda_version() {
local cuda_version="$1"
if [[ ! "$cuda_version" =~ ^[0-9]+\.[0-9]+(\.[0-9]+)?$ ]]; then
log_error "Invalid CUDA version format: $cuda_version"
log_error "Expected format: X.Y or X.Y.Z (e.g., 12.8 or 12.8.1)"
return 1
fi
return 0
}
# Check available disk space
# Input: required space in GB
check_disk_space() {
local required_gb="$1"
local output_dir="${2:-.}"
# Get available space in GB (works on Linux and macOS)
local available_kb
if df -k "$output_dir" &>/dev/null; then
available_kb=$(df -k "$output_dir" | tail -1 | awk '{print $4}')
local available_gb=$((available_kb / 1024 / 1024))
log_info "Available disk space: ${available_gb}GB"
if [ "$available_gb" -lt "$required_gb" ]; then
log_warning "Low disk space: ${available_gb}GB available, ${required_gb}GB recommended"
return 1
fi
else
log_warning "Unable to check disk space"
fi
return 0
}
# Check if file/directory exists and is writable
check_writable() {
local path="$1"
if [ -e "$path" ]; then
if [ ! -w "$path" ]; then
log_error "Path exists but is not writable: $path"
return 1
fi
else
# Try to create parent directory to test writability
local parent_dir
parent_dir=$(dirname "$path")
if [ ! -w "$parent_dir" ]; then
log_error "Parent directory is not writable: $parent_dir"
return 1
fi
fi
return 0
}
################################################################################
# Cleanup Functions
################################################################################
# Remove intermediate Docker images
cleanup_temp_images() {
local image_tag="$1"
log_step "Cleaning up temporary image: $image_tag"
if docker image inspect "$image_tag" &>/dev/null; then
docker rmi "$image_tag" &>/dev/null || true
log_success "Cleaned up temporary image"
fi
}
################################################################################
# Display Functions
################################################################################
# Display a summary box
display_summary() {
local title="$1"
shift
local lines=("$@")
local width=80
local border=$(printf '=%.0s' $(seq 1 $width))
echo ""
echo "$border"
echo " $title"
echo "$border"
for line in "${lines[@]}"; do
echo " $line"
done
echo "$border"
echo ""
}
################################################################################
# Export functions
################################################################################
# Export all functions so they can be used by scripts that source this file
export -f log_info log_success log_warning log_error log_step
export -f get_beijing_timestamp
export -f parse_cuda_short_version
export -f extract_versions_from_image validate_versions
export -f generate_image_name generate_simplified_tag
export -f check_docker_running check_docker_login validate_cuda_version
export -f check_disk_space check_writable
export -f cleanup_temp_images
export -f display_summary

1142
docker/push-to-dockerhub.sh Executable file

File diff suppressed because it is too large

View File

@@ -28,7 +28,7 @@ option(KTRANSFORMERS_CPU_MOE_AMD "ktransformers: CPU use moe kernel for amd" OFF
# LTO control
option(CPUINFER_ENABLE_LTO "Enable link time optimization (IPO)" OFF)
project(kt_kernel_ext VERSION 0.1.0)
project(kt_kernel_ext VERSION 0.4.2)
# Choose compilers BEFORE project() so CMake honors them
if(USE_CONDA_TOOLCHAIN)
if(NOT DEFINED ENV{CONDA_PREFIX} OR NOT EXISTS "$ENV{CONDA_PREFIX}")

37
kt-kernel/MANIFEST.in Normal file
View File

@@ -0,0 +1,37 @@
# MANIFEST.in for kt-kernel
# Ensures source distribution includes all necessary files for building from source
# Core build files
include CMakeLists.txt
include CMakePresets.json
include setup.py
include pyproject.toml
include requirements.txt
include README.md
include LICENSE
# CMake modules and configuration
recursive-include cmake *.cmake *.in
# C++ source files
recursive-include cpu_backend *.h *.hpp *.cpp *.c *.cc
recursive-include operators *.h *.hpp *.cpp *.c *.cc
include ext_bindings.cpp
# Python package
recursive-include python *.py
# Third-party dependencies (vendored)
recursive-include third_party *
# Exclude compiled and cache files
global-exclude *.pyc
global-exclude *.pyo
global-exclude __pycache__
global-exclude .git*
global-exclude *.so
global-exclude *.o
global-exclude *.a
global-exclude build
global-exclude dist
global-exclude *.egg-info

View File

@@ -47,14 +47,75 @@ High-performance kernel operations for KTransformers, featuring CPU-optimized Mo
## Installation
### Prerequisites
### Option 1: Install from PyPI (Recommended for Most Users)
Choose the version matching your CUDA installation:
```bash
# For CUDA 11.8
pip install kt-kernel==0.4.2.cu118
# For CUDA 12.1
pip install kt-kernel==0.4.2.cu121
# For CUDA 12.4
pip install kt-kernel==0.4.2.cu124
# For CUDA 12.6
pip install kt-kernel==0.4.2.cu126
```
> **Note**: Replace `0.4.2` with the [latest version](https://pypi.org/project/kt-kernel/#history) if available.
**Features:**
- **Automatic CPU detection**: Detects your CPU and loads the optimal kernel variant
- **Multi-variant wheel**: Includes AMX, AVX512, and AVX2 variants in a single package
- **No compilation needed**: Pre-built wheels for Python 3.10, 3.11, 3.12
- **Multiple CUDA versions**: Choose the version matching your environment
**Requirements:**
- CUDA 11.8+ or 12.x runtime (must match the package version you install)
- PyTorch 2.0+ (install separately, must match CUDA version)
- Linux x86-64
**CPU Variants Included:**
| Variant | CPU Support | Use Case |
|---------|-------------|----------|
| **AMX** | Intel Sapphire Rapids+ | Best performance on latest Intel CPUs |
| **AVX512** | Intel Skylake-X/Ice Lake/Cascade Lake | AVX512-capable CPUs without AMX |
| **AVX2** | Intel Haswell+, AMD Zen+ | Maximum compatibility |
**Check which variant is loaded:**
```python
import kt_kernel
print(f"CPU variant: {kt_kernel.__cpu_variant__}") # 'amx', 'avx512', or 'avx2'
print(f"Version: {kt_kernel.__version__}")
```
**Environment Variables:**
```bash
# Override automatic CPU detection
export KT_KERNEL_CPU_VARIANT=avx2 # or 'avx512', 'amx'
# Enable debug output
export KT_KERNEL_DEBUG=1
python -c "import kt_kernel"
```
---
### Option 2: Install from Source (For AMD, ARM, or Custom Builds)
If you need AMD (BLIS), ARM (KML), or custom CUDA versions, build from source:
#### Prerequisites
First, initialize git submodules:
```bash
git submodule update --init --recursive
```
### Quick Installation (Recommended)
#### Quick Installation
Step 0: Create and activate a conda environment (recommended):
@@ -65,7 +126,7 @@ conda activate kt-kernel
You can now install in two clear steps using the same script.
Option A: Two-step (specify dependencies installation and build separately)
**Option A: Two-step** (specify dependencies installation and build separately)
```bash
# 1) Install system prerequisites (cmake, hwloc, pkg-config)
@@ -76,7 +137,7 @@ Option A: Two-step (specify dependencies installation and build separately)
./install.sh build
```
Option B: One-step
**Option B: One-step**
```bash
./install.sh

View File

@@ -161,6 +161,34 @@ build_step() {
echo "Skipping clean of $REPO_ROOT/build (requested by --no-clean)"
fi
# Check for multi-variant build mode (Docker environment)
if [ "${CPUINFER_BUILD_ALL_VARIANTS:-0}" = "1" ]; then
echo "=========================================="
echo "Building ALL CPU variants (AMX/AVX512/AVX2)"
echo "=========================================="
echo ""
echo "This will build three variants in a single wheel:"
echo " - AMX variant (Intel Sapphire Rapids+)"
echo " - AVX512 variant (Intel Skylake-X/Ice Lake+)"
echo " - AVX2 variant (maximum compatibility)"
echo ""
echo "Runtime CPU detection will automatically select the best variant."
echo ""
export CPUINFER_FORCE_REBUILD=1
export CPUINFER_BUILD_TYPE=${CPUINFER_BUILD_TYPE:-Release}
export CPUINFER_PARALLEL=${CPUINFER_PARALLEL:-8}
echo "Building with:"
echo " CPUINFER_BUILD_ALL_VARIANTS=1"
echo " CPUINFER_BUILD_TYPE=$CPUINFER_BUILD_TYPE"
echo " CPUINFER_PARALLEL=$CPUINFER_PARALLEL"
echo ""
pip install . -v
return 0
fi
if [ "$MANUAL_MODE" = "0" ]; then
# Auto-detection mode
echo "=========================================="

View File

@@ -5,7 +5,8 @@ build-backend = "setuptools.build_meta"
[project]
name = "kt-kernel"
version = "0.1.0"
# Version is dynamically read from ../version.py via setup.py
dynamic = ["version"]
description = "KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)"
readme = "README.md"
authors = [{ name = "kvcache-ai" }]

View File

@@ -5,6 +5,9 @@
KT-Kernel provides high-performance kernel operations for KTransformers,
including CPU-optimized MoE inference with AMX, AVX, and KML support.
The package automatically detects your CPU capabilities and loads the optimal
kernel variant (AMX, AVX512, or AVX2) at runtime.
Example usage:
>>> from kt_kernel import KTMoEWrapper
>>> wrapper = KTMoEWrapper(
@@ -20,11 +23,41 @@ Example usage:
... chunked_prefill_size=512,
... method="AMXINT4"
... )
Check which CPU variant is loaded:
>>> import kt_kernel
>>> print(kt_kernel.__cpu_variant__) # 'amx', 'avx512', or 'avx2'
Environment Variables:
KT_KERNEL_CPU_VARIANT: Override automatic detection ('amx', 'avx512', 'avx2')
KT_KERNEL_DEBUG: Enable debug output ('1' to enable)
"""
from __future__ import annotations
# Detect CPU and load optimal extension variant
from ._cpu_detect import initialize as _initialize_cpu
_kt_kernel_ext, __cpu_variant__ = _initialize_cpu()
# Make the extension module available to other modules in this package
import sys
sys.modules['kt_kernel_ext'] = _kt_kernel_ext
# Also expose kt_kernel_ext as an attribute for backward compatibility
kt_kernel_ext = _kt_kernel_ext
# Import main API
from .experts import KTMoEWrapper
__version__ = "0.1.0"
__all__ = ["KTMoEWrapper"]
# Read version from project root version.py
import os
_root_version_file = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'version.py')
if os.path.exists(_root_version_file):
_version_ns = {}
with open(_root_version_file, 'r', encoding='utf-8') as f:
exec(f.read(), _version_ns)
__version__ = _version_ns.get('__version__', '0.4.2')
else:
__version__ = "0.4.2"
__all__ = ["KTMoEWrapper", "kt_kernel_ext", "__cpu_variant__", "__version__"]

View File

@@ -0,0 +1,233 @@
"""
CPU feature detection and optimal kernel loader for kt-kernel.
This module automatically detects CPU capabilities and loads the best available
kernel variant (AMX, AVX512, or AVX2) at runtime.
Environment Variables:
KT_KERNEL_CPU_VARIANT: Override automatic detection ('amx', 'avx512', 'avx2')
KT_KERNEL_DEBUG: Enable debug output ('1' to enable)
Example:
>>> import kt_kernel
>>> print(kt_kernel.__cpu_variant__) # Shows detected variant
# Override detection
>>> import os
>>> os.environ['KT_KERNEL_CPU_VARIANT'] = 'avx2'
>>> import kt_kernel # Will use AVX2 variant
"""
import os
import sys
from pathlib import Path
def detect_cpu_features():
"""
Detect CPU features to determine the best kernel variant.
Detection hierarchy:
1. AMX: Intel Sapphire Rapids+ with AMX support
2. AVX512: CPUs with AVX512F support
3. AVX2: Fallback for maximum compatibility
Returns:
str: 'amx', 'avx512', or 'avx2'
"""
# Check environment override
variant = os.environ.get('KT_KERNEL_CPU_VARIANT', '').lower()
if variant in ['amx', 'avx512', 'avx2']:
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print(f"[kt-kernel] Using environment override: {variant}")
return variant
# Try to read /proc/cpuinfo on Linux
try:
with open('/proc/cpuinfo', 'r') as f:
cpuinfo = f.read().lower()
# Check for AMX support (Intel Sapphire Rapids+)
# AMX requires amx_tile, amx_int8, and amx_bf16
amx_flags = ['amx_tile', 'amx_int8', 'amx_bf16']
has_amx = all(flag in cpuinfo for flag in amx_flags)
if has_amx:
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] Detected AMX support via /proc/cpuinfo")
return 'amx'
# Check for AVX512 support
# AVX512F is the foundation for all AVX512 variants
if 'avx512f' in cpuinfo:
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] Detected AVX512 support via /proc/cpuinfo")
return 'avx512'
# Check for AVX2 support
if 'avx2' in cpuinfo:
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] Detected AVX2 support via /proc/cpuinfo")
return 'avx2'
# Fallback to AVX2 (should be rare on modern CPUs)
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] No AVX2/AVX512/AMX detected, using AVX2 fallback")
return 'avx2'
except FileNotFoundError:
# /proc/cpuinfo doesn't exist (not Linux or in container)
# Try cpufeature package as fallback
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] /proc/cpuinfo not found, trying cpufeature package")
try:
import cpufeature
# Check for AMX
if cpufeature.CPUFeature.get('AMX_TILE', False):
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] Detected AMX support via cpufeature")
return 'amx'
# Check for AVX512
if cpufeature.CPUFeature.get('AVX512F', False):
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] Detected AVX512 support via cpufeature")
return 'avx512'
# Fallback to AVX2
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] Using AVX2 fallback via cpufeature")
return 'avx2'
except ImportError:
# cpufeature not available - ultimate fallback
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] cpufeature not available, using AVX2 fallback")
return 'avx2'
except Exception as e:
# Any other error - safe fallback
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print(f"[kt-kernel] Error during CPU detection: {e}, using AVX2 fallback")
return 'avx2'
def load_extension(variant):
"""
Load the appropriate kt_kernel_ext variant.
Tries to import the specified variant, with automatic fallback to
lower-performance variants if the requested one is not available.
Supports both multi-variant builds (_kt_kernel_ext_amx.*.so) and
single-variant builds (kt_kernel_ext.*.so).
Fallback order: amx -> avx512 -> avx2 -> single-variant
Args:
variant (str): 'amx', 'avx512', or 'avx2'
Returns:
module: The loaded extension module
Raises:
ImportError: If all variants fail to load
"""
import importlib.util
import glob
# The .so files can be named in two ways:
# Multi-variant: _kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so
# Single-variant: kt_kernel_ext.cpython-311-x86_64-linux-gnu.so
# Both export PyInit_kt_kernel_ext (the original module name)
try:
# Find the kt_kernel package directory
# We can't import kt_kernel here (circular import), so use __file__
kt_kernel_dir = os.path.dirname(os.path.abspath(__file__))
# Try multi-variant naming first
pattern = os.path.join(kt_kernel_dir, f'_kt_kernel_ext_{variant}.*.so')
so_files = glob.glob(pattern)
if not so_files:
# Try single-variant naming (fallback for builds without CPUINFER_BUILD_ALL_VARIANTS)
pattern = os.path.join(kt_kernel_dir, 'kt_kernel_ext.*.so')
so_files = glob.glob(pattern)
if so_files:
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print(f"[kt-kernel] Multi-variant {variant} not found, using single-variant build")
else:
raise ImportError(f"No .so file found for variant {variant} (tried patterns: {kt_kernel_dir}/_kt_kernel_ext_{variant}.*.so and {kt_kernel_dir}/kt_kernel_ext.*.so)")
so_file = so_files[0]
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print(f"[kt-kernel] Loading {variant} from: {so_file}")
# Load the module manually
# The module exports PyInit_kt_kernel_ext, so we use that as the module name
spec = importlib.util.spec_from_file_location('kt_kernel_ext', so_file)
if spec is None or spec.loader is None:
raise ImportError(f"Failed to create spec for {so_file}")
ext = importlib.util.module_from_spec(spec)
spec.loader.exec_module(ext)
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print(f"[kt-kernel] Successfully loaded {variant.upper()} variant")
return ext
except (ImportError, ModuleNotFoundError, FileNotFoundError) as e:
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print(f"[kt-kernel] Failed to load {variant} variant: {e}")
# Automatic fallback to next best variant
if variant == 'amx':
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] Falling back from AMX to AVX512")
return load_extension('avx512')
elif variant == 'avx512':
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print("[kt-kernel] Falling back from AVX512 to AVX2")
return load_extension('avx2')
else:
# AVX2 is the last fallback - if this fails, we can't continue
raise ImportError(
f"Failed to load kt_kernel extension (variant: {variant}). "
f"Original error: {e}\n"
f"This usually means the kt_kernel package is not properly installed."
)
def initialize():
"""
Detect CPU capabilities and load the optimal extension variant.
This is the main entry point called by kt_kernel.__init__.py.
Returns:
tuple: (extension_module, variant_name)
- extension_module: The loaded C++ extension module
- variant_name: String indicating which variant was loaded ('amx', 'avx512', 'avx2')
Example:
>>> ext, variant = initialize()
>>> print(f"Loaded {variant} variant")
>>> wrapper = ext.AMXMoEWrapper(...)
"""
# Detect CPU features
variant = detect_cpu_features()
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print(f"[kt-kernel] Selected CPU variant: {variant}")
# Load the appropriate extension
ext = load_extension(variant)
if os.environ.get('KT_KERNEL_DEBUG') == '1':
print(f"[kt-kernel] Extension module loaded: {ext.__name__}")
return ext, variant

View File

@@ -229,6 +229,133 @@ class CMakeBuild(build_ext):
return info
def build_extension(self, ext: CMakeExtension):
"""
Main entry point for building the extension.
Checks if multi-variant build is requested (CPUINFER_BUILD_ALL_VARIANTS=1)
and routes to the appropriate build method.
"""
if _env_get_bool("CPUINFER_BUILD_ALL_VARIANTS", False):
# Build all 3 variants (AMX, AVX512, AVX2)
self.build_multi_variants(ext)
else:
# Build single variant (original behavior)
self._build_single_variant(ext)
def build_multi_variants(self, ext: CMakeExtension):
"""
Build all 3 CPU variants (AMX, AVX512, AVX2) in a single wheel.
This method is called when CPUINFER_BUILD_ALL_VARIANTS=1 is set.
It builds three separate extensions with different CPU instruction sets
and renames the output .so files with variant suffixes.
"""
print("=" * 80)
print("Building kt-kernel with ALL CPU variants (AMX, AVX512, AVX2)")
print("=" * 80)
# Define the 3 variants to build
variants = [
{
'name': 'amx',
'env': {
'CPUINFER_CPU_INSTRUCT': 'NATIVE',
'CPUINFER_ENABLE_AMX': 'ON',
},
'description': 'AMX variant (Intel Sapphire Rapids+)'
},
{
'name': 'avx512',
'env': {
'CPUINFER_CPU_INSTRUCT': 'AVX512',
'CPUINFER_ENABLE_AMX': 'OFF',
},
'description': 'AVX512 variant (Intel Skylake-X/Ice Lake/Cascade Lake)'
},
{
'name': 'avx2',
'env': {
'CPUINFER_CPU_INSTRUCT': 'AVX2',
'CPUINFER_ENABLE_AMX': 'OFF',
},
'description': 'AVX2 variant (maximum compatibility)'
}
]
# Save original environment
original_env = os.environ.copy()
extdir = Path(self.get_ext_fullpath(ext.name)).parent.resolve()
for i, variant in enumerate(variants, 1):
print(f"\n{'=' * 80}")
print(f"Building variant {i}/3: {variant['description']}")
print(f"{'=' * 80}\n")
# Set variant-specific environment variables
os.environ.update(variant['env'])
# Use a unique build directory for this variant
original_build_temp = self.build_temp
self.build_temp = str(Path(self.build_temp) / f"variant_{variant['name']}")
try:
# Build this variant (calls the single-variant build logic)
self._build_single_variant(ext)
# Rename the generated .so file to include variant suffix
# Original: kt_kernel_ext.cpython-311-x86_64-linux-gnu.so
# Renamed: _kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so
# Extract the base extension name (without package prefix)
# ext.name is "kt_kernel.kt_kernel_ext", we want "kt_kernel_ext"
base_ext_name = ext.name.split('.')[-1]
# Find the newly built .so file
import time
time.sleep(0.5) # Give filesystem time to sync
built_candidates = [
f for f in Path(extdir).glob("*.so")
if f.name.startswith(base_ext_name) and not f.name.startswith(f"_{base_ext_name}_")
]
if not built_candidates:
print(f"WARNING: No .so file found for {base_ext_name} in {extdir}")
print(f"Files in {extdir}:")
for f in Path(extdir).glob("*.so"):
print(f" {f.name}")
for so_file in built_candidates:
# Extract the python tag part (e.g., ".cpython-311-x86_64-linux-gnu.so")
suffix = so_file.name.replace(base_ext_name, "")
new_name = f"_{base_ext_name}_{variant['name']}{suffix}"
new_path = extdir / new_name
print(f"-- Renaming {so_file.name} -> {new_name}")
if new_path.exists():
print(f" WARNING: Target file already exists, removing: {new_path}")
new_path.unlink()
so_file.rename(new_path)
print(f" ✓ Successfully renamed to {new_name}")
finally:
# Restore build_temp for next iteration
self.build_temp = original_build_temp
# Restore original environment
os.environ.clear()
os.environ.update(original_env)
print(f"\n{'=' * 80}")
print("✓ Successfully built all 3 CPU variants")
print(f"{'=' * 80}\n")
def _build_single_variant(self, ext: CMakeExtension):
"""
Build a single CPU variant. This contains the core build logic
extracted from the original build_extension method.
"""
# Auto-detect CUDA toolkit if user did not explicitly set CPUINFER_USE_CUDA
def detect_cuda_toolkit() -> bool:
# Respect CUDA_HOME
@@ -276,6 +403,10 @@ class CMakeBuild(build_ext):
auto_cuda = detect_cuda_toolkit()
os.environ["CPUINFER_USE_CUDA"] = "1" if auto_cuda else "0"
print(f"-- CPUINFER_USE_CUDA not set; auto-detected CUDA toolkit: {'YES' if auto_cuda else 'NO'}")
elif cuda_env:
print("-- CPUINFER_USE_CUDA explicitly enabled")
else:
print("-- CPUINFER_USE_CUDA explicitly disabled")
extdir = Path(self.get_ext_fullpath(ext.name)).parent.resolve()
cfg = default_build_type()
@@ -431,7 +562,15 @@ class CMakeBuild(build_ext):
# Version (simple). If you later add a python package dir, you can read from it.
################################################################################
VERSION = os.environ.get("CPUINFER_VERSION", "0.1.0")
# Import version from shared version.py at project root
_version_file = Path(__file__).resolve().parent.parent / "version.py"
if _version_file.exists():
_version_ns = {}
with open(_version_file, "r", encoding="utf-8") as f:
exec(f.read(), _version_ns)
VERSION = os.environ.get("CPUINFER_VERSION", _version_ns.get("__version__", "0.4.2"))
else:
VERSION = os.environ.get("CPUINFER_VERSION", "0.4.2")
################################################################################
# Setup
@@ -449,7 +588,7 @@ setup(
"kt_kernel": "python",
"kt_kernel.utils": "python/utils",
},
ext_modules=[CMakeExtension("kt_kernel_ext", str(REPO_ROOT))],
ext_modules=[CMakeExtension("kt_kernel.kt_kernel_ext", str(REPO_ROOT))],
cmdclass={"build_ext": CMakeBuild},
zip_safe=False,
classifiers=[

View File

@@ -16,7 +16,8 @@ register_cpu_ci(est_time=30, suite="default")
# Check if kt_kernel_ext is available
try:
import kt_kernel_ext
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
HAS_KT_KERNEL = True
except ImportError:
HAS_KT_KERNEL = False

View File

@@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default")
# Check if dependencies are available
try:
import torch
import kt_kernel_ext
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
HAS_DEPS = True
except ImportError as e:
HAS_DEPS = False

View File

@@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default")
# Check if dependencies are available
try:
import torch
import kt_kernel_ext
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
HAS_DEPS = True
except ImportError as e:
HAS_DEPS = False

View File

@@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default")
# Check if dependencies are available
try:
import torch
import kt_kernel_ext
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
HAS_DEPS = True
except ImportError as e:
HAS_DEPS = False

View File

@@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default")
# Check if dependencies are available
try:
import torch
import kt_kernel_ext
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
HAS_DEPS = True
except ImportError as e:
HAS_DEPS = False

View File

@@ -23,7 +23,8 @@ register_cpu_ci(est_time=300, suite="default")
# Check if dependencies are available
try:
import torch
import kt_kernel_ext
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
from tqdm import tqdm
HAS_DEPS = True
except ImportError as e:

View File

@@ -23,7 +23,8 @@ register_cpu_ci(est_time=300, suite="default")
# Check if dependencies are available
try:
import torch
import kt_kernel_ext
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
from tqdm import tqdm
HAS_DEPS = True

View File

@@ -24,7 +24,8 @@ register_cpu_ci(est_time=300, suite="default")
# Check if dependencies are available
try:
import torch
import kt_kernel_ext
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
from tqdm import tqdm
HAS_DEPS = True
except ImportError as e:

View File

@@ -23,7 +23,8 @@ register_cpu_ci(est_time=300, suite="default")
# Check if dependencies are available
try:
import torch
import kt_kernel_ext
import kt_kernel # Import kt_kernel first to register kt_kernel_ext
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module
from tqdm import tqdm
HAS_DEPS = True
except ImportError as e:

View File

@@ -1,11 +1,20 @@
#!/usr/bin/env python
# coding=utf-8
'''
Description :
Description :
Author : kkk1nak0
Date : 2024-08-15 07:34:46
Version : 1.0.0
LastEditors : chenxl
LastEditors : chenxl
LastEditTime : 2025-02-15 03:53:02
'''
__version__ = "0.4.1"
import sys
import os
# Import version from shared version.py at project root
_root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.insert(0, _root_dir)
try:
from version import __version__
finally:
sys.path.pop(0)

6
version.py Normal file
View File

@@ -0,0 +1,6 @@
"""
KTransformers version information.
Shared across kt-kernel and kt-sft modules.
"""
__version__ = "0.4.3"