diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 8f58b1e..6d4f645 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -5,9 +5,24 @@ on: types: [published] workflow_dispatch: inputs: - choose: - description: 'Will you push the image to DockerHub? 0 for No, 1 for Yes' + push_to_dockerhub: + description: 'Push image to DockerHub? (true/false)' required: true + default: 'false' + type: boolean + cuda_version: + description: 'CUDA version (e.g., 12.8.1)' + required: false + default: '12.8.1' + type: string + push_simplified_tag: + description: 'Also push simplified tag? (true/false)' + required: false + default: 'true' + type: boolean + ubuntu_mirror: + description: 'Use Tsinghua Ubuntu mirror? (0/1)' + required: false default: '0' type: string @@ -20,79 +35,108 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Run tests run: | if [ -f docker-compose.test.yml ]; then docker-compose --file docker-compose.test.yml build docker-compose --file docker-compose.test.yml run sut else - docker build . --file Dockerfile + docker build . 
--file docker/Dockerfile fi - docker_task: + build-and-push: needs: test - name: ${{ matrix.instruct}} + name: Build and Push Multi-Variant Docker Image runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - include: - # for amd64 - - {instruct: "FANCY", platform: "linux/amd64"} - - {instruct: "AVX512", platform: "linux/amd64"} - - {instruct: "AVX2", platform: "linux/amd64"} - - {instruct: "NATIVE", platform: "linux/amd64"} - # for arm64 - - {instruct: "NATIVE", platform: "linux/arm64"} steps: - - name: Move Docker data directory - run: | - sudo systemctl stop docker - sudo mkdir -p /mnt/docker - sudo rsync -avz /var/lib/docker/ /mnt/docker - sudo rm -rf /var/lib/docker - sudo ln -s /mnt/docker /var/lib/docker - sudo systemctl start docker + - name: Checkout repository + uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + - name: Move Docker data directory + run: | + sudo systemctl stop docker + sudo mkdir -p /mnt/docker + sudo rsync -avz /var/lib/docker/ /mnt/docker + sudo rm -rf /var/lib/docker + sudo ln -s /mnt/docker /var/lib/docker + sudo systemctl start docker - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 - - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and push for amd64 - if: matrix.platform == 'linux/amd64' - uses: docker/build-push-action@v6 - with: - push: true - platforms: | - linux/amd64 - tags: | - ${{ env.DOCKERHUB_REPO }}:latest-${{ matrix.instruct }} - ${{ env.DOCKERHUB_REPO }}:${{ github.event.release.tag_name }}-${{ matrix.instruct }} - build-args: | - CPU_INSTRUCT=${{ matrix.instruct }} - - - name: Build and push for arm64 - if: matrix.platform == 'linux/arm64' - uses: docker/build-push-action@v6 - with: - push: true - platforms: | - linux/arm64 - tags: | - ${{ env.DOCKERHUB_REPO 
}}:latest-${{ matrix.instruct }} - ${{ env.DOCKERHUB_REPO }}:${{ github.event.release.tag_name }}-${{ matrix.instruct }} - build-args: | - CPU_INSTRUCT=${{ matrix.instruct }} + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Determine build parameters + id: params + run: | + # Determine if we should push + if [ "${{ github.event_name }}" = "release" ]; then + echo "should_push=true" >> $GITHUB_OUTPUT + echo "push_simplified=true" >> $GITHUB_OUTPUT + elif [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "should_push=${{ inputs.push_to_dockerhub }}" >> $GITHUB_OUTPUT + echo "push_simplified=${{ inputs.push_simplified_tag }}" >> $GITHUB_OUTPUT + else + echo "should_push=false" >> $GITHUB_OUTPUT + echo "push_simplified=false" >> $GITHUB_OUTPUT + fi + + # Determine CUDA version + if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ -n "${{ inputs.cuda_version }}" ]; then + echo "cuda_version=${{ inputs.cuda_version }}" >> $GITHUB_OUTPUT + else + echo "cuda_version=12.8.1" >> $GITHUB_OUTPUT + fi + + # Determine Ubuntu mirror setting + if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ -n "${{ inputs.ubuntu_mirror }}" ]; then + echo "ubuntu_mirror=${{ inputs.ubuntu_mirror }}" >> $GITHUB_OUTPUT + else + echo "ubuntu_mirror=0" >> $GITHUB_OUTPUT + fi + + - name: Build and push Docker image + run: | + cd docker + + # Build command arguments + BUILD_ARGS=( + --cuda-version "${{ steps.params.outputs.cuda_version }}" + --ubuntu-mirror "${{ steps.params.outputs.ubuntu_mirror }}" + --repository "${{ env.DOCKERHUB_REPO }}" + ) + + # Add simplified tag option if enabled + if [ "${{ steps.params.outputs.push_simplified }}" = "true" ]; then + BUILD_ARGS+=(--also-push-simplified) + fi + + # Add HTTP proxy if available + if [ -n "${{ secrets.HTTP_PROXY }}" ]; then + BUILD_ARGS+=(--http-proxy "${{ secrets.HTTP_PROXY }}") + fi + + # Add 
HTTPS proxy if available + if [ -n "${{ secrets.HTTPS_PROXY }}" ]; then + BUILD_ARGS+=(--https-proxy "${{ secrets.HTTPS_PROXY }}") + fi + + # Dry run if not pushing + if [ "${{ steps.params.outputs.should_push }}" != "true" ]; then + BUILD_ARGS+=(--dry-run) + fi + + # Execute build script + ./push-to-dockerhub.sh "${BUILD_ARGS[@]}" + + - name: Display image information + if: steps.params.outputs.should_push == 'true' + run: | + echo "::notice title=Docker Image::Image pushed successfully to ${{ env.DOCKERHUB_REPO }}" + echo "Pull command: docker pull ${{ env.DOCKERHUB_REPO }}:v\$(VERSION)-cu\$(CUDA_SHORT)" diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml deleted file mode 100644 index 3c2b9cf..0000000 --- a/.github/workflows/install.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: Install / Test KTransformers -run-name: Install / Test KTransformers -on: - workflow_dispatch: - inputs: - job_to_run: - description: "Which job to run?" - required: true - default: "test" - type: choice - options: - - create-install-test - - install-test - - test -jobs: - Install-Test-KTransformers: - runs-on: self-hosted - steps: - - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." - - name: Check out repository code - uses: actions/checkout@v4 - - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." 
- - name: Remove old conda environment - continue-on-error: true - if: contains(inputs.job_to_run, 'create') - run: | - source /home/qujing3/anaconda3/etc/profile.d/conda.sh - conda env remove --name ktransformers-dev -y - - name: Create conda environment - if: contains(inputs.job_to_run, 'create') - run: | - source /home/qujing3/anaconda3/etc/profile.d/conda.sh - conda create --name ktransformers-dev python=3.11 - conda activate ktransformers-dev - conda install -c conda-forge libstdcxx-ng -y - - name: Install dependencies - if: contains(inputs.job_to_run, 'create') - run: | - source /home/qujing3/anaconda3/etc/profile.d/conda.sh - conda activate ktransformers-dev - pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126 - pip3 install packaging ninja cpufeature numpy - pip install ~/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp311-cp311-linux_x86_64.whl - - name: Install KTransformers - if: contains(inputs.job_to_run, 'install') - run: | - source /home/qujing3/anaconda3/etc/profile.d/conda.sh - conda activate ktransformers-dev - pip3 uninstall ktransformers -y - cd ${{ github.workspace }} - git submodule init - git submodule update - bash install.sh - - name: Test Local Chat 1 - run: | - set -e - source /home/qujing3/anaconda3/etc/profile.d/conda.sh - conda activate ktransformers-dev - export PATH=/usr/local/cuda-12.4/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH - export CUDA_HOME=/usr/local/cuda-12.4 - cd ${{ github.workspace }} - echo "Running Local Chat 1 (book.txt) ..." - python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt - sed -n '/Prompt:/,$p' log1.txt - echo "Running Local Chat 2 [force think] (chinese.txt) ..." 
- python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt -f > log2.txt - sed -n '/Prompt:/,$p' log2.txt - - - run: echo "This job's status is ${{ job.status }}." diff --git a/.github/workflows/package_wheel_release.yml b/.github/workflows/package_wheel_release.yml deleted file mode 100644 index d471563..0000000 --- a/.github/workflows/package_wheel_release.yml +++ /dev/null @@ -1,231 +0,0 @@ -name: Build Wheels -on: - workflow_dispatch: - inputs: - release: - description: 'Release? 1 = yes, 0 = no' - default: '0' - required: true - type: string -jobs: - build_wheels: - name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - include: - # Ubuntu - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { 
os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', 
torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', 
instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: 
'8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - # Windows - - { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.12', cuda: 
'12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, 
pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'} - - { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'} - - { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - 
defaults: - run: - shell: pwsh - - steps: - - uses: actions/checkout@v3 - - - name: Free Disk Space - uses: jlumbroso/free-disk-space@v1.3.1 - if: runner.os == 'Linux' - with: - tool-cache: true - android: true - dotnet: true - haskell: true - large-packages: false - swap-storage: true - - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.pyver }} - - - name: check_space - run: | - if($IsLinux) {df -h} - if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'} - - - uses: actions/setup-node@v4 - with: - node-version: 20 - - - name: Setup Mamba - if: matrix.cuda != '' - uses: conda-incubator/setup-miniconda@v3 - with: - activate-environment: "ktransformers" - python-version: ${{ matrix.pyver }} - miniforge-variant: Miniforge3 - miniforge-version: latest - use-mamba: true - add-pip-as-python-dependency: true - auto-activate-base: false - - - - - name: build web - run: | - cd ktransformers/website/ - npm install - npm run build - cd ../../ - - - name: build for cuda - if: matrix.cuda != '' - env: - USE_BALANCE_SERVE: "1" - run: | - git submodule init - git submodule update - if($IsWindows){ - $originalPath = Get-Location - Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' - Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64' - $env:DISTUTILS_USE_SDK=1 - Set-Location $originalPath - } - $cudaVersion = '${{ matrix.cuda }}' - $env:MAMBA_NO_LOW_SPEED_LIMIT = 1 - mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime - $env:CUDA_PATH = $env:CONDA_PREFIX - $env:CUDA_HOME = $env:CONDA_PREFIX - if ($IsLinux) { - $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH - $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH - if (!(Test-Path $env:CUDA_HOME/lib64)) { - New-Item -ItemType 
SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib - } - } - if ($IsWindows) { - if (Test-Path -Path "$env:CUDA_PATH/Library/bin/nvcc.exe"){ - $env:CUDA_PATH = "$env:CUDA_PATH/Library" - $env:CUDA_HOME = $env:CUDA_PATH - } - $env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH - $directory = "$env:CUDA_PATH/lib/x64/" - if (-not (Test-Path -Path $directory)) { - New-Item -ItemType Directory -Path $directory - Write-Output "Directory '$directory' created." - } - cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/ - $env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE - $env:INCLUDE =$env:CONDA_PREFIX + "/include;" + $env:INCLUDE - } - python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }} - python -m pip install cpufeature build wheel ninja packaging setuptools - $env:KTRANSFORMERS_FORCE_BUILD = "TRUE" - $env:CPU_INSTRUCT = '${{ matrix.instruct }}' - $env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}' - python -m build --no-isolation --verbose - - - - name: create Rlease dir - run: | - if ($IsWindows) { - $env:date = $(Get-Date -Format "yyyy-MM-dd") - New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh" - $Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa" - Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}" - (Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH - chmod 600 $Env:SSH_PATH - } - if ($IsLinux) { - $env:date = $(date +%Y-%m-%d) - mkdir -p ~/.ssh/ - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa - chmod 600 ~/.ssh/id_rsa - } - - ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date" - scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/ \ No newline at end of file diff --git a/.github/workflows/package_wheel_test.yml 
b/.github/workflows/package_wheel_test.yml deleted file mode 100644 index cd8db62..0000000 --- a/.github/workflows/package_wheel_test.yml +++ /dev/null @@ -1,141 +0,0 @@ -name: Build Wheels Tests -on: - workflow_dispatch: - inputs: - release: - description: 'Release? 1 = yes, 0 = no' - default: '0' - required: true - type: string -jobs: - build_wheels: - name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - include: - # Ubuntu - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'} - - { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'} - - { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'} - - { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'} - - defaults: - run: - shell: pwsh - - steps: - - uses: actions/checkout@v3 - - - name: Free Disk Space - uses: jlumbroso/free-disk-space@v1.3.1 - if: runner.os == 'Linux' - with: - tool-cache: true - android: true - dotnet: true - haskell: true - large-packages: false - swap-storage: true - - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.pyver }} - - - name: check_space - run: | - if($IsLinux) {df -h} - if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'} - - - uses: actions/setup-node@v4 - with: - node-version: 20 - - - name: Setup Mamba - if: matrix.cuda != '' - uses: conda-incubator/setup-miniconda@v3 - with: - activate-environment: "ktransformers" - python-version: ${{ matrix.pyver }} - miniforge-variant: Miniforge3 - miniforge-version: latest - use-mamba: true - add-pip-as-python-dependency: true - auto-activate-base: false - - - - - name: build 
web - run: | - cd ktransformers/website/ - npm install - npm run build - cd ../../ - - - name: build for cuda - if: matrix.cuda != '' - run: | - git submodule init - git submodule update - if($IsWindows){ - $originalPath = Get-Location - Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' - Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64' - $env:DISTUTILS_USE_SDK=1 - Set-Location $originalPath - } - $cudaVersion = '${{ matrix.cuda }}' - $env:MAMBA_NO_LOW_SPEED_LIMIT = 1 - mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime - $env:CUDA_PATH = $env:CONDA_PREFIX - $env:CUDA_HOME = $env:CONDA_PREFIX - if ($IsLinux) { - $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH - $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH - if (!(Test-Path $env:CUDA_HOME/lib64)) { - New-Item -ItemType SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib - } - } - if ($IsWindows) { - if (Test-Path -Path "$env:CUDA_PATH/Library/bin/nvcc.exe"){ - $env:CUDA_PATH = "$env:CUDA_PATH/Library" - $env:CUDA_HOME = $env:CUDA_PATH - } - $env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH - $directory = "$env:CUDA_PATH/lib/x64/" - if (-not (Test-Path -Path $directory)) { - New-Item -ItemType Directory -Path $directory - Write-Output "Directory '$directory' created." 
- } - cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/ - $env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE - $env:INCLUDE =$env:CONDA_PREFIX + "/include;" + $env:INCLUDE - } - python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }} - python -m pip install cpufeature build wheel ninja packaging setuptools - $env:KTRANSFORMERS_FORCE_BUILD = "TRUE" - $env:CPU_INSTRUCT = '${{ matrix.instruct }}' - $env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}' - python -m build --no-isolation --verbose - - - - name: create Rlease dir - run: | - if ($IsWindows) { - $env:date = $(Get-Date -Format "yyyy-MM-dd") - New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh" - $Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa" - Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}" - (Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH - chmod 600 $Env:SSH_PATH - } - if ($IsLinux) { - $env:date = $(date +%Y-%m-%d) - mkdir -p ~/.ssh/ - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa - chmod 600 ~/.ssh/id_rsa - } - - ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date" - scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/ \ No newline at end of file diff --git a/.github/workflows/release-fake-tag.yml b/.github/workflows/release-fake-tag.yml new file mode 100644 index 0000000..2d23cdc --- /dev/null +++ b/.github/workflows/release-fake-tag.yml @@ -0,0 +1,36 @@ +name: Release Fake Tag + +on: + push: + branches: + - main + paths: + - "version.py" + workflow_dispatch: + +permissions: + contents: write + +jobs: + publish: + if: github.repository == 'kvcache-ai/ktransformers' + runs-on: ubuntu-latest + environment: 'prod' + steps: + - name: Checkout repository + uses: 
actions/checkout@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Get version + id: get_version + run: | + version=$(cat version.py | grep '__version__' | cut -d'"' -f2) + echo "TAG=v$version" >> $GITHUB_OUTPUT + + - name: Create and push tag + run: | + git config user.name "ktransformers-bot" + git config user.email "ktransformers-bot@users.noreply.github.com" + git tag ${{ steps.get_version.outputs.TAG }} + git push origin ${{ steps.get_version.outputs.TAG }} diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml new file mode 100644 index 0000000..3c1f7e4 --- /dev/null +++ b/.github/workflows/release-pypi.yml @@ -0,0 +1,163 @@ +name: Release to PyPI + +on: + push: + branches: + - main + paths: + - "version.py" + workflow_dispatch: + inputs: + test_pypi: + description: 'Publish to TestPyPI instead of PyPI (for testing)' + required: false + default: 'false' + type: choice + options: + - 'true' + - 'false' + +permissions: + contents: read + +jobs: + build-kt-kernel: + name: Build kt-kernel CPU-only (Python ${{ matrix.python-version }}) + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11', '3.12'] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y cmake libhwloc-dev pkg-config libnuma-dev + + - name: Install Python build tools + run: | + python -m pip install --upgrade pip + pip install build wheel setuptools + + - name: Build kt-kernel wheel (CPU-only, multi-variant) + working-directory: kt-kernel + env: + CPUINFER_BUILD_ALL_VARIANTS: '1' + CPUINFER_USE_CUDA: '0' + CPUINFER_BUILD_TYPE: 'Release' + CPUINFER_PARALLEL: '4' + CPUINFER_FORCE_REBUILD: '1' + run: | + echo "Building kt-kernel CPU-only 
with all CPU variants (AMX, AVX512, AVX2)" + python -m build --wheel --no-isolation -v + + - name: List generated wheels + working-directory: kt-kernel + run: | + echo "Generated wheels:" + ls -lh dist/ + + - name: Test wheel import + working-directory: kt-kernel + run: | + pip install dist/*.whl + python -c "import kt_kernel; print('✓ Import successful'); print(f'CPU variant detected: {kt_kernel.__cpu_variant__}'); print(f'Version: {kt_kernel.__version__}')" + + - name: Verify wheel contains all variants + working-directory: kt-kernel + run: | + echo "Checking wheel contents for CPU variants..." + python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_" || echo "ERROR: No variant .so files found!" + python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_amx.cpython" && echo "✓ AMX variant found" || echo "✗ AMX variant missing" + python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx512.cpython" && echo "✓ AVX512 variant found" || echo "✗ AVX512 variant missing" + python -m zipfile -l dist/*.whl | grep "_kt_kernel_ext_avx2.cpython" && echo "✓ AVX2 variant found" || echo "✗ AVX2 variant missing" + + - name: Upload wheel artifact + uses: actions/upload-artifact@v4 + with: + name: kt-kernel-wheels-py${{ matrix.python-version }} + path: kt-kernel/dist/*.whl + retention-days: 7 + + publish-pypi: + name: Publish to PyPI + needs: build-kt-kernel + runs-on: ubuntu-latest + if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main' + environment: prod + permissions: + id-token: write # For trusted publishing (OIDC) + contents: read + + steps: + - name: Download all wheel artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts/ + + - name: Organize wheels into dist/ + run: | + mkdir -p dist/ + find artifacts/ -name "*.whl" -exec cp {} dist/ \; + echo "Wheels to publish:" + ls -lh dist/ + + - name: Get version from wheel + id: get_version + run: | + # Extract version from first wheel filename + wheel_name=$(ls dist/*.whl | head 
-1 | xargs basename) + # Extract version (format: kt_kernel-X.Y.Z-...) + version=$(echo "$wheel_name" | sed 's/kt_kernel-\([0-9.]*\)-.*/\1/') + echo "VERSION=$version" >> $GITHUB_OUTPUT + echo "Publishing version: $version" + + - name: Publish to TestPyPI (if requested) + if: github.event.inputs.test_pypi == 'true' + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + skip-existing: true + print-hash: true + + - name: Publish to PyPI + if: github.event.inputs.test_pypi != 'true' + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + skip-existing: true + print-hash: true + + - name: Create release summary + run: | + echo "## 🎉 kt-kernel v${{ steps.get_version.outputs.VERSION }} Published to PyPI" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Installation" >> $GITHUB_STEP_SUMMARY + echo '```bash' >> $GITHUB_STEP_SUMMARY + echo "pip install kt-kernel==${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Published Wheels" >> $GITHUB_STEP_SUMMARY + echo "Total: $(ls -1 dist/*.whl | wc -l) wheels (3 Python versions: 3.10, 3.11, 3.12)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Features" >> $GITHUB_STEP_SUMMARY + echo "**CPU-only build with multi-variant support:**" >> $GITHUB_STEP_SUMMARY + echo "- ✅ AMX (Intel Sapphire Rapids+)" >> $GITHUB_STEP_SUMMARY + echo "- ✅ AVX512 (Intel Skylake-X/Ice Lake/Cascade Lake)" >> $GITHUB_STEP_SUMMARY + echo "- ✅ AVX2 (Maximum compatibility)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Runtime CPU detection:** Automatically selects the best variant for your CPU" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "PyPI link: https://pypi.org/project/kt-kernel/#history" >> $GITHUB_STEP_SUMMARY diff --git 
a/.github/workflows/score.yml b/.github/workflows/score.yml deleted file mode 100644 index bf66b6b..0000000 --- a/.github/workflows/score.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Human Eval Score -run-name: Human Eval Score -on: workflow_dispatch -jobs: - Human-Eval-Score: - runs-on: self-hosted - steps: - - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." - - name: Check out repository code - uses: actions/checkout@v4 - - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - - name: Human Eval Run - run: | - set -e - source /home/qujing3/anaconda3/etc/profile.d/conda.sh - conda activate ktransformers-dev - export PATH=/usr/local/cuda-12.4/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH - export CUDA_HOME=/usr/local/cuda-12.4 - cd ${{ github.workspace }} - python ktransformers/tests/score.py - - - run: echo "This job's status is ${{ job.status }}." 
diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..a2f464f --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,408 @@ +ARG CUDA_VERSION=12.8.1 +FROM docker.1ms.run/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS base + +ARG TARGETARCH +ARG GRACE_BLACKWELL=0 +ARG HOPPER_SBO=0 +ARG CPU_VARIANT=x86-intel-multi +ARG BUILD_ALL_CPU_VARIANTS=1 + +# Proxy settings for build-time network access +ARG HTTP_PROXY +ARG HTTPS_PROXY +ARG http_proxy +ARG https_proxy +ENV HTTP_PROXY=${HTTP_PROXY} \ + HTTPS_PROXY=${HTTPS_PROXY} \ + http_proxy=${http_proxy} \ + https_proxy=${https_proxy} + +ARG GRACE_BLACKWELL_DEEPEP_BRANCH=gb200_blog_part_2 +ARG HOPPER_SBO_DEEPEP_COMMIT=9f2fc4b3182a51044ae7ecb6610f7c9c3258c4d6 +ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee +ARG BUILD_AND_DOWNLOAD_PARALLEL=8 +ARG SGL_KERNEL_VERSION=0.3.19 +ARG SGL_VERSION=0.5.6.post1 +ARG USE_LATEST_SGLANG=0 +ARG GDRCOPY_VERSION=2.5.1 +ARG UBUNTU_MIRROR +ARG GITHUB_ARTIFACTORY=github.com +ARG FLASHINFER_VERSION=0.5.3 + +# ktransformers wheel version (cu128torch28 for CUDA 12.8 + PyTorch 2.8) +ARG KTRANSFORMERS_VERSION=0.4.2 +ARG KTRANSFORMERS_WHEEL=ktransformers-0.4.2+cu128torch28fancy-cp312-cp312-linux_x86_64.whl + +# flash_attn wheel for fine-tune env +ARG FLASH_ATTN_WHEEL=flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl + +ENV DEBIAN_FRONTEND=noninteractive \ + CUDA_HOME=/usr/local/cuda \ + GDRCOPY_HOME=/usr/src/gdrdrv-${GDRCOPY_VERSION}/ \ + FLASHINFER_VERSION=${FLASHINFER_VERSION} + +# Add GKE default lib and bin locations +ENV PATH="${PATH}:/usr/local/nvidia/bin" \ + LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" + +# Replace Ubuntu sources with Tsinghua mirror for Ubuntu 24.04 (noble) +RUN if [ -n "$UBUNTU_MIRROR" ]; then \ + echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ noble main restricted universe multiverse" > /etc/apt/sources.list && \ + echo "deb 
https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ noble-updates main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ noble-backports main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb http://security.ubuntu.com/ubuntu/ noble-security main restricted universe multiverse" >> /etc/apt/sources.list && \ + rm -f /etc/apt/sources.list.d/ubuntu.sources; \ +fi + +# Install system dependencies (organized by category for better caching) +RUN --mount=type=cache,target=/var/cache/apt,id=base-apt \ + echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ + && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ + && apt-get update && apt-get install -y --no-install-recommends --allow-change-held-packages \ + # Core system utilities + tzdata \ + ca-certificates \ + software-properties-common \ + netcat-openbsd \ + kmod \ + unzip \ + openssh-server \ + curl \ + wget \ + lsof \ + locales \ + # Build essentials + build-essential \ + cmake \ + perl \ + patchelf \ + ccache \ + git \ + git-lfs \ + # MPI and NUMA + libopenmpi-dev \ + libnuma1 \ + libnuma-dev \ + numactl \ + # transformers multimodal VLM + ffmpeg \ + # InfiniBand/RDMA + libibverbs-dev \ + libibverbs1 \ + libibumad3 \ + librdmacm1 \ + libnl-3-200 \ + libnl-route-3-200 \ + libnl-route-3-dev \ + libnl-3-dev \ + ibverbs-providers \ + infiniband-diags \ + perftest \ + # Development libraries + libgoogle-glog-dev \ + libgtest-dev \ + libjsoncpp-dev \ + libunwind-dev \ + libboost-all-dev \ + libssl-dev \ + libgrpc-dev \ + libgrpc++-dev \ + libprotobuf-dev \ + protobuf-compiler \ + protobuf-compiler-grpc \ + pybind11-dev \ + libhiredis-dev \ + libcurl4-openssl-dev \ + libczmq4 \ + libczmq-dev \ + libfabric-dev \ + # Package building tools + devscripts \ + debhelper \ + fakeroot \ + dkms \ + check \ + libsubunit0 \ + libsubunit-dev \ + # Development tools + gdb \ + ninja-build \ + vim \ + tmux \ 
+ htop \ + zsh \ + tree \ + less \ + rdma-core \ + # NCCL + libnccl2 \ + libnccl-dev \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean + +# GDRCopy installation +RUN mkdir -p /tmp/gdrcopy && cd /tmp \ + && curl --retry 3 --retry-delay 2 -fsSL -o v${GDRCOPY_VERSION}.tar.gz \ + https://${GITHUB_ARTIFACTORY}/NVIDIA/gdrcopy/archive/refs/tags/v${GDRCOPY_VERSION}.tar.gz \ + && tar -xzf v${GDRCOPY_VERSION}.tar.gz && rm v${GDRCOPY_VERSION}.tar.gz \ + && cd gdrcopy-${GDRCOPY_VERSION}/packages \ + && CUDA=/usr/local/cuda ./build-deb-packages.sh \ + && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \ + && cd / && rm -rf /tmp/gdrcopy + +# Fix DeepEP IBGDA symlink +RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so + +# Set up locale +RUN locale-gen en_US.UTF-8 +ENV LANG=en_US.UTF-8 \ + LANGUAGE=en_US:en \ + LC_ALL=en_US.UTF-8 + +######################################################## +########## Install Miniconda ########################### +######################################################## + +RUN mkdir -p /opt/miniconda3 \ + && wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /opt/miniconda3/miniconda.sh \ + && bash /opt/miniconda3/miniconda.sh -b -u -p /opt/miniconda3 \ + && rm /opt/miniconda3/miniconda.sh + +# Add conda to PATH +ENV PATH="/opt/miniconda3/bin:${PATH}" + +# Accept conda TOS +RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main \ + && conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r + +# Configure conda to use Tsinghua mirror +RUN conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main \ + && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free \ + && conda config --set show_channel_urls yes + +######################################################## +########## Dual Conda Environment Setup ################ 
+######################################################## + +FROM base AS framework + +ARG CUDA_VERSION +ARG BUILD_AND_DOWNLOAD_PARALLEL +ARG SGL_KERNEL_VERSION +ARG SGL_VERSION +ARG USE_LATEST_SGLANG +ARG FLASHINFER_VERSION +ARG GRACE_BLACKWELL +ARG GRACE_BLACKWELL_DEEPEP_BRANCH +ARG HOPPER_SBO +ARG HOPPER_SBO_DEEPEP_COMMIT +ARG DEEPEP_COMMIT +ARG GITHUB_ARTIFACTORY +ARG KTRANSFORMERS_VERSION +ARG KTRANSFORMERS_WHEEL +ARG FLASH_ATTN_WHEEL + +WORKDIR /workspace + +# Create two conda environments with Python 3.12 +RUN conda create -n serve python=3.12 -y \ + && conda create -n fine-tune python=3.12 -y + +# Set pip mirror for both conda envs +RUN /opt/miniconda3/envs/serve/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \ + && /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple + +# Clone repositories +# Use kvcache-ai/sglang fork with kimi_k2 branch +RUN git clone https://${GITHUB_ARTIFACTORY}/kvcache-ai/sglang.git /workspace/sglang \ + && cd /workspace/sglang && git checkout kimi_k2 + +RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory \ + && git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \ + && cd /workspace/ktransformers && git submodule update --init --recursive + +# Download ktransformers wheel and flash_attn wheel for fine-tune env +RUN curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \ + https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \ + && curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \ + https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL} + +######################################################## +# Environment 1: serve (sglang + kt-kernel) 
+######################################################## + +# Upgrade pip and install basic tools in serve env +RUN --mount=type=cache,target=/root/.cache/pip \ + /opt/miniconda3/envs/serve/bin/pip install --upgrade pip setuptools wheel html5lib six + +# Install sgl-kernel +RUN --mount=type=cache,target=/root/.cache/pip \ + case "$CUDA_VERSION" in \ + 12.6.1) CUINDEX=126 ;; \ + 12.8.1) CUINDEX=128 ;; \ + 12.9.1) CUINDEX=129 ;; \ + 13.0.1) CUINDEX=130 ;; \ + *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \ + esac \ + && if [ "$CUDA_VERSION" = "12.6.1" ]; then \ + /opt/miniconda3/envs/serve/bin/pip install https://${GITHUB_ARTIFACTORY}/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu124-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \ + ; \ + elif [ "$CUDA_VERSION" = "12.8.1" ] || [ "$CUDA_VERSION" = "12.9.1" ]; then \ + /opt/miniconda3/envs/serve/bin/pip install sgl-kernel==${SGL_KERNEL_VERSION} \ + ; \ + elif [ "$CUDA_VERSION" = "13.0.1" ]; then \ + /opt/miniconda3/envs/serve/bin/pip install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu130-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \ + ; \ + fi + +# Install SGLang in serve env +RUN --mount=type=cache,target=/root/.cache/pip \ + case "$CUDA_VERSION" in \ + 12.6.1) CUINDEX=126 ;; \ + 12.8.1) CUINDEX=128 ;; \ + 12.9.1) CUINDEX=129 ;; \ + 13.0.1) CUINDEX=130 ;; \ + esac \ + && cd /workspace/sglang \ + && /opt/miniconda3/envs/serve/bin/pip install -e "python[all]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} + +# Download FlashInfer cubin for serve env +RUN --mount=type=cache,target=/root/.cache/pip \ + FLASHINFER_CUBIN_DOWNLOAD_THREADS=${BUILD_AND_DOWNLOAD_PARALLEL} FLASHINFER_LOGGING_LEVEL=warning \ + /opt/miniconda3/envs/serve/bin/python -m flashinfer --download-cubin + +# Install DeepEP in serve env +RUN set -eux; \ + if [ 
"$GRACE_BLACKWELL" = "1" ]; then \ + git clone https://github.com/fzyzcjy/DeepEP.git /workspace/DeepEP && \ + cd /workspace/DeepEP && \ + git checkout ${GRACE_BLACKWELL_DEEPEP_BRANCH} && \ + sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh; \ + elif [ "$HOPPER_SBO" = "1" ]; then \ + git clone https://github.com/deepseek-ai/DeepEP.git -b antgroup-opt /workspace/DeepEP && \ + cd /workspace/DeepEP && \ + git checkout ${HOPPER_SBO_DEEPEP_COMMIT} && \ + sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh; \ + else \ + curl --retry 3 --retry-delay 2 -fsSL -o /tmp/${DEEPEP_COMMIT}.zip \ + https://${GITHUB_ARTIFACTORY}/deepseek-ai/DeepEP/archive/${DEEPEP_COMMIT}.zip && \ + unzip -q /tmp/${DEEPEP_COMMIT}.zip -d /tmp && rm /tmp/${DEEPEP_COMMIT}.zip && \ + mv /tmp/DeepEP-${DEEPEP_COMMIT} /workspace/DeepEP && \ + cd /workspace/DeepEP && \ + sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh; \ + fi + +RUN --mount=type=cache,target=/root/.cache/pip \ + cd /workspace/DeepEP && \ + case "$CUDA_VERSION" in \ + 12.6.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0' ;; \ + 12.8.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0' ;; \ + 12.9.1|13.0.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0;10.3' ;; \ + *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \ + esac && \ + . /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve && \ + TORCH_CUDA_ARCH_LIST="${CHOSEN_TORCH_CUDA_ARCH_LIST}" MAX_JOBS=${BUILD_AND_DOWNLOAD_PARALLEL} \ + pip install --no-build-isolation . 
+ +# Install NCCL for serve env +RUN --mount=type=cache,target=/root/.cache/pip \ + if [ "${CUDA_VERSION%%.*}" = "12" ]; then \ + /opt/miniconda3/envs/serve/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \ + elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \ + /opt/miniconda3/envs/serve/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \ + fi + +# Install kt-kernel in serve env with all CPU variants +RUN . /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve \ + && cd /workspace/ktransformers/kt-kernel \ + && CPUINFER_BUILD_ALL_VARIANTS=1 ./install.sh build + +######################################################## +# Environment 2: fine-tune (LLaMA-Factory + ktransformers) +######################################################## + +# Install dependency libraries for ktransformers (CUDA 11.8 runtime required) +RUN conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \ + && conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime + +# Install PyTorch 2.8 in fine-tune env +RUN --mount=type=cache,target=/root/.cache/pip \ + case "$CUDA_VERSION" in \ + 12.6.1) CUINDEX=126 ;; \ + 12.8.1) CUINDEX=128 ;; \ + 12.9.1) CUINDEX=129 ;; \ + 13.0.1) CUINDEX=130 ;; \ + esac \ + && /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel \ + && /opt/miniconda3/envs/fine-tune/bin/pip install \ + torch==2.8.0 \ + torchvision \ + torchaudio \ + --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} + +# Install LLaMA-Factory in fine-tune env +RUN --mount=type=cache,target=/root/.cache/pip \ + cd /workspace/LLaMA-Factory \ + && /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation + +# Install ktransformers wheel in fine-tune env +RUN --mount=type=cache,target=/root/.cache/pip \ + /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL} + +# Install flash_attn wheel in fine-tune env +RUN 
--mount=type=cache,target=/root/.cache/pip \ + /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL} + +# Install NCCL for fine-tune env +RUN --mount=type=cache,target=/root/.cache/pip \ + if [ "${CUDA_VERSION%%.*}" = "12" ]; then \ + /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \ + elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \ + /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \ + fi + +######################################################## +# Cleanup and final setup +######################################################## + +# Clean up downloaded wheels +RUN rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL} + +# Initialize conda for bash +RUN /opt/miniconda3/bin/conda init bash + +# Create shell aliases for convenience +RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"\nalias finetune="conda activate fine-tune"' >> /root/.bashrc + +######################################################## +# Extract version information for image naming +######################################################## + +# Extract versions from each component and save to versions.env +RUN set -x && \ + # SGLang version (from version.py file) + cd /workspace/sglang/python/sglang && \ + SGLANG_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \ + echo "SGLANG_VERSION=$SGLANG_VERSION" > /workspace/versions.env && \ + echo "Extracted SGLang version: $SGLANG_VERSION" && \ + \ + # KTransformers version (from version.py in repo) + cd /workspace/ktransformers && \ + KTRANSFORMERS_VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown") && \ + echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \ + echo "Extracted KTransformers version: $KTRANSFORMERS_VERSION" && \ + \ + # 
LLaMA-Factory version (from fine-tune environment) + . /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \ + cd /workspace/LLaMA-Factory && \ + LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \ + echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \ + echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION" && \ + \ + # Display all versions + echo "=== Version Summary ===" && \ + cat /workspace/versions.env + +WORKDIR /workspace + +CMD ["/bin/bash"] diff --git a/docker/README-packaging.md b/docker/README-packaging.md new file mode 100644 index 0000000..afe7e77 --- /dev/null +++ b/docker/README-packaging.md @@ -0,0 +1,387 @@ +# KTransformers Docker Packaging Guide + +This directory contains scripts for building and distributing KTransformers Docker images with standardized naming conventions. + +## Overview + +The packaging system provides: + +- **Automated version detection** from sglang, ktransformers, and LLaMA-Factory +- **Multi-CPU variant support** (AMX, AVX512, AVX2) with runtime auto-detection +- **Standardized naming convention** for easy identification and management +- **Two distribution methods**: + - Local tar file export for offline distribution + - DockerHub publishing for online distribution + +## Naming Convention + +Docker images follow this naming pattern: + +``` +sglang-v{sglang版本}_ktransformers-v{ktransformers版本}_{cpu信息}_{gpu信息}_{功能模式}_{时间戳} +``` + +### Example Names + +**Tar file:** +``` +sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar +``` + +**DockerHub tags:** +``` +Full tag: +kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 + +Simplified tag: +kvcache/ktransformers:v0.4.3-cu128 +``` + +### Name Components + +| Component | Description | Example | 
+|-----------|-------------|---------| +| sglang version | SGLang package version | `v0.5.6` | +| ktransformers version | KTransformers version | `v0.4.3` | +| cpu info | CPU instruction set support | `x86-intel-multi` (includes AMX/AVX512/AVX2) | +| gpu info | CUDA version | `cu128` (CUDA 12.8) | +| functionality | Feature mode | `sft_llamafactory-v0.9.3` or `infer` | +| timestamp | Build time (Beijing/UTC+8) | `20241212143022` | + +## Files + +| File | Purpose | +|------|---------| +| `Dockerfile` | Main Dockerfile with multi-CPU build and version extraction | +| `docker-utils.sh` | Shared utility functions for both scripts | +| `build-docker-tar.sh` | Build and export Docker image to tar file | +| `push-to-dockerhub.sh` | Build and push Docker image to DockerHub | + +## Prerequisites + +- Docker installed and running +- For DockerHub push: Docker Hub account and login (`docker login`) +- Sufficient disk space (at least 20GB recommended) +- Internet access (or local mirrors configured) + +## Quick Start + +### Build Local Tar File + +```bash +cd docker + +# Basic build +./build-docker-tar.sh + +# With specific CUDA version and mirror +./build-docker-tar.sh \ + --cuda-version 12.8.1 \ + --ubuntu-mirror 1 + +# With proxy +./build-docker-tar.sh \ + --cuda-version 12.8.1 \ + --ubuntu-mirror 1 \ + --http-proxy "http://127.0.0.1:16981" \ + --https-proxy "http://127.0.0.1:16981" \ + --output-dir /path/to/output +``` + +### Push to DockerHub + +```bash +cd docker + +# Basic push (requires --repository) +./push-to-dockerhub.sh \ + --repository kvcache/ktransformers + +# With simplified tag +./push-to-dockerhub.sh \ + --cuda-version 12.8.1 \ + --repository kvcache/ktransformers \ + --also-push-simplified + +# Skip build if image exists +./push-to-dockerhub.sh \ + --repository kvcache/ktransformers \ + --skip-build +``` + +## Script Options + +### build-docker-tar.sh + +``` +Build Configuration: + --cuda-version VERSION CUDA version (default: 12.8.1) + --ubuntu-mirror 0|1 
Use Tsinghua mirror (default: 0) + --http-proxy URL HTTP proxy URL + --https-proxy URL HTTPS proxy URL + --cpu-variant VARIANT CPU variant (default: x86-intel-multi) + --functionality TYPE Mode: sft or infer (default: sft) + +Paths: + --dockerfile PATH Path to Dockerfile (default: ./Dockerfile) + --context-dir PATH Build context directory (default: .) + --output-dir PATH Output directory for tar (default: .) + +Options: + --dry-run Preview without building + --keep-image Keep Docker image after export + --build-arg KEY=VALUE Additional build arguments + -h, --help Show help message +``` + +### push-to-dockerhub.sh + +``` +All options from build-docker-tar.sh, plus: + +Registry Settings: + --registry REGISTRY Docker registry (default: docker.io) + --repository REPO Repository name (REQUIRED) + +Options: + --skip-build Skip build if image exists + --also-push-simplified Also push simplified tag + --max-retries N Max push retries (default: 3) + --retry-delay SECONDS Delay between retries (default: 5) +``` + +## Usage Examples + +### Example 1: Local Development Build + +For testing on your local machine: + +```bash +./build-docker-tar.sh \ + --cuda-version 12.8.1 \ + --output-dir ./builds \ + --keep-image +``` + +This will: +1. Build the Docker image +2. Export to tar in `./builds/` directory +3. 
Keep the Docker image for local testing + +### Example 2: Production Build for Distribution + +For creating a production build with mirrors and proxy: + +```bash +./build-docker-tar.sh \ + --cuda-version 12.8.1 \ + --ubuntu-mirror 1 \ + --http-proxy "http://127.0.0.1:16981" \ + --https-proxy "http://127.0.0.1:16981" \ + --output-dir /mnt/data/releases +``` + +### Example 3: Publish to DockerHub + +For publishing to DockerHub: + +```bash +# First, login to Docker Hub +docker login + +# Then push +./push-to-dockerhub.sh \ + --cuda-version 12.8.1 \ + --repository kvcache/ktransformers \ + --also-push-simplified +``` + +This creates two tags: +- Full: `kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022` +- Simplified: `kvcache/ktransformers:v0.4.3-cu128` + +### Example 4: Dry Run + +Preview the build without actually building: + +```bash +./build-docker-tar.sh --cuda-version 12.8.1 --dry-run +``` + +### Example 5: Custom Build Arguments + +Pass additional Docker build arguments: + +```bash +./build-docker-tar.sh \ + --cuda-version 12.8.1 \ + --build-arg SGL_VERSION=0.5.7 \ + --build-arg FLASHINFER_VERSION=0.5.4 +``` + +## Using the Built Images + +### Load from Tar File + +```bash +# Load the image +docker load -i sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar + +# Run the container +docker run -it --rm \ + --gpus all \ + sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 \ + /bin/bash +``` + +### Pull from DockerHub + +```bash +# Pull with full tag +docker pull kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 + +# Or pull with simplified tag +docker pull kvcache/ktransformers:v0.4.3-cu128 + +# Run the container +docker run -it --rm \ + --gpus all \ + kvcache/ktransformers:v0.4.3-cu128 \ + /bin/bash +``` + +### Inside the Container + +The 
image contains two conda environments: + +```bash +# Activate serve environment (for inference with sglang) +conda activate serve +# or use the alias: +serve + +# Activate fine-tune environment (for training with LLaMA-Factory) +conda activate fine-tune +# or use the alias: +finetune +``` + +## Multi-CPU Variant Support + +The Docker image includes all three CPU variants: +- **AMX** - For Intel Sapphire Rapids and newer (4th Gen Xeon+) +- **AVX512** - For Intel Skylake-X, Ice Lake, Cascade Lake +- **AVX2** - Maximum compatibility for older CPUs + +The runtime automatically detects your CPU and loads the appropriate variant. To override: + +```bash +# Force use of AVX2 variant +export KT_KERNEL_CPU_VARIANT=avx2 +python your_script.py + +# Enable debug output to see which variant is loaded +export KT_KERNEL_DEBUG=1 +python your_script.py +``` + +## Version Extraction + +Versions are automatically extracted during Docker build from: + +- **SGLang**: From `sglang.__version__` in serve environment +- **KTransformers**: From `version.py` in ktransformers repository +- **LLaMA-Factory**: From `llamafactory.__version__` in fine-tune environment + +The versions are saved to `/workspace/versions.env` in the image: + +```bash +# View versions in running container +cat /workspace/versions.env + +# Output: +SGLANG_VERSION=0.5.6 +KTRANSFORMERS_VERSION=0.4.3 +LLAMAFACTORY_VERSION=0.9.3 +``` + +## Troubleshooting + +### Build Fails with Out of Disk Space + +Check available disk space: +```bash +df -h +``` + +The build requires approximately 15-20GB of disk space. Clean up Docker: +```bash +docker system prune -a +``` + +### Version Extraction Fails + +If version extraction fails (shows "unknown"), check: + +1. The cloned repositories have the correct branches +2. Python packages are properly installed in conda environments +3. 
Version files exist in expected locations + +You can manually verify by running: +```bash +docker run --rm <image-tag> /bin/bash -c " + source /opt/miniconda3/etc/profile.d/conda.sh && + conda activate serve && + python -c 'import sglang; print(sglang.__version__)' +" +``` + +### Push to DockerHub Fails + +1. **Check login**: `docker login` +2. **Check repository name**: Must include namespace (e.g., `kvcache/ktransformers`, not just `ktransformers`) +3. **Network issues**: Use `--max-retries` and `--retry-delay` options +4. **Rate limiting**: DockerHub has pull/push rate limits for free accounts + +## Advanced Topics + +### Custom Dockerfile Location + +```bash +./build-docker-tar.sh \ + --dockerfile /path/to/custom/Dockerfile \ + --context-dir /path/to/build/context +``` + +### Building Only Inference Image (Future) + +Currently, the image always includes both serve and fine-tune environments. To create an inference-only image, modify the Dockerfile to skip the fine-tune environment section. + +### Customizing CPU Variants + +To build only specific CPU variants, modify `kt-kernel/install.sh` or set environment variables in the Dockerfile. + +### CI/CD Integration + +The scripts are designed for manual execution but can be integrated into CI/CD pipelines: + +```yaml +# Example GitHub Actions workflow +- name: Build and push Docker image + run: | + cd docker + ./push-to-dockerhub.sh \ + --cuda-version ${{ matrix.cuda_version }} \ + --repository ${{ secrets.DOCKER_REPOSITORY }} \ + --also-push-simplified +``` + +## Support + +For issues and questions: +- File an issue at: https://github.com/kvcache-ai/ktransformers/issues +- Check documentation: https://github.com/kvcache-ai/ktransformers + +## License + +This packaging system is part of KTransformers and follows the same license. 
diff --git a/docker/build-docker-tar.sh b/docker/build-docker-tar.sh new file mode 100755 index 0000000..2673074 --- /dev/null +++ b/docker/build-docker-tar.sh @@ -0,0 +1,498 @@ +#!/usr/bin/env bash +# +# build-docker-tar.sh - Build Docker image and export to tar file +# +# This script builds a Docker image for ktransformers with standardized naming +# and exports it to a tar file for distribution. +# +# Features: +# - Automatic version detection from built image +# - Standardized naming convention +# - Multi-CPU variant support (AMX/AVX512/AVX2) +# - Configurable build parameters +# - Comprehensive error handling +# +# Usage: +# ./build-docker-tar.sh [OPTIONS] +# +# Example: +# ./build-docker-tar.sh \ +# --cuda-version 12.8.1 \ +# --ubuntu-mirror 1 \ +# --http-proxy "http://127.0.0.1:16981" \ +# --output-dir /path/to/output + +set -euo pipefail + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source utility functions +# shellcheck source=docker-utils.sh +source "$SCRIPT_DIR/docker-utils.sh" + +################################################################################ +# Default Configuration +################################################################################ + +# Build parameters +CUDA_VERSION="12.8.1" +UBUNTU_MIRROR="0" +HTTP_PROXY="" +HTTPS_PROXY="" +CPU_VARIANT="x86-intel-multi" +FUNCTIONALITY="sft" + +# Paths +DOCKERFILE="$SCRIPT_DIR/Dockerfile" +CONTEXT_DIR="$SCRIPT_DIR" +OUTPUT_DIR="." 
+ +# Options +DRY_RUN=false +KEEP_IMAGE=false +EXTRA_BUILD_ARGS=() + +################################################################################ +# Help Message +################################################################################ + +usage() { + cat <&2 + log_info "Temporary tag: $temp_tag" >&2 + + # Prepare build arguments + local build_args=() + build_args+=("--build-arg" "CUDA_VERSION=$CUDA_VERSION") + build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR") + build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT") + build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1") + + # Add proxy settings if provided + if [ -n "$HTTP_PROXY" ]; then + build_args+=("--build-arg" "HTTP_PROXY=$HTTP_PROXY") + fi + if [ -n "$HTTPS_PROXY" ]; then + build_args+=("--build-arg" "HTTPS_PROXY=$HTTPS_PROXY") + fi + + # Add extra build args + build_args+=("${EXTRA_BUILD_ARGS[@]}") + + # Add network host + build_args+=("--network" "host") + + # Build command + local build_cmd=( + docker build + -f "$DOCKERFILE" + "${build_args[@]}" + -t "$temp_tag" + "$CONTEXT_DIR" + ) + + # Display build command + { + log_info "Build command:" + printf ' %s \\\n' "${build_cmd[@]:0:${#build_cmd[@]}-1}" + printf ' %s\n' "${build_cmd[-1]}" + } >&2 + + if [ "$DRY_RUN" = true ]; then + log_warning "DRY RUN: Skipping actual build" >&2 + echo "$temp_tag" + return 0 + fi + + # Execute build + log_info "Starting Docker build (this may take 30-60 minutes)..." 
>&2 + if "${build_cmd[@]}" >&2; then + log_success "Docker image built successfully" >&2 + echo "$temp_tag" + else + log_error "Docker build failed" >&2 + exit 1 + fi +} + +################################################################################ +# Extract Versions and Generate Name +################################################################################ + +generate_tar_name() { + local image_tag="$1" + local timestamp="$2" + + if [ "$DRY_RUN" = true ]; then + log_warning "DRY RUN: Using placeholder versions" >&2 + # Use placeholder versions for dry run + local versions="SGLANG_VERSION=0.5.6 +KTRANSFORMERS_VERSION=0.4.3 +LLAMAFACTORY_VERSION=0.9.3" + else + # Extract versions from image + local versions + versions=$(extract_versions_from_image "$image_tag") + + if [ $? -ne 0 ]; then + log_error "Failed to extract versions from image" + exit 1 + fi + + # Validate versions + if ! validate_versions "$versions"; then + log_error "Version validation failed" + exit 1 + fi + fi + + # Generate standardized image name + local tar_name + tar_name=$(generate_image_name "$versions" "$CUDA_VERSION" "$CPU_VARIANT" "$FUNCTIONALITY" "$timestamp") + + if [ -z "$tar_name" ]; then + log_error "Failed to generate image name" + exit 1 + fi + + echo "$tar_name" +} + +################################################################################ +# Export to Tar +################################################################################ + +export_to_tar() { + local image_tag="$1" + local tar_name="$2" + local tar_path="$OUTPUT_DIR/${tar_name}.tar" + + log_step "Exporting image to tar file" >&2 + log_info "Output: $tar_path" >&2 + + if [ "$DRY_RUN" = true ]; then + log_warning "DRY RUN: Skipping actual export" >&2 + return 0 + fi + + # Check if tar file already exists + if [ -f "$tar_path" ]; then + log_warning "Tar file already exists: $tar_path" >&2 + read -p "Overwrite? (y/N) " -n 1 -r + echo >&2 + if [[ ! 
$REPLY =~ ^[Yy]$ ]]; then + log_error "Export cancelled by user" >&2 + exit 1 + fi + rm -f "$tar_path" + fi + + # Tag image with the standardized name before saving + log_info "Tagging image with standardized name: $tar_name" >&2 + if ! docker tag "$image_tag" "$tar_name"; then + log_error "Failed to tag image" >&2 + exit 1 + fi + + # Export image with the standardized tag + log_info "Exporting image (this may take several minutes)..." >&2 + if docker save -o "$tar_path" "$tar_name"; then + log_success "Image exported successfully" >&2 + + # Get file size + local size + size=$(du -h "$tar_path" | cut -f1) + log_info "Tar file size: $size" >&2 + else + log_error "Failed to export image" >&2 + exit 1 + fi + + echo "$tar_path" +} + +################################################################################ +# Cleanup +################################################################################ + +cleanup() { + local image_tag="$1" + + if [ "$KEEP_IMAGE" = true ]; then + log_info "Keeping Docker image as requested: $image_tag" + else + cleanup_temp_images "$image_tag" + fi +} + +################################################################################ +# Main +################################################################################ + +main() { + log_step "KTransformers Docker Image Build and Export" + + # Parse arguments + parse_args "$@" + + # Validate configuration + validate_config + + # Generate timestamp + TIMESTAMP=$(get_beijing_timestamp) + log_info "Build timestamp: $TIMESTAMP" + + # Display configuration + display_summary "Build Configuration" \ + "CUDA Version: $CUDA_VERSION" \ + "Ubuntu Mirror: $UBUNTU_MIRROR" \ + "CPU Variant: $CPU_VARIANT" \ + "Functionality: $FUNCTIONALITY" \ + "HTTP Proxy: ${HTTP_PROXY:-}" \ + "HTTPS Proxy: ${HTTPS_PROXY:-}" \ + "Dockerfile: $DOCKERFILE" \ + "Context Dir: $CONTEXT_DIR" \ + "Output Dir: $OUTPUT_DIR" \ + "Timestamp: $TIMESTAMP" \ + "Dry Run: $DRY_RUN" + + # Build image + TEMP_TAG=$(build_image) + + 
# Generate tar name + TAR_NAME=$(generate_tar_name "$TEMP_TAG" "$TIMESTAMP") + log_info "Generated tar name: $TAR_NAME.tar" + + if [ "$DRY_RUN" = true ]; then + # Display dry-run summary + display_summary "DRY RUN Preview" \ + "This is what would be built:" \ + "" \ + "Temporary Docker tag: $TEMP_TAG" \ + "Tar filename: $TAR_NAME.tar" \ + "Output path: $OUTPUT_DIR/$TAR_NAME.tar" \ + "" \ + "After build, you would run:" \ + " docker load -i $OUTPUT_DIR/$TAR_NAME.tar" \ + " docker run -it --rm ${TAR_NAME} /bin/bash" + + log_success "DRY RUN: Preview complete. Remove --dry-run to build." + exit 0 + fi + + # Export to tar + TAR_PATH=$(export_to_tar "$TEMP_TAG" "$TAR_NAME") + + # Cleanup + cleanup "$TEMP_TAG" + + # Display summary + display_summary "Build Complete" \ + "Docker Image: $TEMP_TAG ($([ "$KEEP_IMAGE" = true ] && echo "kept" || echo "removed"))" \ + "Tar File: $TAR_PATH" \ + "" \ + "To load the image:" \ + " docker load -i $TAR_PATH" \ + "" \ + "To run the container:" \ + " docker run -it --rm ${TAR_NAME} /bin/bash" + + log_success "All done!" 
+} + +# Run main function +main "$@" diff --git a/docker/docker-utils.sh b/docker/docker-utils.sh new file mode 100755 index 0000000..988aac3 --- /dev/null +++ b/docker/docker-utils.sh @@ -0,0 +1,372 @@ +#!/usr/bin/env bash +# +# docker-utils.sh - Shared utility functions for Docker image build and publish scripts +# +# This script provides common functions for: +# - Timestamp generation (Beijing timezone) +# - Version extraction from Docker images +# - Image name generation following naming conventions +# - Colored logging +# - Validation and error handling +# +# Usage: source docker-utils.sh + +set -euo pipefail + +# Color codes for logging +COLOR_RED='\033[0;31m' +COLOR_GREEN='\033[0;32m' +COLOR_YELLOW='\033[1;33m' +COLOR_BLUE='\033[0;34m' +COLOR_CYAN='\033[0;36m' +COLOR_RESET='\033[0m' + +################################################################################ +# Logging Functions +################################################################################ + +log_info() { + echo -e "${COLOR_BLUE}[INFO]${COLOR_RESET} $*" +} + +log_success() { + echo -e "${COLOR_GREEN}[SUCCESS]${COLOR_RESET} $*" +} + +log_warning() { + echo -e "${COLOR_YELLOW}[WARNING]${COLOR_RESET} $*" +} + +log_error() { + echo -e "${COLOR_RED}[ERROR]${COLOR_RESET} $*" >&2 +} + +log_step() { + echo -e "\n${COLOR_CYAN}==>${COLOR_RESET} $*" +} + +################################################################################ +# Timestamp Functions +################################################################################ + +# Generate timestamp in Beijing timezone (UTC+8) +# Format: YYYYMMDDHHMMSS +# Example: 20241212143022 +get_beijing_timestamp() { + # Try to use TZ environment variable approach + if date --version &>/dev/null 2>&1; then + # GNU date (Linux) + TZ='Asia/Shanghai' date '+%Y%m%d%H%M%S' + else + # BSD date (macOS) + TZ='Asia/Shanghai' date '+%Y%m%d%H%M%S' + fi +} + +################################################################################ +# CUDA Version 
Parsing +################################################################################ + +# Parse CUDA version to short format +# Input: 12.8.1 or 12.8 or 13.0.1 +# Output: cu128 or cu130 +parse_cuda_short_version() { + local cuda_version="$1" + + # Extract major and minor version + local major minor + major=$(echo "$cuda_version" | cut -d. -f1) + minor=$(echo "$cuda_version" | cut -d. -f2) + + # Validate + if [[ ! "$major" =~ ^[0-9]+$ ]] || [[ ! "$minor" =~ ^[0-9]+$ ]]; then + log_error "Invalid CUDA version format: $cuda_version" + log_error "Expected format: X.Y.Z (e.g., 12.8.1)" + return 1 + fi + + echo "cu${major}${minor}" +} + +################################################################################ +# Version Extraction +################################################################################ + +# Extract versions from built Docker image +# Input: image tag (e.g., ktransformers:temp-build-20241212) +# Output: Sets environment variables or prints to stdout +# SGLANG_VERSION=x.y.z +# KTRANSFORMERS_VERSION=x.y.z +# LLAMAFACTORY_VERSION=x.y.z +extract_versions_from_image() { + local image_tag="$1" + + log_step "Extracting versions from image: $image_tag" + + # Check if image exists + if ! 
docker image inspect "$image_tag" &>/dev/null; then + log_error "Image not found: $image_tag" + return 1 + fi + + # Extract versions.env file from the image + local versions_content + versions_content=$(docker run --rm "$image_tag" cat /workspace/versions.env 2>/dev/null) + + if [ -z "$versions_content" ]; then + log_error "Failed to extract versions from image" + log_error "The /workspace/versions.env file may not exist in the image" + return 1 + fi + + # Parse and display versions + log_info "Extracted versions:" + echo "$versions_content" | while IFS= read -r line; do + log_info " $line" + done + + # Output the content (caller can parse this or eval it) + echo "$versions_content" +} + +# Validate that all required versions were extracted +# Input: versions string (output from extract_versions_from_image) +validate_versions() { + local versions="$1" + local all_valid=true + + # Check each required version + for var in SGLANG_VERSION KTRANSFORMERS_VERSION LLAMAFACTORY_VERSION; do + local value + value=$(echo "$versions" | grep "^${var}=" | cut -d= -f2) + + if [ -z "$value" ]; then + log_error "Missing version: $var" + all_valid=false + elif [ "$value" = "unknown" ]; then + log_warning "Version is 'unknown': $var" + # Don't fail, but warn user + fi + done + + if [ "$all_valid" = false ]; then + return 1 + fi + + return 0 +} + +################################################################################ +# Image Naming +################################################################################ + +# Generate standardized image name +# Input: +# $1: versions string (from extract_versions_from_image) +# $2: cuda_version (e.g., 12.8.1) +# $3: cpu_variant (e.g., x86-intel-multi) +# $4: functionality (e.g., sft_llamafactory or infer) +# $5: timestamp (optional, will generate if not provided) +# Output: Standardized image name +# Format: sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp} +generate_image_name() { + local versions="$1" + local 
cuda_version="$2" + local cpu_variant="$3" + local functionality="$4" + local timestamp="${5:-$(get_beijing_timestamp)}" + + # Parse versions from the versions string + local sglang_ver ktrans_ver llama_ver + sglang_ver=$(echo "$versions" | grep "^SGLANG_VERSION=" | cut -d= -f2) + ktrans_ver=$(echo "$versions" | grep "^KTRANSFORMERS_VERSION=" | cut -d= -f2) + llama_ver=$(echo "$versions" | grep "^LLAMAFACTORY_VERSION=" | cut -d= -f2) + + # Validate versions were extracted + if [ -z "$sglang_ver" ] || [ -z "$ktrans_ver" ] || [ -z "$llama_ver" ]; then + log_error "Failed to parse versions from input" + return 1 + fi + + # Parse CUDA short version + local cuda_short + cuda_short=$(parse_cuda_short_version "$cuda_version") + + # Build functionality string + local func_str + if [ "$functionality" = "sft" ]; then + func_str="sft_llamafactory-v${llama_ver}" + else + func_str="infer" + fi + + # Generate full image name + # Format: sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp} + local image_name + image_name="sglang-v${sglang_ver}_ktransformers-v${ktrans_ver}_${cpu_variant}_${cuda_short}_${func_str}_${timestamp}" + + echo "$image_name" +} + +# Generate simplified tag for DockerHub +# Input: +# $1: ktransformers_version (e.g., 0.4.3) +# $2: cuda_version (e.g., 12.8.1) +# Output: Simplified tag (e.g., v0.4.3-cu128) +generate_simplified_tag() { + local ktrans_ver="$1" + local cuda_version="$2" + + local cuda_short + cuda_short=$(parse_cuda_short_version "$cuda_version") + + echo "v${ktrans_ver}-${cuda_short}" +} + +################################################################################ +# Validation Functions +################################################################################ + +# Check if Docker daemon is running +check_docker_running() { + if ! 
docker info &>/dev/null; then + log_error "Docker daemon is not running" + log_error "Please start Docker and try again" + return 1 + fi + return 0 +} + +# Check if user is logged into Docker registry +# Input: registry (optional, default: docker.io) +check_docker_login() { + local registry="${1:-docker.io}" + + # Try to check auth by attempting a trivial operation + if ! docker login --help &>/dev/null; then + log_error "Docker CLI is not available" + return 1 + fi + + # Note: This is a best-effort check + # docker login status is not always easy to check programmatically + log_info "Assuming Docker login is configured" + log_info "If push fails, please run: docker login $registry" + + return 0 +} + +# Validate CUDA version format +validate_cuda_version() { + local cuda_version="$1" + + if [[ ! "$cuda_version" =~ ^[0-9]+\.[0-9]+(\.[0-9]+)?$ ]]; then + log_error "Invalid CUDA version format: $cuda_version" + log_error "Expected format: X.Y or X.Y.Z (e.g., 12.8 or 12.8.1)" + return 1 + fi + + return 0 +} + +# Check available disk space +# Input: required space in GB +check_disk_space() { + local required_gb="$1" + local output_dir="${2:-.}" + + # Get available space in GB (works on Linux and macOS) + local available_kb + if df -k "$output_dir" &>/dev/null; then + available_kb=$(df -k "$output_dir" | tail -1 | awk '{print $4}') + local available_gb=$((available_kb / 1024 / 1024)) + + log_info "Available disk space: ${available_gb}GB" + + if [ "$available_gb" -lt "$required_gb" ]; then + log_warning "Low disk space: ${available_gb}GB available, ${required_gb}GB recommended" + return 1 + fi + else + log_warning "Unable to check disk space" + fi + + return 0 +} + +# Check if file/directory exists and is writable +check_writable() { + local path="$1" + + if [ -e "$path" ]; then + if [ ! 
-w "$path" ]; then + log_error "Path exists but is not writable: $path" + return 1 + fi + else + # Try to create parent directory to test writability + local parent_dir + parent_dir=$(dirname "$path") + if [ ! -w "$parent_dir" ]; then + log_error "Parent directory is not writable: $parent_dir" + return 1 + fi + fi + + return 0 +} + +################################################################################ +# Cleanup Functions +################################################################################ + +# Remove intermediate Docker images +cleanup_temp_images() { + local image_tag="$1" + + log_step "Cleaning up temporary image: $image_tag" + + if docker image inspect "$image_tag" &>/dev/null; then + docker rmi "$image_tag" &>/dev/null || true + log_success "Cleaned up temporary image" + fi +} + +################################################################################ +# Display Functions +################################################################################ + +# Display a summary box +display_summary() { + local title="$1" + shift + local lines=("$@") + + local width=80 + local border=$(printf '=%.0s' $(seq 1 $width)) + + echo "" + echo "$border" + echo " $title" + echo "$border" + for line in "${lines[@]}"; do + echo " $line" + done + echo "$border" + echo "" +} + +################################################################################ +# Export functions +################################################################################ + +# Export all functions so they can be used by scripts that source this file +export -f log_info log_success log_warning log_error log_step +export -f get_beijing_timestamp +export -f parse_cuda_short_version +export -f extract_versions_from_image validate_versions +export -f generate_image_name generate_simplified_tag +export -f check_docker_running check_docker_login validate_cuda_version +export -f check_disk_space check_writable +export -f cleanup_temp_images +export -f display_summary 
diff --git a/docker/push-to-dockerhub.sh b/docker/push-to-dockerhub.sh new file mode 100755 index 0000000..8b8fc3e --- /dev/null +++ b/docker/push-to-dockerhub.sh @@ -0,0 +1,1142 @@ +#!/usr/bin/env bash +# +# push-to-dockerhub.sh - Build and push Docker image to DockerHub +# +# This script builds a Docker image for ktransformers with standardized naming +# and pushes it to DockerHub with both full and simplified tags. +# +# Features: +# - Automatic version detection +# - Standardized naming convention +# - Multi-CPU variant support (AMX/AVX512/AVX2) +# - Full and simplified tag support +# - Retry logic for network failures +# - Comprehensive error handling +# +# Usage: +# ./push-to-dockerhub.sh [OPTIONS] +# +# Example: +# ./push-to-dockerhub.sh \ +# --cuda-version 12.8.1 \ +# --repository kvcache/ktransformers \ +# --also-push-simplified + +set -euo pipefail + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source utility functions +# shellcheck source=docker-utils.sh +source "$SCRIPT_DIR/docker-utils.sh" + +################################################################################ +# Default Configuration +################################################################################ + +# Build parameters +CUDA_VERSION="12.8.1" +UBUNTU_MIRROR="0" +HTTP_PROXY="" +HTTPS_PROXY="" +CPU_VARIANT="x86-intel-multi" +FUNCTIONALITY="sft" + +# Paths +DOCKERFILE="$SCRIPT_DIR/Dockerfile" +CONTEXT_DIR="$SCRIPT_DIR" + +# Registry settings +REGISTRY="docker.io" +REPOSITORY="" # Must be provided by user + +# Options +DRY_RUN=false +SKIP_BUILD=false +ALSO_PUSH_SIMPLIFIED=false +MAX_RETRIES=3 +RETRY_DELAY=5 +EXTRA_BUILD_ARGS=() + +################################################################################ +# Help Message +################################################################################ + +usage() { + cat <&2 + log_info "Temporary tag: $temp_tag" >&2 + + # Prepare build arguments + local build_args=() + 
build_args+=("--build-arg" "CUDA_VERSION=$CUDA_VERSION") + build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR") + build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT") + build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1") + + # Add proxy settings if provided + if [ -n "$HTTP_PROXY" ]; then + build_args+=("--build-arg" "HTTP_PROXY=$HTTP_PROXY") + fi + if [ -n "$HTTPS_PROXY" ]; then + build_args+=("--build-arg" "HTTPS_PROXY=$HTTPS_PROXY") + fi + + # Add extra build args + build_args+=("${EXTRA_BUILD_ARGS[@]}") + + # Add network host + build_args+=("--network" "host") + + # Build command + local build_cmd=( + docker build + -f "$DOCKERFILE" + "${build_args[@]}" + -t "$temp_tag" + "$CONTEXT_DIR" + ) + + # Display build command + { + log_info "Build command:" + echo " ${build_cmd[*]}" + } >&2 + + if [ "$DRY_RUN" = true ]; then + log_warning "DRY RUN: Skipping actual build" >&2 + return 0 + fi + + # Execute build + log_info "Starting Docker build (this may take 30-60 minutes)..." >&2 + if "${build_cmd[@]}" >&2; then + log_success "Docker image built successfully" >&2 + echo "$temp_tag" + else + log_error "Docker build failed" >&2 + exit 1 + fi +} + +################################################################################ +# Generate Tags +################################################################################ + +generate_tags() { + local image_tag="$1" + local timestamp="$2" + + if [ "$DRY_RUN" = true ]; then + log_warning "DRY RUN: Using placeholder versions" + # Use placeholder versions for dry run + local versions="SGLANG_VERSION=0.5.6 +KTRANSFORMERS_VERSION=0.4.3 +LLAMAFACTORY_VERSION=0.9.3" + else + # Extract versions from image + local versions + versions=$(extract_versions_from_image "$image_tag") + + if [ $? -ne 0 ]; then + log_error "Failed to extract versions from image" + exit 1 + fi + + # Validate versions + if ! 
validate_versions "$versions"; then + log_error "Version validation failed" + exit 1 + fi + fi + + # Generate full tag + local full_tag + full_tag=$(generate_image_name "$versions" "$CUDA_VERSION" "$CPU_VARIANT" "$FUNCTIONALITY" "$timestamp") + + if [ -z "$full_tag" ]; then + log_error "Failed to generate image name" + exit 1 + fi + + echo "FULL_TAG=$full_tag" + + # Generate simplified tag if requested + if [ "$ALSO_PUSH_SIMPLIFIED" = true ]; then + local ktrans_ver + ktrans_ver=$(echo "$versions" | grep "^KTRANSFORMERS_VERSION=" | cut -d= -f2) + + local simplified_tag + simplified_tag=$(generate_simplified_tag "$ktrans_ver" "$CUDA_VERSION") + + echo "SIMPLIFIED_TAG=$simplified_tag" + fi +} + +################################################################################ +# Push to Registry +################################################################################ + +push_image_with_retry() { + local source_tag="$1" + local target_tag="$2" + local attempt=1 + + log_step "Pushing image: $target_tag" + + if [ "$DRY_RUN" = true ]; then + log_warning "DRY RUN: Skipping actual push" + log_info "Would execute:" + echo " docker tag $source_tag $target_tag" + echo " docker push $target_tag" + return 0 + fi + + # Tag the image + log_info "Tagging image..." + if ! docker tag "$source_tag" "$target_tag"; then + log_error "Failed to tag image" + return 1 + fi + + # Push with retry logic + while [ $attempt -le "$MAX_RETRIES" ]; do + log_info "Push attempt $attempt/$MAX_RETRIES..." + + if docker push "$target_tag"; then + log_success "Successfully pushed: $target_tag" + return 0 + else + log_warning "Push failed (attempt $attempt/$MAX_RETRIES)" + + if [ $attempt -lt "$MAX_RETRIES" ]; then + log_info "Retrying in ${RETRY_DELAY} seconds..." 
+ sleep "$RETRY_DELAY" + fi + + ((attempt++)) + fi + done + + log_error "Failed to push after $MAX_RETRIES attempts" + return 1 +} + +################################################################################ +# Main +################################################################################ + +main() { + log_step "KTransformers Docker Image Build and Push" + + # Parse arguments + parse_args "$@" + + # Validate configuration + validate_config + + # Generate timestamp + TIMESTAMP=$(get_beijing_timestamp) + log_info "Build timestamp: $TIMESTAMP" + + # Display configuration + display_summary "Push Configuration" \ + "CUDA Version: $CUDA_VERSION" \ + "Ubuntu Mirror: $UBUNTU_MIRROR" \ + "CPU Variant: $CPU_VARIANT" \ + "Functionality: $FUNCTIONALITY" \ + "Registry: $REGISTRY" \ + "Repository: $REPOSITORY" \ + "Push Simplified: $ALSO_PUSH_SIMPLIFIED" \ + "Skip Build: $SKIP_BUILD" \ + "HTTP Proxy: ${HTTP_PROXY:-}" \ + "HTTPS Proxy: ${HTTPS_PROXY:-}" \ + "Dockerfile: $DOCKERFILE" \ + "Context Dir: $CONTEXT_DIR" \ + "Timestamp: $TIMESTAMP" \ + "Dry Run: $DRY_RUN" + + # Build image + TEMP_TAG=$(build_image) + + if [ "$DRY_RUN" = true ]; then + TEMP_TAG="ktransformers:temp-dryrun" + fi + + # Generate tags + log_step "Generating tags" + TAG_INFO=$(generate_tags "$TEMP_TAG" "$TIMESTAMP") + + # Parse tag info + FULL_TAG=$(echo "$TAG_INFO" | grep "^FULL_TAG=" | cut -d= -f2) + SIMPLIFIED_TAG=$(echo "$TAG_INFO" | grep "^SIMPLIFIED_TAG=" | cut -d= -f2 || echo "") + + log_info "Full tag: $FULL_TAG" + if [ -n "$SIMPLIFIED_TAG" ]; then + log_info "Simplified tag: $SIMPLIFIED_TAG" + fi + + # Push full tag + FULL_IMAGE="$REGISTRY/$REPOSITORY:$FULL_TAG" + if ! push_image_with_retry "$TEMP_TAG" "$FULL_IMAGE"; then + log_error "Failed to push full tag" + exit 1 + fi + + # Push simplified tag if requested + if [ -n "$SIMPLIFIED_TAG" ]; then + SIMPLIFIED_IMAGE="$REGISTRY/$REPOSITORY:$SIMPLIFIED_TAG" + if ! 
push_image_with_retry "$TEMP_TAG" "$SIMPLIFIED_IMAGE"; then + log_warning "Failed to push simplified tag, but continuing..." + fi + fi + + # Cleanup temporary image + if [ "$DRY_RUN" = false ]; then + log_step "Cleaning up temporary image" + cleanup_temp_images "$TEMP_TAG" + fi + + # Display summary + local summary_lines=( + "Successfully pushed images:" + "" + "Full tag:" + " $FULL_IMAGE" + "" + ) + + if [ -n "$SIMPLIFIED_TAG" ]; then + summary_lines+=( + "Simplified tag:" + " $SIMPLIFIED_IMAGE" + "" + ) + fi + + summary_lines+=( + "To pull the image:" + " docker pull $FULL_IMAGE" + "" + "To run the container:" + " docker run -it --rm $FULL_IMAGE /bin/bash" + ) + + display_summary "Push Complete" "${summary_lines[@]}" + + log_success "All done!" +} + +# Run main function +main "$@" +#!/usr/bin/env bash +# +# push-to-dockerhub.sh - Build and push Docker image to DockerHub +# +# This script builds a Docker image for ktransformers with standardized naming +# and pushes it to DockerHub with both full and simplified tags. 
+# +# Features: +# - Automatic version detection +# - Standardized naming convention +# - Multi-CPU variant support (AMX/AVX512/AVX2) +# - Full and simplified tag support +# - Retry logic for network failures +# - Comprehensive error handling +# +# Usage: +# ./push-to-dockerhub.sh [OPTIONS] +# +# Example: +# ./push-to-dockerhub.sh \ +# --cuda-version 12.8.1 \ +# --repository kvcache/ktransformers \ +# --also-push-simplified + +set -euo pipefail + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source utility functions +# shellcheck source=docker-utils.sh +source "$SCRIPT_DIR/docker-utils.sh" + +################################################################################ +# Default Configuration +################################################################################ + +# Build parameters +CUDA_VERSION="12.8.1" +UBUNTU_MIRROR="0" +HTTP_PROXY="" +HTTPS_PROXY="" +CPU_VARIANT="x86-intel-multi" +FUNCTIONALITY="sft" + +# Paths +DOCKERFILE="$SCRIPT_DIR/Dockerfile" +CONTEXT_DIR="$SCRIPT_DIR" + +# Registry settings +REGISTRY="docker.io" +REPOSITORY="" # Must be provided by user + +# Options +DRY_RUN=false +SKIP_BUILD=false +ALSO_PUSH_SIMPLIFIED=false +MAX_RETRIES=3 +RETRY_DELAY=5 +EXTRA_BUILD_ARGS=() + +################################################################################ +# Help Message +################################################################################ + +usage() { + cat <&2 + log_info "Temporary tag: $temp_tag" >&2 + + # Prepare build arguments + local build_args=() + build_args+=("--build-arg" "CUDA_VERSION=$CUDA_VERSION") + build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR") + build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT") + build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1") + + # Add proxy settings if provided + if [ -n "$HTTP_PROXY" ]; then + build_args+=("--build-arg" "HTTP_PROXY=$HTTP_PROXY") + fi + if [ -n "$HTTPS_PROXY" ]; then + build_args+=("--build-arg" 
"HTTPS_PROXY=$HTTPS_PROXY") + fi + + # Add extra build args + build_args+=("${EXTRA_BUILD_ARGS[@]}") + + # Add network host + build_args+=("--network" "host") + + # Build command + local build_cmd=( + docker build + -f "$DOCKERFILE" + "${build_args[@]}" + -t "$temp_tag" + "$CONTEXT_DIR" + ) + + # Display build command + { + log_info "Build command:" + echo " ${build_cmd[*]}" + } >&2 + + if [ "$DRY_RUN" = true ]; then + log_warning "DRY RUN: Skipping actual build" >&2 + return 0 + fi + + # Execute build + log_info "Starting Docker build (this may take 30-60 minutes)..." >&2 + if "${build_cmd[@]}" >&2; then + log_success "Docker image built successfully" >&2 + echo "$temp_tag" + else + log_error "Docker build failed" >&2 + exit 1 + fi +} + +################################################################################ +# Generate Tags +################################################################################ + +generate_tags() { + local image_tag="$1" + local timestamp="$2" + + if [ "$DRY_RUN" = true ]; then + log_warning "DRY RUN: Using placeholder versions" + # Use placeholder versions for dry run + local versions="SGLANG_VERSION=0.5.6 +KTRANSFORMERS_VERSION=0.4.3 +LLAMAFACTORY_VERSION=0.9.3" + else + # Extract versions from image + local versions + versions=$(extract_versions_from_image "$image_tag") + + if [ $? -ne 0 ]; then + log_error "Failed to extract versions from image" + exit 1 + fi + + # Validate versions + if ! 
validate_versions "$versions"; then + log_error "Version validation failed" + exit 1 + fi + fi + + # Generate full tag + local full_tag + full_tag=$(generate_image_name "$versions" "$CUDA_VERSION" "$CPU_VARIANT" "$FUNCTIONALITY" "$timestamp") + + if [ -z "$full_tag" ]; then + log_error "Failed to generate image name" + exit 1 + fi + + echo "FULL_TAG=$full_tag" + + # Generate simplified tag if requested + if [ "$ALSO_PUSH_SIMPLIFIED" = true ]; then + local ktrans_ver + ktrans_ver=$(echo "$versions" | grep "^KTRANSFORMERS_VERSION=" | cut -d= -f2) + + local simplified_tag + simplified_tag=$(generate_simplified_tag "$ktrans_ver" "$CUDA_VERSION") + + echo "SIMPLIFIED_TAG=$simplified_tag" + fi +} + +################################################################################ +# Push to Registry +################################################################################ + +push_image_with_retry() { + local source_tag="$1" + local target_tag="$2" + local attempt=1 + + log_step "Pushing image: $target_tag" + + if [ "$DRY_RUN" = true ]; then + log_warning "DRY RUN: Skipping actual push" + log_info "Would execute:" + echo " docker tag $source_tag $target_tag" + echo " docker push $target_tag" + return 0 + fi + + # Tag the image + log_info "Tagging image..." + if ! docker tag "$source_tag" "$target_tag"; then + log_error "Failed to tag image" + return 1 + fi + + # Push with retry logic + while [ $attempt -le "$MAX_RETRIES" ]; do + log_info "Push attempt $attempt/$MAX_RETRIES..." + + if docker push "$target_tag"; then + log_success "Successfully pushed: $target_tag" + return 0 + else + log_warning "Push failed (attempt $attempt/$MAX_RETRIES)" + + if [ $attempt -lt "$MAX_RETRIES" ]; then + log_info "Retrying in ${RETRY_DELAY} seconds..." 
+ sleep "$RETRY_DELAY" + fi + + ((attempt++)) + fi + done + + log_error "Failed to push after $MAX_RETRIES attempts" + return 1 +} + +################################################################################ +# Main +################################################################################ + +main() { + log_step "KTransformers Docker Image Build and Push" + + # Parse arguments + parse_args "$@" + + # Validate configuration + validate_config + + # Generate timestamp + TIMESTAMP=$(get_beijing_timestamp) + log_info "Build timestamp: $TIMESTAMP" + + # Display configuration + display_summary "Push Configuration" \ + "CUDA Version: $CUDA_VERSION" \ + "Ubuntu Mirror: $UBUNTU_MIRROR" \ + "CPU Variant: $CPU_VARIANT" \ + "Functionality: $FUNCTIONALITY" \ + "Registry: $REGISTRY" \ + "Repository: $REPOSITORY" \ + "Push Simplified: $ALSO_PUSH_SIMPLIFIED" \ + "Skip Build: $SKIP_BUILD" \ + "HTTP Proxy: ${HTTP_PROXY:-}" \ + "HTTPS Proxy: ${HTTPS_PROXY:-}" \ + "Dockerfile: $DOCKERFILE" \ + "Context Dir: $CONTEXT_DIR" \ + "Timestamp: $TIMESTAMP" \ + "Dry Run: $DRY_RUN" + + # Build image + TEMP_TAG=$(build_image) + + if [ "$DRY_RUN" = true ]; then + TEMP_TAG="ktransformers:temp-dryrun" + fi + + # Generate tags + log_step "Generating tags" + TAG_INFO=$(generate_tags "$TEMP_TAG" "$TIMESTAMP") + + # Parse tag info + FULL_TAG=$(echo "$TAG_INFO" | grep "^FULL_TAG=" | cut -d= -f2) + SIMPLIFIED_TAG=$(echo "$TAG_INFO" | grep "^SIMPLIFIED_TAG=" | cut -d= -f2 || echo "") + + log_info "Full tag: $FULL_TAG" + if [ -n "$SIMPLIFIED_TAG" ]; then + log_info "Simplified tag: $SIMPLIFIED_TAG" + fi + + # Push full tag + FULL_IMAGE="$REGISTRY/$REPOSITORY:$FULL_TAG" + if ! push_image_with_retry "$TEMP_TAG" "$FULL_IMAGE"; then + log_error "Failed to push full tag" + exit 1 + fi + + # Push simplified tag if requested + if [ -n "$SIMPLIFIED_TAG" ]; then + SIMPLIFIED_IMAGE="$REGISTRY/$REPOSITORY:$SIMPLIFIED_TAG" + if ! 
push_image_with_retry "$TEMP_TAG" "$SIMPLIFIED_IMAGE"; then + log_warning "Failed to push simplified tag, but continuing..." + fi + fi + + # Cleanup temporary image + if [ "$DRY_RUN" = false ]; then + log_step "Cleaning up temporary image" + cleanup_temp_images "$TEMP_TAG" + fi + + # Display summary + local summary_lines=( + "Successfully pushed images:" + "" + "Full tag:" + " $FULL_IMAGE" + "" + ) + + if [ -n "$SIMPLIFIED_TAG" ]; then + summary_lines+=( + "Simplified tag:" + " $SIMPLIFIED_IMAGE" + "" + ) + fi + + summary_lines+=( + "To pull the image:" + " docker pull $FULL_IMAGE" + "" + "To run the container:" + " docker run -it --rm $FULL_IMAGE /bin/bash" + ) + + display_summary "Push Complete" "${summary_lines[@]}" + + log_success "All done!" +} + +# Run main function +main "$@" diff --git a/kt-kernel/CMakeLists.txt b/kt-kernel/CMakeLists.txt index c9aae98..cc64569 100644 --- a/kt-kernel/CMakeLists.txt +++ b/kt-kernel/CMakeLists.txt @@ -28,7 +28,7 @@ option(KTRANSFORMERS_CPU_MOE_AMD "ktransformers: CPU use moe kernel for amd" OFF # LTO control option(CPUINFER_ENABLE_LTO "Enable link time optimization (IPO)" OFF) -project(kt_kernel_ext VERSION 0.1.0) +project(kt_kernel_ext VERSION 0.4.2) # Choose compilers BEFORE project() so CMake honors them if(USE_CONDA_TOOLCHAIN) if(NOT DEFINED ENV{CONDA_PREFIX} OR NOT EXISTS "$ENV{CONDA_PREFIX}") diff --git a/kt-kernel/MANIFEST.in b/kt-kernel/MANIFEST.in new file mode 100644 index 0000000..d051e6d --- /dev/null +++ b/kt-kernel/MANIFEST.in @@ -0,0 +1,37 @@ +# MANIFEST.in for kt-kernel +# Ensures source distribution includes all necessary files for building from source + +# Core build files +include CMakeLists.txt +include CMakePresets.json +include setup.py +include pyproject.toml +include requirements.txt +include README.md +include LICENSE + +# CMake modules and configuration +recursive-include cmake *.cmake *.in + +# C++ source files +recursive-include cpu_backend *.h *.hpp *.cpp *.c *.cc +recursive-include operators *.h 
*.hpp *.cpp *.c *.cc +include ext_bindings.cpp + +# Python package +recursive-include python *.py + +# Third-party dependencies (vendored) +recursive-include third_party * + +# Exclude compiled and cache files +global-exclude *.pyc +global-exclude *.pyo +global-exclude __pycache__ +global-exclude .git* +global-exclude *.so +global-exclude *.o +global-exclude *.a +global-exclude build +global-exclude dist +global-exclude *.egg-info diff --git a/kt-kernel/README.md b/kt-kernel/README.md index 4aee314..bd87e0c 100644 --- a/kt-kernel/README.md +++ b/kt-kernel/README.md @@ -47,14 +47,75 @@ High-performance kernel operations for KTransformers, featuring CPU-optimized Mo ## Installation -### Prerequisites +### Option 1: Install from PyPI (Recommended for Most Users) + +Choose the version matching your CUDA installation: + +```bash +# For CUDA 11.8 +pip install kt-kernel==0.4.2.cu118 + +# For CUDA 12.1 +pip install kt-kernel==0.4.2.cu121 + +# For CUDA 12.4 +pip install kt-kernel==0.4.2.cu124 + +# For CUDA 12.6 +pip install kt-kernel==0.4.2.cu126 +``` + +> **Note**: Replace `0.4.2` with the [latest version](https://pypi.org/project/kt-kernel/#history) if available. 
+ +**Features:** +- ✅ **Automatic CPU detection**: Detects your CPU and loads the optimal kernel variant +- ✅ **Multi-variant wheel**: Includes AMX, AVX512, and AVX2 variants in a single package +- ✅ **No compilation needed**: Pre-built wheels for Python 3.10, 3.11, 3.12 +- ✅ **Multiple CUDA versions**: Choose the version matching your environment + +**Requirements:** +- CUDA 11.8+ or 12.x runtime (must match the package version you install) +- PyTorch 2.0+ (install separately, must match CUDA version) +- Linux x86-64 + +**CPU Variants Included:** +| Variant | CPU Support | Use Case | +|---------|-------------|----------| +| **AMX** | Intel Sapphire Rapids+ | Best performance on latest Intel CPUs | +| **AVX512** | Intel Skylake-X/Ice Lake/Cascade Lake | AVX512-capable CPUs without AMX | +| **AVX2** | Intel Haswell+, AMD Zen+ | Maximum compatibility | + +**Check which variant is loaded:** +```python +import kt_kernel +print(f"CPU variant: {kt_kernel.__cpu_variant__}") # 'amx', 'avx512', or 'avx2' +print(f"Version: {kt_kernel.__version__}") +``` + +**Environment Variables:** +```bash +# Override automatic CPU detection +export KT_KERNEL_CPU_VARIANT=avx2 # or 'avx512', 'amx' + +# Enable debug output +export KT_KERNEL_DEBUG=1 +python -c "import kt_kernel" +``` + +--- + +### Option 2: Install from Source (For AMD, ARM, or Custom Builds) + +If you need AMD (BLIS), ARM (KML), or custom CUDA versions, build from source: + +#### Prerequisites First, initialize git submodules: ```bash git submodule update --init --recursive ``` -### Quick Installation (Recommended) +#### Quick Installation Step 0: Create and activate a conda environment (recommended): @@ -65,7 +126,7 @@ conda activate kt-kernel You can now install in two clear steps using the same script. 
-Option A: Two-step (specify dependencies installation and build separately) +**Option A: Two-step** (specify dependencies installation and build separately) ```bash # 1) Install system prerequisites (cmake, hwloc, pkg-config) @@ -76,7 +137,7 @@ Option A: Two-step (specify dependencies installation and build separately) ./install.sh build ``` -Option B: One-step +**Option B: One-step** ```bash ./install.sh diff --git a/kt-kernel/install.sh b/kt-kernel/install.sh index 99c2a38..da09772 100755 --- a/kt-kernel/install.sh +++ b/kt-kernel/install.sh @@ -161,6 +161,34 @@ build_step() { echo "Skipping clean of $REPO_ROOT/build (requested by --no-clean)" fi + # Check for multi-variant build mode (Docker environment) + if [ "${CPUINFER_BUILD_ALL_VARIANTS:-0}" = "1" ]; then + echo "==========================================" + echo "Building ALL CPU variants (AMX/AVX512/AVX2)" + echo "==========================================" + echo "" + echo "This will build three variants in a single wheel:" + echo " - AMX variant (Intel Sapphire Rapids+)" + echo " - AVX512 variant (Intel Skylake-X/Ice Lake+)" + echo " - AVX2 variant (maximum compatibility)" + echo "" + echo "Runtime CPU detection will automatically select the best variant." + echo "" + + export CPUINFER_FORCE_REBUILD=1 + export CPUINFER_BUILD_TYPE=${CPUINFER_BUILD_TYPE:-Release} + export CPUINFER_PARALLEL=${CPUINFER_PARALLEL:-8} + + echo "Building with:" + echo " CPUINFER_BUILD_ALL_VARIANTS=1" + echo " CPUINFER_BUILD_TYPE=$CPUINFER_BUILD_TYPE" + echo " CPUINFER_PARALLEL=$CPUINFER_PARALLEL" + echo "" + + pip install . 
-v + return 0 + fi + if [ "$MANUAL_MODE" = "0" ]; then # Auto-detection mode echo "==========================================" diff --git a/kt-kernel/pyproject.toml b/kt-kernel/pyproject.toml index 8e44460..3c7b537 100644 --- a/kt-kernel/pyproject.toml +++ b/kt-kernel/pyproject.toml @@ -5,7 +5,8 @@ build-backend = "setuptools.build_meta" [project] name = "kt-kernel" -version = "0.1.0" +# Version is dynamically read from ../version.py via setup.py +dynamic = ["version"] description = "KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)" readme = "README.md" authors = [{ name = "kvcache-ai" }] diff --git a/kt-kernel/python/__init__.py b/kt-kernel/python/__init__.py index 80456c7..327a47a 100644 --- a/kt-kernel/python/__init__.py +++ b/kt-kernel/python/__init__.py @@ -5,6 +5,9 @@ KT-Kernel provides high-performance kernel operations for KTransformers, including CPU-optimized MoE inference with AMX, AVX, and KML support. +The package automatically detects your CPU capabilities and loads the optimal +kernel variant (AMX, AVX512, or AVX2) at runtime. + Example usage: >>> from kt_kernel import KTMoEWrapper >>> wrapper = KTMoEWrapper( @@ -20,11 +23,41 @@ Example usage: ... chunked_prefill_size=512, ... method="AMXINT4" ... 
) + + Check which CPU variant is loaded: + >>> import kt_kernel + >>> print(kt_kernel.__cpu_variant__) # 'amx', 'avx512', or 'avx2' + +Environment Variables: + KT_KERNEL_CPU_VARIANT: Override automatic detection ('amx', 'avx512', 'avx2') + KT_KERNEL_DEBUG: Enable debug output ('1' to enable) """ from __future__ import annotations +# Detect CPU and load optimal extension variant +from ._cpu_detect import initialize as _initialize_cpu +_kt_kernel_ext, __cpu_variant__ = _initialize_cpu() + +# Make the extension module available to other modules in this package +import sys +sys.modules['kt_kernel_ext'] = _kt_kernel_ext + +# Also expose kt_kernel_ext as an attribute for backward compatibility +kt_kernel_ext = _kt_kernel_ext + +# Import main API from .experts import KTMoEWrapper -__version__ = "0.1.0" -__all__ = ["KTMoEWrapper"] +# Read version from project root version.py +import os +_root_version_file = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'version.py') +if os.path.exists(_root_version_file): + _version_ns = {} + with open(_root_version_file, 'r', encoding='utf-8') as f: + exec(f.read(), _version_ns) + __version__ = _version_ns.get('__version__', '0.4.2') +else: + __version__ = "0.4.2" + +__all__ = ["KTMoEWrapper", "kt_kernel_ext", "__cpu_variant__", "__version__"] diff --git a/kt-kernel/python/_cpu_detect.py b/kt-kernel/python/_cpu_detect.py new file mode 100644 index 0000000..f0fdab3 --- /dev/null +++ b/kt-kernel/python/_cpu_detect.py @@ -0,0 +1,233 @@ +""" +CPU feature detection and optimal kernel loader for kt-kernel. + +This module automatically detects CPU capabilities and loads the best available +kernel variant (AMX, AVX512, or AVX2) at runtime. 
+ +Environment Variables: + KT_KERNEL_CPU_VARIANT: Override automatic detection ('amx', 'avx512', 'avx2') + KT_KERNEL_DEBUG: Enable debug output ('1' to enable) + +Example: + >>> import kt_kernel + >>> print(kt_kernel.__cpu_variant__) # Shows detected variant + + # Override detection + >>> import os + >>> os.environ['KT_KERNEL_CPU_VARIANT'] = 'avx2' + >>> import kt_kernel # Will use AVX2 variant +""" +import os +import sys +from pathlib import Path + + +def detect_cpu_features(): + """ + Detect CPU features to determine the best kernel variant. + + Detection hierarchy: + 1. AMX: Intel Sapphire Rapids+ with AMX support + 2. AVX512: CPUs with AVX512F support + 3. AVX2: Fallback for maximum compatibility + + Returns: + str: 'amx', 'avx512', or 'avx2' + """ + # Check environment override + variant = os.environ.get('KT_KERNEL_CPU_VARIANT', '').lower() + if variant in ['amx', 'avx512', 'avx2']: + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print(f"[kt-kernel] Using environment override: {variant}") + return variant + + # Try to read /proc/cpuinfo on Linux + try: + with open('/proc/cpuinfo', 'r') as f: + cpuinfo = f.read().lower() + + # Check for AMX support (Intel Sapphire Rapids+) + # AMX requires amx_tile, amx_int8, and amx_bf16 + amx_flags = ['amx_tile', 'amx_int8', 'amx_bf16'] + has_amx = all(flag in cpuinfo for flag in amx_flags) + + if has_amx: + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] Detected AMX support via /proc/cpuinfo") + return 'amx' + + # Check for AVX512 support + # AVX512F is the foundation for all AVX512 variants + if 'avx512f' in cpuinfo: + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] Detected AVX512 support via /proc/cpuinfo") + return 'avx512' + + # Check for AVX2 support + if 'avx2' in cpuinfo: + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] Detected AVX2 support via /proc/cpuinfo") + return 'avx2' + + # Fallback to AVX2 (should be rare on modern CPUs) + if 
os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] No AVX2/AVX512/AMX detected, using AVX2 fallback") + return 'avx2' + + except FileNotFoundError: + # /proc/cpuinfo doesn't exist (not Linux or in container) + # Try cpufeature package as fallback + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] /proc/cpuinfo not found, trying cpufeature package") + + try: + import cpufeature + + # Check for AMX + if cpufeature.CPUFeature.get('AMX_TILE', False): + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] Detected AMX support via cpufeature") + return 'amx' + + # Check for AVX512 + if cpufeature.CPUFeature.get('AVX512F', False): + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] Detected AVX512 support via cpufeature") + return 'avx512' + + # Fallback to AVX2 + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] Using AVX2 fallback via cpufeature") + return 'avx2' + + except ImportError: + # cpufeature not available - ultimate fallback + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] cpufeature not available, using AVX2 fallback") + return 'avx2' + + except Exception as e: + # Any other error - safe fallback + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print(f"[kt-kernel] Error during CPU detection: {e}, using AVX2 fallback") + return 'avx2' + + +def load_extension(variant): + """ + Load the appropriate kt_kernel_ext variant. + + Tries to import the specified variant, with automatic fallback to + lower-performance variants if the requested one is not available. + + Supports both multi-variant builds (_kt_kernel_ext_amx.*.so) and + single-variant builds (kt_kernel_ext.*.so). 
+ + Fallback order: amx -> avx512 -> avx2 -> single-variant + + Args: + variant (str): 'amx', 'avx512', or 'avx2' + + Returns: + module: The loaded extension module + + Raises: + ImportError: If all variants fail to load + """ + import importlib.util + import glob + + # The .so files can be named in two ways: + # Multi-variant: _kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so + # Single-variant: kt_kernel_ext.cpython-311-x86_64-linux-gnu.so + # Both export PyInit_kt_kernel_ext (the original module name) + + try: + # Find the kt_kernel package directory + # We can't import kt_kernel here (circular import), so use __file__ + kt_kernel_dir = os.path.dirname(os.path.abspath(__file__)) + + # Try multi-variant naming first + pattern = os.path.join(kt_kernel_dir, f'_kt_kernel_ext_{variant}.*.so') + so_files = glob.glob(pattern) + + if not so_files: + # Try single-variant naming (fallback for builds without CPUINFER_BUILD_ALL_VARIANTS) + pattern = os.path.join(kt_kernel_dir, 'kt_kernel_ext.*.so') + so_files = glob.glob(pattern) + + if so_files: + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print(f"[kt-kernel] Multi-variant {variant} not found, using single-variant build") + else: + raise ImportError(f"No .so file found for variant {variant} (tried patterns: {kt_kernel_dir}/_kt_kernel_ext_{variant}.*.so and {kt_kernel_dir}/kt_kernel_ext.*.so)") + + so_file = so_files[0] + + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print(f"[kt-kernel] Loading {variant} from: {so_file}") + + # Load the module manually + # The module exports PyInit_kt_kernel_ext, so we use that as the module name + spec = importlib.util.spec_from_file_location('kt_kernel_ext', so_file) + if spec is None or spec.loader is None: + raise ImportError(f"Failed to create spec for {so_file}") + + ext = importlib.util.module_from_spec(spec) + spec.loader.exec_module(ext) + + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print(f"[kt-kernel] Successfully loaded {variant.upper()} variant") + return ext + + 
except (ImportError, ModuleNotFoundError, FileNotFoundError) as e: + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print(f"[kt-kernel] Failed to load {variant} variant: {e}") + + # Automatic fallback to next best variant + if variant == 'amx': + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] Falling back from AMX to AVX512") + return load_extension('avx512') + elif variant == 'avx512': + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print("[kt-kernel] Falling back from AVX512 to AVX2") + return load_extension('avx2') + else: + # AVX2 is the last fallback - if this fails, we can't continue + raise ImportError( + f"Failed to load kt_kernel extension (variant: {variant}). " + f"Original error: {e}\n" + f"This usually means the kt_kernel package is not properly installed." + ) + + +def initialize(): + """ + Detect CPU capabilities and load the optimal extension variant. + + This is the main entry point called by kt_kernel.__init__.py. + + Returns: + tuple: (extension_module, variant_name) + - extension_module: The loaded C++ extension module + - variant_name: String indicating which variant was loaded ('amx', 'avx512', 'avx2') + + Example: + >>> ext, variant = initialize() + >>> print(f"Loaded {variant} variant") + >>> wrapper = ext.AMXMoEWrapper(...) + """ + # Detect CPU features + variant = detect_cpu_features() + + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print(f"[kt-kernel] Selected CPU variant: {variant}") + + # Load the appropriate extension + ext = load_extension(variant) + + if os.environ.get('KT_KERNEL_DEBUG') == '1': + print(f"[kt-kernel] Extension module loaded: {ext.__name__}") + + return ext, variant diff --git a/kt-kernel/setup.py b/kt-kernel/setup.py index 2ef9eef..36c93fa 100644 --- a/kt-kernel/setup.py +++ b/kt-kernel/setup.py @@ -229,6 +229,133 @@ class CMakeBuild(build_ext): return info def build_extension(self, ext: CMakeExtension): + """ + Main entry point for building the extension. 
+ + Checks if multi-variant build is requested (CPUINFER_BUILD_ALL_VARIANTS=1) + and routes to the appropriate build method. + """ + if _env_get_bool("CPUINFER_BUILD_ALL_VARIANTS", False): + # Build all 3 variants (AMX, AVX512, AVX2) + self.build_multi_variants(ext) + else: + # Build single variant (original behavior) + self._build_single_variant(ext) + + def build_multi_variants(self, ext: CMakeExtension): + """ + Build all 3 CPU variants (AMX, AVX512, AVX2) in a single wheel. + + This method is called when CPUINFER_BUILD_ALL_VARIANTS=1 is set. + It builds three separate extensions with different CPU instruction sets + and renames the output .so files with variant suffixes. + """ + print("=" * 80) + print("Building kt-kernel with ALL CPU variants (AMX, AVX512, AVX2)") + print("=" * 80) + + # Define the 3 variants to build + variants = [ + { + 'name': 'amx', + 'env': { + 'CPUINFER_CPU_INSTRUCT': 'NATIVE', + 'CPUINFER_ENABLE_AMX': 'ON', + }, + 'description': 'AMX variant (Intel Sapphire Rapids+)' + }, + { + 'name': 'avx512', + 'env': { + 'CPUINFER_CPU_INSTRUCT': 'AVX512', + 'CPUINFER_ENABLE_AMX': 'OFF', + }, + 'description': 'AVX512 variant (Intel Skylake-X/Ice Lake/Cascade Lake)' + }, + { + 'name': 'avx2', + 'env': { + 'CPUINFER_CPU_INSTRUCT': 'AVX2', + 'CPUINFER_ENABLE_AMX': 'OFF', + }, + 'description': 'AVX2 variant (maximum compatibility)' + } + ] + + # Save original environment + original_env = os.environ.copy() + + extdir = Path(self.get_ext_fullpath(ext.name)).parent.resolve() + + for i, variant in enumerate(variants, 1): + print(f"\n{'=' * 80}") + print(f"Building variant {i}/3: {variant['description']}") + print(f"{'=' * 80}\n") + + # Set variant-specific environment variables + os.environ.update(variant['env']) + + # Use a unique build directory for this variant + original_build_temp = self.build_temp + self.build_temp = str(Path(self.build_temp) / f"variant_{variant['name']}") + + try: + # Build this variant (calls the single-variant build logic) + 
self._build_single_variant(ext) + + # Rename the generated .so file to include variant suffix + # Original: kt_kernel_ext.cpython-311-x86_64-linux-gnu.so + # Renamed: _kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so + + # Extract the base extension name (without package prefix) + # ext.name is "kt_kernel.kt_kernel_ext", we want "kt_kernel_ext" + base_ext_name = ext.name.split('.')[-1] + + # Find the newly built .so file + import time + time.sleep(0.5) # Give filesystem time to sync + + built_candidates = [ + f for f in Path(extdir).glob("*.so") + if f.name.startswith(base_ext_name) and not f.name.startswith(f"_{base_ext_name}_") + ] + + if not built_candidates: + print(f"WARNING: No .so file found for {base_ext_name} in {extdir}") + print(f"Files in {extdir}:") + for f in Path(extdir).glob("*.so"): + print(f" {f.name}") + + for so_file in built_candidates: + # Extract the python tag part (e.g., ".cpython-311-x86_64-linux-gnu.so") + suffix = so_file.name.replace(base_ext_name, "") + new_name = f"_{base_ext_name}_{variant['name']}{suffix}" + new_path = extdir / new_name + + print(f"-- Renaming {so_file.name} -> {new_name}") + if new_path.exists(): + print(f" WARNING: Target file already exists, removing: {new_path}") + new_path.unlink() + so_file.rename(new_path) + print(f" ✓ Successfully renamed to {new_name}") + + finally: + # Restore build_temp for next iteration + self.build_temp = original_build_temp + + # Restore original environment + os.environ.clear() + os.environ.update(original_env) + + print(f"\n{'=' * 80}") + print("✓ Successfully built all 3 CPU variants") + print(f"{'=' * 80}\n") + + def _build_single_variant(self, ext: CMakeExtension): + """ + Build a single CPU variant. This contains the core build logic + extracted from the original build_extension method. 
+ """ # Auto-detect CUDA toolkit if user did not explicitly set CPUINFER_USE_CUDA def detect_cuda_toolkit() -> bool: # Respect CUDA_HOME @@ -276,6 +403,10 @@ class CMakeBuild(build_ext): auto_cuda = detect_cuda_toolkit() os.environ["CPUINFER_USE_CUDA"] = "1" if auto_cuda else "0" print(f"-- CPUINFER_USE_CUDA not set; auto-detected CUDA toolkit: {'YES' if auto_cuda else 'NO'}") + elif cuda_env: + print("-- CPUINFER_USE_CUDA explicitly enabled") + else: + print("-- CPUINFER_USE_CUDA explicitly disabled") extdir = Path(self.get_ext_fullpath(ext.name)).parent.resolve() cfg = default_build_type() @@ -431,7 +562,15 @@ class CMakeBuild(build_ext): # Version (simple). If you later add a python package dir, you can read from it. ################################################################################ -VERSION = os.environ.get("CPUINFER_VERSION", "0.1.0") +# Import version from shared version.py at project root +_version_file = Path(__file__).resolve().parent.parent / "version.py" +if _version_file.exists(): + _version_ns = {} + with open(_version_file, "r", encoding="utf-8") as f: + exec(f.read(), _version_ns) + VERSION = os.environ.get("CPUINFER_VERSION", _version_ns.get("__version__", "0.4.2")) +else: + VERSION = os.environ.get("CPUINFER_VERSION", "0.4.2") ################################################################################ # Setup @@ -449,7 +588,7 @@ setup( "kt_kernel": "python", "kt_kernel.utils": "python/utils", }, - ext_modules=[CMakeExtension("kt_kernel_ext", str(REPO_ROOT))], + ext_modules=[CMakeExtension("kt_kernel.kt_kernel_ext", str(REPO_ROOT))], cmdclass={"build_ext": CMakeBuild}, zip_safe=False, classifiers=[ diff --git a/kt-kernel/test/per_commit/test_basic_cpu.py b/kt-kernel/test/per_commit/test_basic_cpu.py index bf26d04..46c3c0a 100644 --- a/kt-kernel/test/per_commit/test_basic_cpu.py +++ b/kt-kernel/test/per_commit/test_basic_cpu.py @@ -16,7 +16,8 @@ register_cpu_ci(est_time=30, suite="default") # Check if kt_kernel_ext is available 
try: - import kt_kernel_ext + import kt_kernel # Import kt_kernel first to register kt_kernel_ext + kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module HAS_KT_KERNEL = True except ImportError: HAS_KT_KERNEL = False diff --git a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py index 5ad4ee2..9ded113 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py +++ b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py @@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default") # Check if dependencies are available try: import torch - import kt_kernel_ext + import kt_kernel # Import kt_kernel first to register kt_kernel_ext + kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module HAS_DEPS = True except ImportError as e: HAS_DEPS = False diff --git a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py index 42026ee..30f88aa 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py +++ b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py @@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default") # Check if dependencies are available try: import torch - import kt_kernel_ext + import kt_kernel # Import kt_kernel first to register kt_kernel_ext + kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module HAS_DEPS = True except ImportError as e: HAS_DEPS = False diff --git a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py index 4afdbe8..90e7501 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py +++ b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py @@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default") # Check if dependencies are available try: import torch - import kt_kernel_ext + import kt_kernel # Import kt_kernel first to register kt_kernel_ext + 
kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module HAS_DEPS = True except ImportError as e: HAS_DEPS = False diff --git a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py index b6527c8..eb91535 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py +++ b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py @@ -19,7 +19,8 @@ register_cpu_ci(est_time=120, suite="default") # Check if dependencies are available try: import torch - import kt_kernel_ext + import kt_kernel # Import kt_kernel first to register kt_kernel_ext + kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module HAS_DEPS = True except ImportError as e: HAS_DEPS = False diff --git a/kt-kernel/test/per_commit/test_moe_amx_bench_int4.py b/kt-kernel/test/per_commit/test_moe_amx_bench_int4.py index 504060d..d050ab8 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_bench_int4.py +++ b/kt-kernel/test/per_commit/test_moe_amx_bench_int4.py @@ -23,7 +23,8 @@ register_cpu_ci(est_time=300, suite="default") # Check if dependencies are available try: import torch - import kt_kernel_ext + import kt_kernel # Import kt_kernel first to register kt_kernel_ext + kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module from tqdm import tqdm HAS_DEPS = True except ImportError as e: diff --git a/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1.py b/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1.py index 088f70c..8c5a231 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1.py +++ b/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1.py @@ -23,7 +23,8 @@ register_cpu_ci(est_time=300, suite="default") # Check if dependencies are available try: import torch - import kt_kernel_ext + import kt_kernel # Import kt_kernel first to register kt_kernel_ext + kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module from tqdm import tqdm HAS_DEPS = True diff --git 
a/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1k.py b/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1k.py index cdc2f85..81a9d60 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1k.py +++ b/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1k.py @@ -24,7 +24,8 @@ register_cpu_ci(est_time=300, suite="default") # Check if dependencies are available try: import torch - import kt_kernel_ext + import kt_kernel # Import kt_kernel first to register kt_kernel_ext + kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module from tqdm import tqdm HAS_DEPS = True except ImportError as e: diff --git a/kt-kernel/test/per_commit/test_moe_amx_bench_int8.py b/kt-kernel/test/per_commit/test_moe_amx_bench_int8.py index 2ad7423..559c50e 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_bench_int8.py +++ b/kt-kernel/test/per_commit/test_moe_amx_bench_int8.py @@ -23,7 +23,8 @@ register_cpu_ci(est_time=300, suite="default") # Check if dependencies are available try: import torch - import kt_kernel_ext + import kt_kernel # Import kt_kernel first to register kt_kernel_ext + kt_kernel_ext = kt_kernel.kt_kernel_ext # Access the extension module from tqdm import tqdm HAS_DEPS = True except ImportError as e: diff --git a/kt-sft/ktransformers/__init__.py b/kt-sft/ktransformers/__init__.py index 59cad5e..ab915fa 100644 --- a/kt-sft/ktransformers/__init__.py +++ b/kt-sft/ktransformers/__init__.py @@ -1,11 +1,20 @@ #!/usr/bin/env python # coding=utf-8 ''' -Description : +Description : Author : kkk1nak0 Date : 2024-08-15 07:34:46 Version : 1.0.0 -LastEditors : chenxl +LastEditors : chenxl LastEditTime : 2025-02-15 03:53:02 ''' -__version__ = "0.4.1" +import sys +import os + +# Import version from shared version.py at project root +_root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.insert(0, _root_dir) +try: + from version import __version__ +finally: + sys.path.pop(0) diff --git a/version.py b/version.py new file 
mode 100644 index 0000000..d4766bc --- /dev/null +++ b/version.py @@ -0,0 +1,6 @@ +""" +KTransformers version information — single source of truth for kt-kernel and kt-sft. +NOTE(review): kt-kernel's CMakeLists.txt, README, and hard-coded fallbacks still say "0.4.2"; keep them in sync with this value. +""" + +__version__ = "0.4.3"