From eaf8ad10411b84bf4a409b5ba88071ae7f42072c Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 10 Mar 2025 15:28:33 +0100 Subject: [PATCH] Update chat.py, include multi-line input support and context clearing through input (#738) * Update chat.py, include multi-line input support and context clearing - Enable multi-line input (mli) support through the -mli argument. When using mli, end input with the EOF char (return/Ctrl+D on Unix, return/Ctrl+Z/return on Windows) - Allow context clearing outside of amnesia by inputting "clear" * Adding qwq chat mode, adding the ability to forget thinking context --- .../workflows/build-wheels-release-rocm62.yml | 217 ++++++++++++++++++ .github/workflows/build-wheels-release.yml | 111 +++------ examples/chat.py | 24 +- examples/chat_prompts.py | 46 ++++ 4 files changed, 318 insertions(+), 80 deletions(-) create mode 100644 .github/workflows/build-wheels-release-rocm62.yml diff --git a/.github/workflows/build-wheels-release-rocm62.yml b/.github/workflows/build-wheels-release-rocm62.yml new file mode 100644 index 0000000..23a6d68 --- /dev/null +++ b/.github/workflows/build-wheels-release-rocm62.yml @@ -0,0 +1,217 @@ +name: Build Wheels & Release ROCm62 + +on: + workflow_dispatch: + inputs: + release: + description: 'Release? 1 = yes, 0 = no' + default: '0' + required: true + type: string + +permissions: + contents: write + +jobs: + build_wheels: + name: ${{ matrix.os }} P${{ matrix.pyver }} C${{ matrix.cuda }} R${{ matrix.rocm }} T${{ matrix.torch }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + + # Ubuntu 20.04 CUDA + + # ROCm 6.2 + - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.10', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' } + - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.11', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' } + - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.12', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' } + + # ROCm 6.2.4 + - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.10', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' } + - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.11', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' } + - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.12', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' } + - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.13', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' } + + fail-fast: false + + defaults: + run: + shell: pwsh + + steps: + # Free disk space + + - name: Free Disk Space + uses: jlumbroso/free-disk-space@v1.3.1 + if: runner.os == 'Linux' + with: + tool-cache: true + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + + # Setup Python + + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5.4.0 + with: + python-version: ${{ matrix.pyver }} + + # Get version string from package + + - name: Get version string + id: package_version + run: | + $versionString = Get-Content $(Join-Path 'exllamav2' 'version.py') -raw + if ($versionString -match '__version__ = "(\d+\.(?:\d+\.?(?:dev\d+)?)*)"') + { + Write-Output $('::notice file=build-wheels-release.yml,line=200,title=Package Version::Detected package version is: {0}' -f $Matches[1]) + Write-Output "PACKAGE_VERSION=$($Matches[1])" >> "$env:GITHUB_OUTPUT" + } + else + { + Write-Output '::error file=build-wheels-release.yml,line=203::Could not parse version from exllamav2/version.py! You must upload wheels manually!' + Write-Output "PACKAGE_VERSION=None" >> "$env:GITHUB_OUTPUT" + } + + # Pin VS build tools to 17.9 so builds won't fail + + - name: Install VS2022 BuildTools 17.9.7 + run: choco install -y visualstudio2022buildtools --version=117.9.7.0 --params "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 --installChannelUri https://aka.ms/vs/17/release/180911598_-255012421/channel" + if: runner.os == 'Windows' + + # Install ROCm SDK, apparently needs to happen before setting up Python + + - name: Build for ROCm + if: matrix.rocm != '' + shell: bash + run: | + # --- Install ROCm SDK + + export ROCM_VERSION=${{ matrix.rocm }} + export TORCH_VERSION=${{ matrix.torch }} + + [ ! -d /etc/apt/keyrings ] && sudo mkdir --parents --mode=0755 /etc/apt/keyrings + wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" | sudo tee --append /etc/apt/sources.list.d/rocm.list + echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 + + sudo apt update + sudo apt install rocm-hip-sdk -y + sudo apt clean -y + + echo "/opt/rocm/bin" >> $GITHUB_PATH + echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV + echo "ROCM_VERSION=$ROCM_VERSION" >> $GITHUB_ENV + echo "USE_ROCM=1" >> $GITHUB_ENV + + # --- Install dependencies + + python3 -m ensurepip --upgrade + pip3 install torch==${{ matrix.torch }} --index-url="https://download.pytorch.org/whl/rocm$ROCM_VERSION" + pip3 install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja + pip3 cache purge + + # --- Build wheel + + python3 -m build -n --wheel -C--build-option=egg_info "-C--build-option=--tag-build=+rocm${{ matrix.rocm }}-torch${{ matrix.torch }}" + + # Build for CUDA + + - name: Setup Mamba + if: matrix.cuda != '' + uses: conda-incubator/setup-miniconda@v3.1.0 + with: + activate-environment: "exllama" + python-version: ${{ matrix.pyver }} +# miniforge-variant: Mambaforge + miniforge-version: latest +# use-mamba: true + add-pip-as-python-dependency: true + auto-activate-base: false + + - name: Build for CUDA + if: matrix.cuda != '' + run: | + # --- Spawn the VS shell + if ($IsWindows) { + Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' + Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools' -DevCmdArguments '-arch=x64 -host_arch=x64' + $env:DISTUTILS_USE_SDK=1 + } + + # --- Install CUDA using Conda + $cudaVersion = '${{ matrix.cuda }}' + $cudaVersionPytorch = '${{ matrix.cuda }}'.Remove('${{ matrix.cuda }}'.LastIndexOf('.')).Replace('.','') + + $env:MAMBA_NO_LOW_SPEED_LIMIT = 1 + mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime + + if (!(mamba list cuda)[-1].contains('cuda')) {sleep -s 10; mamba install -y 'cuda' $cudaVersion} + if (!(mamba list cuda)[-1].contains('cuda')) {throw 'CUDA Toolkit failed to install!'} + + $env:CUDA_PATH = $env:CONDA_PREFIX + $env:CUDA_HOME = $env:CONDA_PREFIX + if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH} + + # --- Install dependencies + + python -m ensurepip --upgrade + python -m pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cu$cudaVersionPytorch + python -m pip install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja + + # --- Build wheel + + $BUILDTAG = "+cu$cudaVersionPytorch-torch${{ matrix.torch }}" + $env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}' + python -m build -n --wheel -C--build-option=egg_info "-C--build-option=--tag-build=$BUILDTAG" + + # Build sdist + + - name: Build sdist + if: matrix.cuda == '' && matrix.rocm == '' + run: | + # --- Spawn the VS shell + if ($IsWindows) { + Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' + Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools' -DevCmdArguments '-arch=x64 -host_arch=x64' + $env:DISTUTILS_USE_SDK=1 + } + + # --- Install dependencies + + python -m pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cpu + python -m pip install build wheel ninja + + # --- Build wheel + + $env:EXLLAMA_NOCOMPILE=1 + python -m build -n + + # Upload files + + - uses: actions/upload-artifact@v4 + if: matrix.artname == 'wheel' + with: + name: wheel-${{ matrix.os }}-py${{ matrix.pyver }}-cuda${{ matrix.cuda }}-torch${{ matrix.torch }} + path: ./dist/* + + - uses: actions/upload-artifact@v4 + if: matrix.artname == 'sdist' + with: + name: 'sdist' + path: ./dist/* + + - name: Upload files to GitHub release + if: steps.package_version.outputs.PACKAGE_VERSION != 'None' && inputs.release == '1' + uses: svenstaro/upload-release-action@2.6.1 + with: + file: ./dist/*.whl + tag: ${{ format('v{0}', steps.package_version.outputs.PACKAGE_VERSION) }} + file_glob: true + overwrite: true + release_name: ${{ steps.package_version.outputs.PACKAGE_VERSION }} diff --git a/.github/workflows/build-wheels-release.yml b/.github/workflows/build-wheels-release.yml index 10b7e13..c890cdd 100644 --- a/.github/workflows/build-wheels-release.yml +++ b/.github/workflows/build-wheels-release.yml @@ -22,41 +22,25 @@ jobs: # Ubuntu 20.04 CUDA - # Python 3.8 - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - # Python 3.9 - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - # Python 3.10 - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } # Python 3.11 - - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } # Python 3.12 - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } @@ -65,46 +49,36 @@ jobs: - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + + # Python 3.13 + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.13', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.13', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.13', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.13', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } # Windows 2022 CUDA - # Python 3.8 - - { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - # Python 3.9 - - { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - # Python 3.10 - - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } # Python 3.11 - - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } # Python 3.12 - { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } @@ -113,6 +87,12 @@ jobs: - { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + + # Python 3.13 + - { artname: 'wheel', os: windows-2022, pyver: '3.13', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } + - { artname: 'wheel', os: windows-2022, pyver: '3.13', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } # Ubuntu 20.04 ROCm @@ -130,36 +110,9 @@ jobs: - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '', rocm: '6.1', torch: '2.4.0', cudaarch: '' } - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '', rocm: '6.1', torch: '2.4.0', cudaarch: '' } - # ROCm 6.2 - - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.10', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' } - - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.11', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' } - - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.12', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' } - # sdist - { artname: 'sdist', os: ubuntu-20.04, pyver: '3.11', cuda: '', rocm: '', torch: '2.3.1', cudaarch: '' } - # Extra Torch 2.2 wheels for Windows 2022 until PyTorch resolves the shm.dll issue - - # Python 3.8 - - { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - # Python 3.9 - - { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - # Python 3.10 - - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - # Python 3.11 - - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - # Python 3.12 - - { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - - { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } - # Extra wheel for HF spaces - { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.2.2', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' } @@ -185,9 +138,9 @@ jobs: # Setup Python - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5.4.0 with: python-version: ${{ matrix.pyver }} @@ -243,7 +196,7 @@ jobs: python3 -m ensurepip --upgrade pip3 install torch==${{ matrix.torch }} --index-url="https://download.pytorch.org/whl/rocm$ROCM_VERSION" - pip3 install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja + pip3 install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja numpy pip3 cache purge # --- Build wheel @@ -254,13 +207,13 @@ jobs: - name: Setup Mamba if: matrix.cuda != '' - uses: conda-incubator/setup-miniconda@v2.3.0 + uses: conda-incubator/setup-miniconda@v3.1.0 with: activate-environment: "exllama" python-version: ${{ matrix.pyver }} - miniforge-variant: Mambaforge +# miniforge-variant: Mambaforge miniforge-version: latest - use-mamba: true +# use-mamba: true add-pip-as-python-dependency: true auto-activate-base: false @@ -292,7 +245,7 @@ jobs: python -m ensurepip --upgrade python -m pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cu$cudaVersionPytorch - python -m pip install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja + python -m pip install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja numpy # --- Build wheel @@ -324,13 +277,13 @@ jobs: # Upload files - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: matrix.artname == 'wheel' with: - name: 'wheel' + name: wheel-${{ matrix.os }}-py${{ matrix.pyver }}-cuda${{ matrix.cuda }}-torch${{ matrix.torch }} path: ./dist/* - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: matrix.artname == 'sdist' with: name: 'sdist' diff --git a/examples/chat.py b/examples/chat.py index 3c9552d..79ecdb3 100644 --- a/examples/chat.py +++ b/examples/chat.py @@ -72,6 +72,7 @@ parser.add_argument("-cq8", "--cache_q8", action = "store_true", help = "Use Q8 parser.add_argument("-ngram", "--ngram_decoding", action = "store_true", help = "Use n-gram speculative decoding") +parser.add_argument("-mli", "--mli", action = "store_true", help = "Enable multi line input") parser.add_argument("-pt", "--print_timings", action = "store_true", help = "Output timings/stats after each prompt") parser.add_argument("-amnesia", "--amnesia", action = "store_true", help = "Forget context after every response") @@ -301,7 +302,22 @@ while True: # Get user prompt print() - up = input(col_user + username + ": " + col_default).strip() + print(col_user + username + ": " + col_default, end='', flush=True) + + # multi-lin support + if args.mli: + content = sys.stdin.read().rstrip() + else: + content = input().strip() + + # clear context + if content == "clear": + user_prompts = [] + responses_ids = [] + print(col_user + "Context cleared." + col_default, end='', flush=True) + continue + + up = username + ": " + content print() # Add to context @@ -337,6 +353,12 @@ while True: tokens = res["chunk_token_ids"] if len(response_text) == 0: chunk = chunk.lstrip() + + # trim thinking from context for qwq model + if args.mode == "qwq" and chunk == "": + chunk = "end of thinking" + responses_ids[-1] = torch.empty((1, 0), dtype = torch.long) + response_text += chunk responses_ids[-1] = torch.cat([responses_ids[-1], tokens], dim = -1) diff --git a/examples/chat_prompts.py b/examples/chat_prompts.py index 93ef63b..41cf2ba 100644 --- a/examples/chat_prompts.py +++ b/examples/chat_prompts.py @@ -210,6 +210,51 @@ class PromptFormat_codellama(PromptFormat_llama): """You are a helpful coding assistant. Always answer as helpfully as possible.""" +class PromptFormat_qwq(PromptFormat): + + description = "Qwen QwQ format" + + def __init__(self): + super().__init__() + pass + + def default_system_prompt(self): + return \ + f"""You are a useful coding assistant, who thinks before answering.""" + + def first_prompt(self, sysprompt): + r = "" + if sysprompt: + r += \ + """<|im_start|>system\n""" + \ + """<|system_prompt|>""" + \ + """<|im_end|>\n""" + r += \ + """<|im_start|>user\n""" + \ + """<|user_prompt|><|im_end|>\n""" + \ + """<|im_start|>assistant\n\n""" + return r + + def subs_prompt(self): + return \ + """<|im_end|>\n""" + \ + """<|im_start|>user\n""" + \ + """<|user_prompt|><|im_end|>\n""" + \ + """<|im_start|>assistant\n\n""" + + def stop_conditions(self, tokenizer): + return \ + [tokenizer.eos_token_id, + tokenizer.single_id("<|im_end|>"), + """<|im_end|>"""] + + def encoding_options(self): + return False, False, True + + def print_extra_newline(self): + return True + + class PromptFormat_chatml(PromptFormat): description = "ChatML format, as used by e.g. (Mistral)Orca" @@ -635,6 +680,7 @@ prompt_formats = \ "llama": PromptFormat_llama, "llama3": PromptFormat_llama3, "codellama": PromptFormat_codellama, + "qwq": PromptFormat_qwq, "chatml": PromptFormat_chatml, "tinyllama": PromptFormat_tinyllama, "zephyr": PromptFormat_zephyr,