Update chat.py, include multi-line input support and context clearing through input (#738)

* Update chat.py, include multi-line input support and context clearing

- Enable multi-line input (mli) support through the -mli argument. When using mli, end input with the EOF char (return/Ctrl+D on Unix, return/Ctrl+Z/return on Windows)
- Allow context clearing outside of amnesia by inputting "clear"

* Adding qwq chat mode, adding the ability to forget thinking context
This commit is contained in:
Thomas
2025-03-10 15:28:33 +01:00
committed by GitHub
parent d8fa1a8250
commit eaf8ad1041
4 changed files with 318 additions and 80 deletions

View File

@@ -0,0 +1,217 @@
name: Build Wheels & Release ROCm62
on:
workflow_dispatch:
inputs:
release:
description: 'Release? 1 = yes, 0 = no'
default: '0'
required: true
type: string
permissions:
contents: write
jobs:
build_wheels:
name: ${{ matrix.os }} P${{ matrix.pyver }} C${{ matrix.cuda }} R${{ matrix.rocm }} T${{ matrix.torch }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
# Ubuntu 20.04 CUDA
# ROCm 6.2
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.10', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' }
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.11', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' }
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.12', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' }
# ROCm 6.2.4
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.10', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' }
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.11', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' }
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.12', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' }
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.13', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' }
fail-fast: false
defaults:
run:
shell: pwsh
steps:
# Free disk space
- name: Free Disk Space
uses: jlumbroso/free-disk-space@v1.3.1
if: runner.os == 'Linux'
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: false
swap-storage: true
# Setup Python
- uses: actions/checkout@v4
- uses: actions/setup-python@v5.4.0
with:
python-version: ${{ matrix.pyver }}
# Get version string from package
- name: Get version string
id: package_version
run: |
$versionString = Get-Content $(Join-Path 'exllamav2' 'version.py') -raw
if ($versionString -match '__version__ = "(\d+\.(?:\d+\.?(?:dev\d+)?)*)"')
{
Write-Output $('::notice file=build-wheels-release.yml,line=200,title=Package Version::Detected package version is: {0}' -f $Matches[1])
Write-Output "PACKAGE_VERSION=$($Matches[1])" >> "$env:GITHUB_OUTPUT"
}
else
{
Write-Output '::error file=build-wheels-release.yml,line=203::Could not parse version from exllamav2/version.py! You must upload wheels manually!'
Write-Output "PACKAGE_VERSION=None" >> "$env:GITHUB_OUTPUT"
}
# Pin VS build tools to 17.9 so builds won't fail
- name: Install VS2022 BuildTools 17.9.7
run: choco install -y visualstudio2022buildtools --version=117.9.7.0 --params "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 --installChannelUri https://aka.ms/vs/17/release/180911598_-255012421/channel"
if: runner.os == 'Windows'
# Install ROCm SDK, apparently needs to happen before setting up Python
- name: Build for ROCm
if: matrix.rocm != ''
shell: bash
run: |
# --- Install ROCm SDK
export ROCM_VERSION=${{ matrix.rocm }}
export TORCH_VERSION=${{ matrix.torch }}
[ ! -d /etc/apt/keyrings ] && sudo mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt update
sudo apt install rocm-hip-sdk -y
sudo apt clean -y
echo "/opt/rocm/bin" >> $GITHUB_PATH
echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV
echo "ROCM_VERSION=$ROCM_VERSION" >> $GITHUB_ENV
echo "USE_ROCM=1" >> $GITHUB_ENV
# --- Install dependencies
python3 -m ensurepip --upgrade
pip3 install torch==${{ matrix.torch }} --index-url="https://download.pytorch.org/whl/rocm$ROCM_VERSION"
pip3 install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja
pip3 cache purge
# --- Build wheel
python3 -m build -n --wheel -C--build-option=egg_info "-C--build-option=--tag-build=+rocm${{ matrix.rocm }}-torch${{ matrix.torch }}"
# Build for CUDA
- name: Setup Mamba
if: matrix.cuda != ''
uses: conda-incubator/setup-miniconda@v3.1.0
with:
activate-environment: "exllama"
python-version: ${{ matrix.pyver }}
# miniforge-variant: Mambaforge
miniforge-version: latest
# use-mamba: true
add-pip-as-python-dependency: true
auto-activate-base: false
- name: Build for CUDA
if: matrix.cuda != ''
run: |
# --- Spawn the VS shell
if ($IsWindows) {
Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools' -DevCmdArguments '-arch=x64 -host_arch=x64'
$env:DISTUTILS_USE_SDK=1
}
# --- Install CUDA using Conda
$cudaVersion = '${{ matrix.cuda }}'
$cudaVersionPytorch = '${{ matrix.cuda }}'.Remove('${{ matrix.cuda }}'.LastIndexOf('.')).Replace('.','')
$env:MAMBA_NO_LOW_SPEED_LIMIT = 1
mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime
if (!(mamba list cuda)[-1].contains('cuda')) {sleep -s 10; mamba install -y 'cuda' $cudaVersion}
if (!(mamba list cuda)[-1].contains('cuda')) {throw 'CUDA Toolkit failed to install!'}
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX
if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}
# --- Install dependencies
python -m ensurepip --upgrade
python -m pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cu$cudaVersionPytorch
python -m pip install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja
# --- Build wheel
$BUILDTAG = "+cu$cudaVersionPytorch-torch${{ matrix.torch }}"
$env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
python -m build -n --wheel -C--build-option=egg_info "-C--build-option=--tag-build=$BUILDTAG"
# Build sdist
- name: Build sdist
if: matrix.cuda == '' && matrix.rocm == ''
run: |
# --- Spawn the VS shell
if ($IsWindows) {
Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools' -DevCmdArguments '-arch=x64 -host_arch=x64'
$env:DISTUTILS_USE_SDK=1
}
# --- Install dependencies
python -m pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cpu
python -m pip install build wheel ninja
# --- Build wheel
$env:EXLLAMA_NOCOMPILE=1
python -m build -n
# Upload files
- uses: actions/upload-artifact@v4
if: matrix.artname == 'wheel'
with:
name: wheel-${{ matrix.os }}-py${{ matrix.pyver }}-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}
path: ./dist/*
- uses: actions/upload-artifact@v4
if: matrix.artname == 'sdist'
with:
name: 'sdist'
path: ./dist/*
- name: Upload files to GitHub release
if: steps.package_version.outputs.PACKAGE_VERSION != 'None' && inputs.release == '1'
uses: svenstaro/upload-release-action@2.6.1
with:
file: ./dist/*.whl
tag: ${{ format('v{0}', steps.package_version.outputs.PACKAGE_VERSION) }}
file_glob: true
overwrite: true
release_name: ${{ steps.package_version.outputs.PACKAGE_VERSION }}

View File

@@ -22,41 +22,25 @@ jobs:
# Ubuntu 20.04 CUDA
# Python 3.8
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.9
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.10
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.11
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.12
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
@@ -65,46 +49,36 @@ jobs:
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.13
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.13', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.13', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.13', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.13', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Windows 2022 CUDA
# Python 3.8
- { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.9
- { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.10
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.11
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.7.0', rocm: '', torch: '2.0.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.12
- { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.3.1', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
@@ -113,6 +87,12 @@ jobs:
- { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.4.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.5.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.13
- { artname: 'wheel', os: windows-2022, pyver: '3.13', cuda: '11.8.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.13', cuda: '12.4.0', rocm: '', torch: '2.6.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Ubuntu 20.04 ROCm
@@ -130,36 +110,9 @@ jobs:
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.11', cuda: '', rocm: '6.1', torch: '2.4.0', cudaarch: '' }
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.12', cuda: '', rocm: '6.1', torch: '2.4.0', cudaarch: '' }
# ROCm 6.2
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.10', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' }
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.11', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' }
- { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.12', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' }
# sdist
- { artname: 'sdist', os: ubuntu-20.04, pyver: '3.11', cuda: '', rocm: '', torch: '2.3.1', cudaarch: '' }
# Extra Torch 2.2 wheels for Windows 2022 until PyTorch resolves the shm.dll issue
# Python 3.8
- { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.8', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.9
- { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.9', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.10
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.11
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Python 3.12
- { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '11.8.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
- { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '12.1.0', rocm: '', torch: '2.2.0', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
# Extra wheel for HF spaces
- { artname: 'wheel', os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.0', rocm: '', torch: '2.2.2', cudaarch: '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
@@ -185,9 +138,9 @@ jobs:
# Setup Python
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v3
- uses: actions/setup-python@v5.4.0
with:
python-version: ${{ matrix.pyver }}
@@ -243,7 +196,7 @@ jobs:
python3 -m ensurepip --upgrade
pip3 install torch==${{ matrix.torch }} --index-url="https://download.pytorch.org/whl/rocm$ROCM_VERSION"
pip3 install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja
pip3 install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja numpy
pip3 cache purge
# --- Build wheel
@@ -254,13 +207,13 @@ jobs:
- name: Setup Mamba
if: matrix.cuda != ''
uses: conda-incubator/setup-miniconda@v2.3.0
uses: conda-incubator/setup-miniconda@v3.1.0
with:
activate-environment: "exllama"
python-version: ${{ matrix.pyver }}
miniforge-variant: Mambaforge
# miniforge-variant: Mambaforge
miniforge-version: latest
use-mamba: true
# use-mamba: true
add-pip-as-python-dependency: true
auto-activate-base: false
@@ -292,7 +245,7 @@ jobs:
python -m ensurepip --upgrade
python -m pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cu$cudaVersionPytorch
python -m pip install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja
python -m pip install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja numpy
# --- Build wheel
@@ -324,13 +277,13 @@ jobs:
# Upload files
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: matrix.artname == 'wheel'
with:
name: 'wheel'
name: wheel-${{ matrix.os }}-py${{ matrix.pyver }}-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}
path: ./dist/*
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: matrix.artname == 'sdist'
with:
name: 'sdist'

View File

@@ -72,6 +72,7 @@ parser.add_argument("-cq8", "--cache_q8", action = "store_true", help = "Use Q8
parser.add_argument("-ngram", "--ngram_decoding", action = "store_true", help = "Use n-gram speculative decoding")
parser.add_argument("-mli", "--mli", action = "store_true", help = "Enable multi line input")
parser.add_argument("-pt", "--print_timings", action = "store_true", help = "Output timings/stats after each prompt")
parser.add_argument("-amnesia", "--amnesia", action = "store_true", help = "Forget context after every response")
@@ -301,7 +302,22 @@ while True:
# Get user prompt
print()
up = input(col_user + username + ": " + col_default).strip()
print(col_user + username + ": " + col_default, end='', flush=True)
# multi-lin support
if args.mli:
content = sys.stdin.read().rstrip()
else:
content = input().strip()
# clear context
if content == "clear":
user_prompts = []
responses_ids = []
print(col_user + "Context cleared." + col_default, end='', flush=True)
continue
up = username + ": " + content
print()
# Add to context
@@ -337,6 +353,12 @@ while True:
tokens = res["chunk_token_ids"]
if len(response_text) == 0: chunk = chunk.lstrip()
# trim thinking from context for qwq model
if args.mode == "qwq" and chunk == "</think>":
chunk = "end of thinking"
responses_ids[-1] = torch.empty((1, 0), dtype = torch.long)
response_text += chunk
responses_ids[-1] = torch.cat([responses_ids[-1], tokens], dim = -1)

View File

@@ -210,6 +210,51 @@ class PromptFormat_codellama(PromptFormat_llama):
"""You are a helpful coding assistant. Always answer as helpfully as possible."""
class PromptFormat_qwq(PromptFormat):
description = "Qwen QwQ format"
def __init__(self):
super().__init__()
pass
def default_system_prompt(self):
return \
f"""You are a useful coding assistant, who thinks before answering."""
def first_prompt(self, sysprompt):
r = ""
if sysprompt:
r += \
"""<|im_start|>system\n""" + \
"""<|system_prompt|>""" + \
"""<|im_end|>\n"""
r += \
"""<|im_start|>user\n""" + \
"""<|user_prompt|><|im_end|>\n""" + \
"""<|im_start|>assistant\n<think>\n"""
return r
def subs_prompt(self):
return \
"""<|im_end|>\n""" + \
"""<|im_start|>user\n""" + \
"""<|user_prompt|><|im_end|>\n""" + \
"""<|im_start|>assistant\n<think>\n"""
def stop_conditions(self, tokenizer):
return \
[tokenizer.eos_token_id,
tokenizer.single_id("<|im_end|>"),
"""<|im_end|>"""]
def encoding_options(self):
return False, False, True
def print_extra_newline(self):
return True
class PromptFormat_chatml(PromptFormat):
description = "ChatML format, as used by e.g. (Mistral)Orca"
@@ -635,6 +680,7 @@ prompt_formats = \
"llama": PromptFormat_llama,
"llama3": PromptFormat_llama3,
"codellama": PromptFormat_codellama,
"qwq": PromptFormat_qwq,
"chatml": PromptFormat_chatml,
"tinyllama": PromptFormat_tinyllama,
"zephyr": PromptFormat_zephyr,