mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-06-05 12:43:58 +00:00
chore: bump version to 0.5.3 (#1909)
This commit is contained in:
@@ -30,8 +30,8 @@ ARG GITHUB_ARTIFACTORY=github.com
|
||||
ARG FLASHINFER_VERSION=0.5.3
|
||||
|
||||
# ktransformers wheel version (cu128torch28 for CUDA 12.8 + PyTorch 2.8)
|
||||
ARG KTRANSFORMERS_VERSION=0.4.2
|
||||
ARG KTRANSFORMERS_WHEEL=ktransformers-0.4.2+cu128torch28fancy-cp312-cp312-linux_x86_64.whl
|
||||
ARG KTRANSFORMERS_VERSION=0.5.3
|
||||
ARG KTRANSFORMERS_WHEEL=ktransformers-0.5.3+cu128torch28fancy-cp312-cp312-linux_x86_64.whl
|
||||
|
||||
# flash_attn wheel for fine-tune env
|
||||
ARG FLASH_ATTN_WHEEL=flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
|
||||
|
||||
@@ -25,16 +25,16 @@ sglang-v{sglang版本}_ktransformers-v{ktransformers版本}_{cpu信息}_{gpu信
|
||||
|
||||
**Tar file:**
|
||||
```
|
||||
sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
```
|
||||
|
||||
**DockerHub tags:**
|
||||
```
|
||||
Full tag:
|
||||
kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
Simplified tag:
|
||||
kvcache/ktransformers:v0.4.3-cu128
|
||||
kvcache/ktransformers:v0.5.3-cu128
|
||||
```
|
||||
|
||||
### Name Components
|
||||
@@ -42,7 +42,7 @@ kvcache/ktransformers:v0.4.3-cu128
|
||||
| Component | Description | Example |
|
||||
|-----------|-------------|---------|
|
||||
| sglang version | SGLang package version | `v0.5.6` |
|
||||
| ktransformers version | KTransformers version | `v0.4.3` |
|
||||
| ktransformers version | KTransformers version | `v0.5.3` |
|
||||
| cpu info | CPU instruction set support | `x86-intel-multi` (includes AMX/AVX512/AVX2) |
|
||||
| gpu info | CUDA version | `cu128` (CUDA 12.8) |
|
||||
| functionality | Feature mode | `sft_llamafactory-v0.9.3` or `infer` |
|
||||
@@ -197,8 +197,8 @@ docker login
|
||||
```
|
||||
|
||||
This creates two tags:
|
||||
- Full: `kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022`
|
||||
- Simplified: `kvcache/ktransformers:v0.4.3-cu128`
|
||||
- Full: `kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022`
|
||||
- Simplified: `kvcache/ktransformers:v0.5.3-cu128`
|
||||
|
||||
### Example 4: Dry Run
|
||||
|
||||
@@ -225,12 +225,12 @@ Pass additional Docker build arguments:
|
||||
|
||||
```bash
|
||||
# Load the image
|
||||
docker load -i sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
docker load -i sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
|
||||
# Run the container
|
||||
docker run -it --rm \
|
||||
--gpus all \
|
||||
sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 \
|
||||
sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 \
|
||||
/bin/bash
|
||||
```
|
||||
|
||||
@@ -238,15 +238,15 @@ docker run -it --rm \
|
||||
|
||||
```bash
|
||||
# Pull with full tag
|
||||
docker pull kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
docker pull kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
# Or pull with simplified tag
|
||||
docker pull kvcache/ktransformers:v0.4.3-cu128
|
||||
docker pull kvcache/ktransformers:v0.5.3-cu128
|
||||
|
||||
# Run the container
|
||||
docker run -it --rm \
|
||||
--gpus all \
|
||||
kvcache/ktransformers:v0.4.3-cu128 \
|
||||
kvcache/ktransformers:v0.5.3-cu128 \
|
||||
/bin/bash
|
||||
```
|
||||
|
||||
@@ -301,7 +301,7 @@ cat /workspace/versions.env
|
||||
|
||||
# Output:
|
||||
SGLANG_VERSION=0.5.6
|
||||
KTRANSFORMERS_VERSION=0.4.3
|
||||
KTRANSFORMERS_VERSION=0.5.3
|
||||
LLAMAFACTORY_VERSION=0.9.3
|
||||
```
|
||||
|
||||
|
||||
@@ -210,9 +210,9 @@ generate_image_name() {
|
||||
|
||||
# Generate simplified tag for DockerHub
|
||||
# Input:
|
||||
# $1: ktransformers_version (e.g., 0.4.3)
|
||||
# $1: ktransformers_version (e.g., 0.5.3)
|
||||
# $2: cuda_version (e.g., 12.8.1)
|
||||
# Output: Simplified tag (e.g., v0.4.3-cu128)
|
||||
# Output: Simplified tag (e.g., v0.5.3-cu128)
|
||||
generate_simplified_tag() {
|
||||
local ktrans_ver="$1"
|
||||
local cuda_version="$2"
|
||||
|
||||
@@ -137,13 +137,13 @@ OUTPUT:
|
||||
{registry}/{repository}:sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}
|
||||
|
||||
Example:
|
||||
docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
Simplified tag (if --also-push-simplified):
|
||||
{registry}/{repository}:v{ktransformers-ver}-{cuda}
|
||||
|
||||
Example:
|
||||
docker.io/kvcache/ktransformers:v0.4.3-cu128
|
||||
docker.io/kvcache/ktransformers:v0.5.3-cu128
|
||||
|
||||
EOF
|
||||
exit 0
|
||||
@@ -372,7 +372,7 @@ generate_tags() {
|
||||
log_warning "DRY RUN: Using placeholder versions"
|
||||
# Use placeholder versions for dry run
|
||||
local versions="SGLANG_VERSION=0.5.6
|
||||
KTRANSFORMERS_VERSION=0.4.3
|
||||
KTRANSFORMERS_VERSION=0.5.3
|
||||
LLAMAFACTORY_VERSION=0.9.3"
|
||||
else
|
||||
# Extract versions from image
|
||||
@@ -709,13 +709,13 @@ OUTPUT:
|
||||
{registry}/{repository}:sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}
|
||||
|
||||
Example:
|
||||
docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
Simplified tag (if --also-push-simplified):
|
||||
{registry}/{repository}:v{ktransformers-ver}-{cuda}
|
||||
|
||||
Example:
|
||||
docker.io/kvcache/ktransformers:v0.4.3-cu128
|
||||
docker.io/kvcache/ktransformers:v0.5.3-cu128
|
||||
|
||||
EOF
|
||||
exit 0
|
||||
@@ -944,7 +944,7 @@ generate_tags() {
|
||||
log_warning "DRY RUN: Using placeholder versions"
|
||||
# Use placeholder versions for dry run
|
||||
local versions="SGLANG_VERSION=0.5.6
|
||||
KTRANSFORMERS_VERSION=0.4.3
|
||||
KTRANSFORMERS_VERSION=0.5.3
|
||||
LLAMAFACTORY_VERSION=0.9.3"
|
||||
else
|
||||
# Extract versions from image
|
||||
|
||||
Reference in New Issue
Block a user