Mirror of https://github.com/kvcache-ai/ktransformers.git (last synced 2026-04-20 06:18:59 +00:00).
* refactor: move legacy code to archive/ directory - Moved ktransformers, csrc, third_party, merge_tensors to archive/ - Moved build scripts and configurations to archive/ - Kept kt-kernel, KT-SFT, doc, and README files in root - Preserved complete git history for all moved files * refactor: restructure repository to focus on kt-kernel and KT-SFT modules * fix README * fix README * fix README * fix README * docs: add performance benchmarks to kt-kernel section Add comprehensive performance data for kt-kernel to match KT-SFT's presentation: - AMX kernel optimization: 21.3 TFLOPS (3.9× faster than PyTorch) - Prefill phase: up to 20× speedup vs baseline - Decode phase: up to 4× speedup - NUMA optimization: up to 63% throughput improvement - Multi-GPU (8×L20): 227.85 tokens/s total throughput with DeepSeek-R1 FP8 Source: https://lmsys.org/blog/2025-10-22-KTransformers/ This provides users with concrete performance metrics for both core modules, making it easier to understand the capabilities of each component. * refactor: improve kt-kernel performance data with specific hardware and models Replace generic performance descriptions with concrete benchmarks: - Specify exact hardware: 8×L20 GPU + Xeon Gold 6454S, Single/Dual-socket Xeon + AMX - Include specific models: DeepSeek-R1-0528 (FP8), DeepSeek-V3 (671B) - Show detailed metrics: total throughput, output throughput, concurrency details - Match KT-SFT presentation style for consistency This provides users with actionable performance data they can use to evaluate hardware requirements and expected performance for their use cases. 
* fix README * docs: clean up performance table and improve formatting * add pic for README * refactor: simplify .gitmodules and backup legacy submodules - Remove 7 legacy submodules from root .gitmodules (archive/third_party/*) - Keep only 2 active submodules for kt-kernel (llama.cpp, pybind11) - Backup complete .gitmodules to archive/.gitmodules - Add documentation in archive/README.md for researchers who need legacy submodules This reduces initial clone size by ~500MB and avoids downloading unused dependencies. * refactor: move doc/ back to root directory Keep documentation in root for easier access and maintenance. * refactor: consolidate all images to doc/assets/ - Move kt-kernel/assets/heterogeneous_computing.png to doc/assets/ - Remove KT-SFT/assets/ (images already in doc/assets/) - Update KT-SFT/README.md image references to ../doc/assets/ - Eliminates ~7.9MB image duplication - Centralizes all documentation assets in one location * fix pic path for README
77 lines
1.6 KiB
TOML
77 lines
1.6 KiB
TOML
[build-system]
# Packages that must be present in the build environment before the backend
# below is invoked (PEP 517/518).
# NOTE(review): torch, ninja and cpufeature are presumably needed by the
# project's setup script to compile native extensions; confirm against setup.py.
requires = [
    "setuptools",
    "torch >= 2.3.0",
    "ninja",
    "packaging",
    "cpufeature",
]
build-backend = "setuptools.build_meta"

[project]
name = "ktransformers"
# The version is not declared statically here; the build backend supplies it.
dynamic = ["version"]
requires-python = ">=3.10"
description = "KTransformers, pronounced as Quick Transformers, is designed to enhance your Transformers experience with advanced kernel optimizations and placement/parallelism strategies."
readme = "README.md"
# NOTE(review): the table form of `license` is deprecated by PEP 639 in favour
# of an SPDX expression plus `license-files`; kept as-is for compatibility with
# the setuptools version currently in use.
license = { file = "LICENSE" }
keywords = ["ktransformers", "llm"]

# Runtime requirements (PEP 508 strings).
# NOTE(review): setuptools, ninja, wheel and build are packaging tools that are
# listed as runtime dependencies here; confirm they are needed at run time
# before removing any of them.
dependencies = [
    "torch >= 2.3.0",
    "transformers",
    "fastapi >= 0.111.0",
    "uvicorn >= 0.30.1",
    "langchain >= 0.2.0",
    "blessed >= 1.20.0",
    "accelerate >= 0.31.0",
    "sentencepiece >= 0.1.97",
    "setuptools",
    "ninja",
    "wheel",
    "colorlog",
    "build",
    "fire",
    "protobuf",
]

authors = [
    { name = "KVCache.AI", email = "zhang.mingxing@outlook.com" },
]

maintainers = [
    { name = "james0zan", email = "zhang.mingxing@outlook.com" },
    { name = "awake", email = "awake@approaching.ai" },
    { name = "unicorn chan", email = "nl@approaching.ai" },
]

# Trove classifiers; the listed Python versions match `requires-python` above.
classifiers = [
    "Development Status :: 4 - Beta",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]

[project.urls]
# Links published with the package metadata.
Homepage = "https://kvcache.ai"
Repository = "https://github.com/kvcache-ai/ktransformers.git"
Issues = "https://github.com/kvcache-ai/ktransformers/issues"

[project.scripts]
# Console entry point: installs a `ktransformers` command that calls `main`
# from the `ktransformers.server.main` module.
ktransformers = "ktransformers.server.main:main"

[tool.setuptools.packages.find]
# Search for packages from the project root, but only collect the
# `ktransformers` package and its subpackages.
where = ["./"]
include = ["ktransformers", "ktransformers.*"]

[tool.black]
line-length = 120
# Opt in to Black's preview style and, on top of it, the features Black still
# classifies as unstable; formatting output may change between Black releases.
preview = true
unstable = true