mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-03-14 18:37:23 +00:00
update README for kt-kernel for installation issues (#1590)
This commit is contained in:
@@ -11,7 +11,7 @@ High-performance kernel operations for KTransformers, featuring CPU-optimized Mo
|
||||
- **Async Execution**: Non-blocking `submit_forward` / `sync_forward` API for improved pipelining
|
||||
- **Easy Integration**: Clean Python API with automatic backend selection
|
||||
|
||||
**Note**: *LLAMAFILE backend support is currently in preview and not yet fully complete.
|
||||
**Note**: LLAMAFILE backend support is currently in *preview* and not yet fully complete.
|
||||
|
||||
## Installation
|
||||
|
||||
@@ -43,6 +43,39 @@ pip install -r requirements.txt
|
||||
|
||||
**Note**: This step is **optional**. If your environment already has torch and other required packages, you can skip this and directly run `pip install .`.
|
||||
|
||||
### Error Troubleshooting
|
||||
|
||||
#### CUDA Not Found
|
||||
|
||||
```
|
||||
-- Looking for a CUDA compiler - NOTFOUND
|
||||
CMake Error at CMakeLists.txt:389 (message):
|
||||
KTRANSFORMERS_USE_CUDA=ON but CUDA compiler not found
|
||||
```
|
||||
|
||||
Make sure you have the CUDA toolkit installed and `nvcc` is in your system PATH.
|
||||
|
||||
Try `export CMAKE_ARGS="-D CMAKE_CUDA_COMPILER=$(which nvcc)"` and run `pip install .` again.
|
||||
|
||||
#### hwloc Not Found
|
||||
|
||||
Run `sudo apt install libhwloc-dev` if on a Debian-based system, or build from source: https://www.open-mpi.org/projects/hwloc/.
|
||||
|
||||
```
|
||||
wget https://download.open-mpi.org/release/hwloc/v2.12/hwloc-2.12.2.tar.gz
|
||||
tar -xzf hwloc-2.12.2.tar.gz
|
||||
cd hwloc-2.12.2
|
||||
./configure
|
||||
make
|
||||
sudo make install
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
python -c "from kt_kernel import KTMoEWrapper; print('✓ kt-kernel installed successfully')"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
@@ -140,12 +173,6 @@ export CPUINFER_VERBOSE=1
|
||||
pip install .
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
python -c "from kt_kernel import KTMoEWrapper; print('✓ kt-kernel installed successfully')"
|
||||
```
|
||||
|
||||
## Weight Quantization
|
||||
|
||||
KT-Kernel provides weight quantization tools for CPU-GPU hybrid inference (e.g., integrating with SGLang). Both tools work together to enable heterogeneous expert placement across CPUs and GPUs.
|
||||
|
||||
@@ -8,7 +8,7 @@ name = "kt-kernel"
|
||||
version = "0.1.0"
|
||||
description = "KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)"
|
||||
readme = "README.md"
|
||||
authors = [ { name = "kvcache-ai" } ]
|
||||
authors = [{ name = "kvcache-ai" }]
|
||||
# Use SPDX string form (table form deprecated in newer setuptools)
|
||||
license = "Apache-2.0"
|
||||
classifiers = [
|
||||
@@ -25,6 +25,7 @@ dependencies = [
|
||||
"compressed-tensors>=0.7.0",
|
||||
"numpy>=1.24.0",
|
||||
"triton>=2.0.0",
|
||||
"gguf>=0.17.0",
|
||||
# Development dependencies
|
||||
"black>=25.9.0",
|
||||
]
|
||||
|
||||
@@ -8,6 +8,6 @@ safetensors>=0.4.0
|
||||
compressed-tensors>=0.7.0
|
||||
numpy>=1.24.0
|
||||
triton>=2.0.0
|
||||
|
||||
gguf>=0.17.0
|
||||
# Development dependencies
|
||||
black>=25.9.0
|
||||
|
||||
Reference in New Issue
Block a user