mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-03-14 18:37:23 +00:00
update README for kt-kernel for installation issues (#1590)
This commit is contained in:
@@ -11,7 +11,7 @@ High-performance kernel operations for KTransformers, featuring CPU-optimized Mo
|
||||
- **Async Execution**: Non-blocking `submit_forward` / `sync_forward` API for improved pipelining
|
||||
- **Easy Integration**: Clean Python API with automatic backend selection
|
||||
|
||||
**Note**: *LLAMAFILE backend support is currently in preview and not yet fully complete.
|
||||
**Note**: LLAMAFILE backend support is currently in *preview* and not yet fully complete.
|
||||
|
||||
## Installation
|
||||
|
||||
@@ -43,6 +43,39 @@ pip install -r requirements.txt
|
||||
|
||||
**Note**: This step is **optional**. If your environment already has torch and other required packages, you can skip this and directly run `pip install .`.
|
||||
|
||||
### Error Troubleshooting
|
||||
|
||||
#### CUDA Not Found
|
||||
|
||||
```
|
||||
-- Looking for a CUDA compiler - NOTFOUND
|
||||
CMake Error at CMakeLists.txt:389 (message):
|
||||
KTRANSFORMERS_USE_CUDA=ON but CUDA compiler not found
|
||||
```
|
||||
|
||||
Make sure you have the CUDA toolkit installed and `nvcc` is in your system PATH.
|
||||
|
||||
Try `export CMAKE_ARGS="-D CMAKE_CUDA_COMPILER=$(which nvcc)"` and run `pip install .` again.
|
||||
|
||||
#### hwloc Not Found
|
||||
|
||||
Run `sudo apt install libhwloc-dev` if on a Debian-based system, or build from source: https://www.open-mpi.org/projects/hwloc/.
|
||||
|
||||
```
|
||||
wget https://download.open-mpi.org/release/hwloc/v2.12/hwloc-2.12.2.tar.gz
|
||||
tar -xzf hwloc-2.12.2.tar.gz
|
||||
cd hwloc-2.12.2
|
||||
./configure
|
||||
make
|
||||
sudo make install
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
python -c "from kt_kernel import KTMoEWrapper; print('✓ kt-kernel installed successfully')"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
@@ -140,12 +173,6 @@ export CPUINFER_VERBOSE=1
|
||||
pip install .
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
python -c "from kt_kernel import KTMoEWrapper; print('✓ kt-kernel installed successfully')"
|
||||
```
|
||||
|
||||
## Weight Quantization
|
||||
|
||||
KT-Kernel provides weight quantization tools for CPU-GPU hybrid inference (e.g., integrating with SGLang). Both tools work together to enable heterogeneous expert placement across CPUs and GPUs.
|
||||
|
||||
@@ -8,7 +8,7 @@ name = "kt-kernel"
|
||||
version = "0.1.0"
|
||||
description = "KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)"
|
||||
readme = "README.md"
|
||||
authors = [ { name = "kvcache-ai" } ]
|
||||
authors = [{ name = "kvcache-ai" }]
|
||||
# Use SPDX string form (table form deprecated in newer setuptools)
|
||||
license = "Apache-2.0"
|
||||
classifiers = [
|
||||
@@ -25,6 +25,7 @@ dependencies = [
|
||||
"compressed-tensors>=0.7.0",
|
||||
"numpy>=1.24.0",
|
||||
"triton>=2.0.0",
|
||||
"gguf>=0.17.0",
|
||||
# Development dependencies
|
||||
"black>=25.9.0",
|
||||
]
|
||||
|
||||
@@ -8,6 +8,6 @@ safetensors>=0.4.0
|
||||
compressed-tensors>=0.7.0
|
||||
numpy>=1.24.0
|
||||
triton>=2.0.0
|
||||
|
||||
gguf>=0.17.0
|
||||
# Development dependencies
|
||||
black>=25.9.0
|
||||
|
||||
Reference in New Issue
Block a user