Files
ktransformers/kt-kernel/python/cli/commands/doctor.py
Jianwei Dong 15c624dcae Fix/sglang kt detection (#1875)
* [feat]: simplify sglang installation with submodule, auto-sync CI, and version alignment

- Add kvcache-ai/sglang as git submodule at third_party/sglang (branch = main)
- Add top-level install.sh for one-click source installation (sglang + kt-kernel)
- Add sglang-kt as hard dependency in kt-kernel/pyproject.toml
- Add CI workflow to auto-sync sglang submodule daily and create PR
- Add CI workflow to build and publish sglang-kt to PyPI
- Integrate sglang-kt build into release-pypi.yml (version.py bump publishes both packages)
- Align sglang-kt version with ktransformers via SGLANG_KT_VERSION env var injection
- Update Dockerfile to use submodule and inject aligned version
- Update all 13 doc files, CLI hints, and i18n strings to reference new install methods

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [build]: bump version to 0.5.2

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [build]: rename PyPI package from kt-kernel to ktransformers

Users can now `pip install ktransformers` to get everything
(sglang-kt is auto-installed as a dependency).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Revert "[build]: rename PyPI package from kt-kernel to ktransformers"

This reverts commit e0cbbf6364.

* [build]: add ktransformers meta-package for PyPI

`pip install ktransformers` now works as a single install command.
It pulls kt-kernel (which in turn pulls sglang-kt).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [fix]: show sglang-kt package version in kt version command

- Prioritize sglang-kt package version (aligned with ktransformers)
  over sglang internal __version__
- Update display name from "sglang" to "sglang-kt"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [fix]: improve sglang-kt detection in kt doctor and kt version

Recognize sglang-kt package name as proof of kvcache-ai fork installation.
Previously both commands fell through to "PyPI (not recommended)" for
non-editable local source installs. Now version.py reuses the centralized
check_sglang_installation() logic.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* [build]: bump version to 0.5.2.post1

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 16:54:48 +08:00

539 lines
18 KiB
Python

"""
Doctor command for kt-cli.
Diagnoses environment issues and provides recommendations.
"""
import glob
import os
import platform
import shutil
from pathlib import Path
from typing import Optional
import typer
from rich.table import Table
from kt_kernel.cli.config.settings import get_settings
from kt_kernel.cli.i18n import t
from kt_kernel.cli.utils.console import console, print_error, print_info, print_success, print_warning
from kt_kernel.cli.utils.environment import (
check_docker,
detect_available_ram_gb,
detect_cpu_info,
detect_cuda_version,
detect_disk_space_gb,
detect_env_managers,
detect_gpus,
detect_memory_info,
detect_ram_gb,
get_installed_package_version,
)
def _get_kt_kernel_info() -> dict:
    """Get kt-kernel installation information.

    Returns:
        A dict with the keys ``installed``, ``version``, ``cpu_variant``,
        ``install_path``, ``available_variants`` (sorted list of variant names
        derived from the extension files found next to the package) and
        ``extension_file``. An additional ``error`` key holds the exception
        text when anything other than ``ImportError`` is raised while probing.
    """
    info = {
        "installed": False,
        "version": None,
        "cpu_variant": None,
        "install_path": None,
        "available_variants": [],
        "extension_file": None,
    }
    # File name prefix of per-variant extension modules, e.g.
    # "_kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so".
    variant_prefix = "_kt_kernel_ext_"
    # Every AVX512 flavour is reported under the single name "avx512".
    avx512_flavours = {"avx512", "avx512_bf16", "avx512_vbmi", "avx512_vnni", "avx512_base"}

    try:
        import kt_kernel

        info["installed"] = True
        info["version"] = getattr(kt_kernel, "__version__", "unknown")
        info["cpu_variant"] = getattr(kt_kernel, "__cpu_variant__", "unknown")

        # Get installation path
        info["install_path"] = os.path.dirname(kt_kernel.__file__)

        # Find available .so files
        kt_kernel_dir = info["install_path"]
        so_files = glob.glob(os.path.join(kt_kernel_dir, "_kt_kernel_ext_*.so"))
        so_files.extend(glob.glob(os.path.join(kt_kernel_dir, "kt_kernel_ext*.so")))

        # Parse variant names from filenames
        variants = set()
        for so_file in so_files:
            basename = os.path.basename(so_file)
            if basename.startswith(variant_prefix):
                # The variant is everything between the prefix and the first
                # "." (the start of the ABI tag). Splitting the whole name on
                # "_" does not work: the leading underscore shifts the indices
                # and variants such as "avx512_bf16" contain "_" themselves.
                variant = basename[len(variant_prefix):].split(".", 1)[0]
                if not variant:
                    continue
                if variant in avx512_flavours:
                    variant = "avx512"
                variants.add(variant)
            elif "kt_kernel_ext" in basename:
                # Single, non-variant extension build.
                variants.add("default")
        info["available_variants"] = sorted(variants)

        # Get current extension file
        if hasattr(kt_kernel, "kt_kernel_ext"):
            ext_module = kt_kernel.kt_kernel_ext
            info["extension_file"] = getattr(ext_module, "__file__", None)
    except ImportError:
        info["installed"] = False
    except Exception as e:
        # Best effort: report the failure to the caller instead of raising.
        info["error"] = str(e)
    return info
def doctor(
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed diagnostics"),
) -> None:
    """Diagnose environment issues."""
    # NOTE(review): the docstring above is kept short on purpose. This function
    # is presumably registered as a Typer command, in which case the docstring
    # is what `--help` prints - confirm before expanding it.
    #
    # Flow: run each check in order, collect one dict per table row in
    # `checks` (keys: name, status, value, hint; the packages row also carries
    # "packages"), render the table, print SGLang follow-up instructions when
    # relevant, then print a summary. `issues_found` is set by every check
    # that reports status "error" so the summary agrees with the table.
    # `verbose` adds detail rows and the hint column.
    console.print(f"\n[bold]{t('doctor_title')}[/bold]\n")

    issues_found = False
    checks = []

    # 1. Python version
    python_version = platform.python_version()
    python_ok = _check_python_version(python_version)
    checks.append(
        {
            "name": t("doctor_check_python"),
            "status": "ok" if python_ok else "error",
            "value": python_version,
            "hint": "Python 3.10+ required" if not python_ok else None,
        }
    )
    if not python_ok:
        issues_found = True

    # 2. CUDA availability (missing CUDA is only a warning)
    cuda_version = detect_cuda_version()
    checks.append(
        {
            "name": t("doctor_check_cuda"),
            "status": "ok" if cuda_version else "warning",
            "value": cuda_version or t("version_cuda_not_found"),
            "hint": "CUDA is optional but recommended for GPU acceleration" if not cuda_version else None,
        }
    )

    # 3. GPU detection
    gpus = detect_gpus()
    if gpus:
        gpu_names = ", ".join(g.name for g in gpus)
        total_vram = sum(g.vram_gb for g in gpus)
        checks.append(
            {
                "name": t("doctor_check_gpu"),
                "status": "ok",
                "value": t("doctor_gpu_found", count=len(gpus), names=gpu_names),
                "hint": f"Total VRAM: {total_vram}GB",
            }
        )
    else:
        checks.append(
            {
                "name": t("doctor_check_gpu"),
                "status": "warning",
                "value": t("doctor_gpu_not_found"),
                "hint": "GPU recommended for best performance",
            }
        )

    # 4. CPU information
    cpu_info = detect_cpu_info()
    checks.append(
        {
            "name": t("doctor_check_cpu"),
            "status": "ok",
            "value": t("doctor_cpu_info", name=cpu_info.name, cores=cpu_info.cores, threads=cpu_info.threads),
            "hint": None,
        }
    )

    # 5. CPU instruction sets (critical for kt-kernel)
    isa_list = cpu_info.instruction_sets
    has_avx2 = "AVX2" in isa_list
    has_avx512 = any(isa.startswith("AVX512") for isa in isa_list)
    has_amx = any(isa.startswith("AMX") for isa in isa_list)

    # Best available instruction set decides the status.
    if has_amx:
        isa_status = "ok"
        isa_hint = "AMX available - best performance for INT4/INT8"
    elif has_avx512:
        isa_status = "ok"
        isa_hint = "AVX512 available - good performance"
    elif has_avx2:
        isa_status = "warning"
        isa_hint = "AVX2 only - consider upgrading CPU for better performance"
    else:
        isa_status = "error"
        isa_hint = "AVX2 required for kt-kernel"
        # An error row must also be reflected in the final summary.
        issues_found = True

    # Show at most the first 8 detected instruction sets, in detection order.
    isa_display = ", ".join(isa_list[:8])
    if len(isa_list) > 8:
        isa_display += f" (+{len(isa_list) - 8} more)"
    checks.append(
        {
            "name": t("doctor_check_cpu_isa"),
            "status": isa_status,
            "value": isa_display if isa_display else "None detected",
            "hint": isa_hint,
        }
    )

    # 6. NUMA topology
    numa_detail = []
    for node, cpus in sorted(cpu_info.numa_info.items()):
        if len(cpus) > 6:
            # Long CPU lists are abbreviated as "first-last".
            cpu_str = f"{cpus[0]}-{cpus[-1]}"
        else:
            cpu_str = ",".join(str(c) for c in cpus)
        numa_detail.append(f"{node}: {cpu_str}")
    numa_value = t("doctor_numa_info", nodes=cpu_info.numa_nodes)
    if verbose and numa_detail:
        numa_value += " (" + "; ".join(numa_detail) + ")"
    checks.append(
        {
            "name": t("doctor_check_numa"),
            "status": "ok",
            "value": numa_value,
            "hint": f"{cpu_info.threads // cpu_info.numa_nodes} threads per node" if cpu_info.numa_nodes > 1 else None,
        }
    )

    # 7. kt-kernel installation check
    kt_info = _get_kt_kernel_info()
    if kt_info["installed"]:
        # Guard against a module that defines __cpu_variant__ as None/empty.
        variant = kt_info["cpu_variant"] or "unknown"
        kt_version = kt_info["version"]
        available_variants = kt_info["available_variants"]

        # Determine status based on the loaded CPU variant
        if variant == "amx":
            kt_status = "ok"
            kt_hint = "AMX variant loaded - optimal performance"
        elif variant.startswith("avx512"):
            kt_status = "ok"
            kt_hint = "AVX512 variant loaded - good performance"
        elif variant == "avx2":
            kt_status = "warning"
            kt_hint = "AVX2 variant - consider upgrading CPU for AMX/AVX512"
        else:
            kt_status = "warning"
            kt_hint = f"Unknown variant: {variant}"

        kt_value = f"v{kt_version} ({variant.upper()})"
        if verbose and available_variants:
            kt_value += f" [dim] - available: {', '.join(available_variants)}[/dim]"
        checks.append(
            {
                "name": "kt-kernel",
                "status": kt_status,
                "value": kt_value,
                "hint": kt_hint,
            }
        )

        # Show extension file name in verbose mode
        if verbose and kt_info.get("extension_file"):
            ext_file = os.path.basename(kt_info["extension_file"])
            checks.append(
                {
                    "name": " └─ Extension",
                    "status": "ok",
                    "value": ext_file,
                    "hint": None,
                }
            )

        # Show installation path in verbose mode
        if verbose and kt_info.get("install_path"):
            checks.append(
                {
                    "name": " └─ Path",
                    "status": "ok",
                    "value": kt_info["install_path"],
                    "hint": None,
                }
            )
    else:
        error_msg = kt_info.get("error", "Not installed")
        checks.append(
            {
                "name": "kt-kernel",
                "status": "error",
                "value": error_msg,
                "hint": "kt-kernel is required - run: pip install kt-kernel",
            }
        )
        issues_found = True

    # 8. System memory (with frequency if available)
    mem_info = detect_memory_info()
    if mem_info.frequency_mhz and mem_info.type:
        mem_value = t(
            "doctor_memory_freq",
            available=f"{mem_info.available_gb}GB",
            total=f"{mem_info.total_gb}GB",
            freq=mem_info.frequency_mhz,
            type=mem_info.type,
        )
    else:
        mem_value = t("doctor_memory_info", available=f"{mem_info.available_gb}GB", total=f"{mem_info.total_gb}GB")
    ram_ok = mem_info.total_gb >= 32
    checks.append(
        {
            "name": t("doctor_check_memory"),
            "status": "ok" if ram_ok else "warning",
            "value": mem_value,
            "hint": "32GB+ RAM recommended for large models" if not ram_ok else None,
        }
    )

    # 9. Disk space - one row per configured model path
    settings = get_settings()
    model_paths = settings.get_model_paths()
    for i, disk_path in enumerate(model_paths):
        available_disk, total_disk = detect_disk_space_gb(str(disk_path))
        disk_ok = available_disk >= 100
        # For multiple paths, add index to name
        path_label = f"Model Path {i+1}" if len(model_paths) > 1 else t("doctor_check_disk")
        checks.append(
            {
                "name": path_label,
                "status": "ok" if disk_ok else "warning",
                "value": t("doctor_disk_info", available=f"{available_disk}GB", path=str(disk_path)),
                "hint": "100GB+ free space recommended for model storage" if not disk_ok else None,
            }
        )

    # 10. Required packages
    # NOTE: only presence is checked; the version requirement in each tuple is
    # informational and is not compared against the installed version.
    packages = [
        ("kt-kernel", ">=0.4.0", False),  # name, version_req, required
        ("sglang", ">=0.4.0", False),
        ("torch", ">=2.4.0", True),
        ("transformers", ">=4.45.0", True),
    ]
    package_issues = []
    for pkg_name, _version_req, required in packages:
        pkg_version = get_installed_package_version(pkg_name)
        if pkg_version:
            package_issues.append((pkg_name, pkg_version, "ok"))
        elif required:
            package_issues.append((pkg_name, t("version_not_installed"), "error"))
            issues_found = True
        else:
            package_issues.append((pkg_name, t("version_not_installed"), "warning"))

    packages_failed = any(p[2] == "error" for p in package_issues)
    # Always show the row when a required package is missing; otherwise the
    # summary would report issues with nothing in the table to explain them.
    if verbose or packages_failed:
        checks.append(
            {
                "name": t("doctor_check_packages"),
                "status": "error" if packages_failed else "ok",
                "value": f"{sum(1 for p in package_issues if p[2] == 'ok')}/{len(package_issues)} installed",
                "packages": package_issues,
            }
        )

    # 11. SGLang installation source check
    from kt_kernel.cli.utils.sglang_checker import check_sglang_installation, check_sglang_kt_kernel_support

    sglang_info = check_sglang_installation()
    if sglang_info["installed"]:
        if sglang_info.get("is_kvcache_fork"):
            # Package name is sglang-kt — this is definitively the kvcache-ai fork
            if sglang_info["from_source"] and sglang_info["git_info"]:
                git_remote = sglang_info["git_info"].get("remote", "unknown")
                git_branch = sglang_info["git_info"].get("branch", "unknown")
                sglang_source_value = f"sglang-kt (Source: {git_remote}, branch: {git_branch})"
            elif sglang_info["editable"]:
                sglang_source_value = "sglang-kt (editable)"
            else:
                sglang_source_value = "sglang-kt"
            sglang_source_status = "ok"
            sglang_source_hint = None
        elif sglang_info["from_source"]:
            if sglang_info["git_info"]:
                git_remote = sglang_info["git_info"].get("remote", "unknown")
                git_branch = sglang_info["git_info"].get("branch", "unknown")
                sglang_source_value = f"Source (GitHub: {git_remote}, branch: {git_branch})"
            else:
                sglang_source_value = "Source (editable)"
            sglang_source_status = "ok"
            sglang_source_hint = None
        else:
            sglang_source_value = "PyPI sglang (not kvcache-ai fork)"
            sglang_source_status = "warning"
            sglang_source_hint = t("sglang_pypi_hint")
    else:
        sglang_source_value = "Not installed"
        sglang_source_status = "warning"
        sglang_source_hint = t("sglang_install_hint")
    checks.append(
        {
            "name": "SGLang Source",
            "status": sglang_source_status,
            "value": sglang_source_value,
            "hint": sglang_source_hint,
        }
    )

    # 12. SGLang kt-kernel support check (only if SGLang is installed)
    kt_kernel_support = {"supported": True}  # Default to True if not checked
    if sglang_info["installed"]:
        # Use cache=False to force re-check in doctor, but silent=True since we show in table
        kt_kernel_support = check_sglang_kt_kernel_support(use_cache=False, silent=True)
        if kt_kernel_support["supported"]:
            kt_kernel_value = t("sglang_kt_kernel_supported")
            kt_kernel_status = "ok"
            kt_kernel_hint = None
        else:
            kt_kernel_value = t("sglang_kt_kernel_not_supported")
            kt_kernel_status = "error"
            kt_kernel_hint = "Reinstall SGLang: pip uninstall sglang -y && pip install sglang-kt (or run ./install.sh from ktransformers root)"
            issues_found = True
        checks.append(
            {
                "name": "SGLang kt-kernel",
                "status": kt_kernel_status,
                "value": kt_kernel_value,
                "hint": kt_kernel_hint,
            }
        )

    # 13. Environment managers
    env_managers = detect_env_managers()
    docker = check_docker()
    env_list = [f"{m.name} {m.version}" for m in env_managers]
    if docker:
        env_list.append(f"docker {docker.version}")
    checks.append(
        {
            "name": "Environment Managers",
            "status": "ok" if env_list else "warning",
            "value": ", ".join(env_list) if env_list else "None found",
            "hint": "conda or docker recommended for installation" if not env_list else None,
        }
    )

    # Display results
    _display_results(checks, verbose)

    if not sglang_info["installed"]:
        # Show SGLang installation instructions if not installed
        from kt_kernel.cli.utils.sglang_checker import print_sglang_install_instructions

        console.print()
        print_sglang_install_instructions()
    elif not kt_kernel_support.get("supported", True):
        # SGLang is installed but lacks kt-kernel support
        from kt_kernel.cli.utils.sglang_checker import print_sglang_kt_kernel_instructions

        console.print()
        print_sglang_kt_kernel_instructions()

    # Summary
    console.print()
    if issues_found:
        print_warning(t("doctor_has_issues"))
    else:
        print_success(t("doctor_all_ok"))
    console.print()
def _check_python_version(version: str) -> bool:
    """Check if Python version meets requirements (3.10 or newer).

    Args:
        version: Dotted version string such as ``"3.11.4"``.

    Returns:
        True when the major/minor pair is at least (3, 10); False when it is
        older or when the string has fewer than two numeric components.
    """
    parts = version.split(".")
    try:
        major, minor = int(parts[0]), int(parts[1])
    except (IndexError, ValueError):
        return False
    # Compare as a tuple: testing major and minor independently would reject
    # a newer major release whose minor number is below 10 (e.g. "4.0").
    return (major, minor) >= (3, 10)
def _display_results(checks: list[dict], verbose: bool) -> None:
    """Print the collected checks as a table on the shared console.

    Each check contributes one row (name, coloured status, value). In verbose
    mode a fourth "Notes" column carries the check's hint, and a check that
    has a "packages" entry is followed by one sub-row per package.
    """
    # status -> (colour, translation key); any other status renders as error.
    status_styles = {
        "ok": ("green", "doctor_status_ok"),
        "warning": ("yellow", "doctor_status_warning"),
    }
    # package status -> marker; any other status renders as the red cross.
    package_markers = {
        "ok": "[green]✓[/green]",
        "warning": "[yellow]○[/yellow]",
    }

    table = Table(show_header=True, header_style="bold")
    table.add_column("Check", style="bold")
    table.add_column("Status", width=8)
    table.add_column("Value")
    if verbose:
        table.add_column("Notes", style="dim")

    for entry in checks:
        colour, label_key = status_styles.get(entry["status"], ("red", "doctor_status_error"))
        status_cell = f"[{colour}]{t(label_key)}[/{colour}]"

        cells = [entry["name"], status_cell, entry["value"]]
        if verbose:
            cells.append(entry.get("hint", ""))
        table.add_row(*cells)

        # Package details are only expanded in verbose mode.
        if not (verbose and "packages" in entry):
            continue
        for pkg_name, pkg_version, pkg_status in entry["packages"]:
            marker = package_markers.get(pkg_status, "[red]✗[/red]")
            table.add_row(f" └─ {pkg_name}", marker, pkg_version, "")

    console.print(table)