Files
ktransformers/kt-kernel/python/cli/commands/doctor.py
ErvinXie d8046e1bb4 Kt minimax (#1742)
[feat]: fp8 kernel and kt-cli support
2025-12-24 15:39:44 +08:00

395 lines
13 KiB
Python

"""
Doctor command for kt-cli.
Diagnoses environment issues and provides recommendations.
"""
import platform
import shutil
from pathlib import Path
from typing import Optional
import typer
from rich.table import Table
from kt_kernel.cli.config.settings import get_settings
from kt_kernel.cli.i18n import t
from kt_kernel.cli.utils.console import console, print_error, print_info, print_success, print_warning
from kt_kernel.cli.utils.environment import (
check_docker,
detect_available_ram_gb,
detect_cpu_info,
detect_cuda_version,
detect_disk_space_gb,
detect_env_managers,
detect_gpus,
detect_memory_info,
detect_ram_gb,
get_installed_package_version,
)
def doctor(
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed diagnostics"),
) -> None:
"""Diagnose environment issues."""
console.print(f"\n[bold]{t('doctor_title')}[/bold]\n")
issues_found = False
checks = []
# 1. Python version
python_version = platform.python_version()
python_ok = _check_python_version(python_version)
checks.append(
{
"name": t("doctor_check_python"),
"status": "ok" if python_ok else "error",
"value": python_version,
"hint": "Python 3.10+ required" if not python_ok else None,
}
)
if not python_ok:
issues_found = True
# 2. CUDA availability
cuda_version = detect_cuda_version()
checks.append(
{
"name": t("doctor_check_cuda"),
"status": "ok" if cuda_version else "warning",
"value": cuda_version or t("version_cuda_not_found"),
"hint": "CUDA is optional but recommended for GPU acceleration" if not cuda_version else None,
}
)
# 3. GPU detection
gpus = detect_gpus()
if gpus:
gpu_names = ", ".join(g.name for g in gpus)
total_vram = sum(g.vram_gb for g in gpus)
checks.append(
{
"name": t("doctor_check_gpu"),
"status": "ok",
"value": t("doctor_gpu_found", count=len(gpus), names=gpu_names),
"hint": f"Total VRAM: {total_vram}GB",
}
)
else:
checks.append(
{
"name": t("doctor_check_gpu"),
"status": "warning",
"value": t("doctor_gpu_not_found"),
"hint": "GPU recommended for best performance",
}
)
# 4. CPU information
cpu_info = detect_cpu_info()
checks.append(
{
"name": t("doctor_check_cpu"),
"status": "ok",
"value": t("doctor_cpu_info", name=cpu_info.name, cores=cpu_info.cores, threads=cpu_info.threads),
"hint": None,
}
)
# 5. CPU instruction sets (critical for kt-kernel)
isa_list = cpu_info.instruction_sets
# Check for recommended instruction sets
recommended_isa = {"AVX2", "AVX512F", "AMX-INT8"}
has_recommended = bool(set(isa_list) & recommended_isa)
has_avx2 = "AVX2" in isa_list
has_avx512 = any(isa.startswith("AVX512") for isa in isa_list)
has_amx = any(isa.startswith("AMX") for isa in isa_list)
# Determine status and build display string
if has_amx:
isa_status = "ok"
isa_hint = "AMX available - best performance for INT4/INT8"
elif has_avx512:
isa_status = "ok"
isa_hint = "AVX512 available - good performance"
elif has_avx2:
isa_status = "warning"
isa_hint = "AVX2 only - consider upgrading CPU for better performance"
else:
isa_status = "error"
isa_hint = "AVX2 required for kt-kernel"
# Show top instruction sets (prioritize important ones)
display_isa = isa_list[:8] if len(isa_list) > 8 else isa_list
isa_display = ", ".join(display_isa)
if len(isa_list) > 8:
isa_display += f" (+{len(isa_list) - 8} more)"
checks.append(
{
"name": t("doctor_check_cpu_isa"),
"status": isa_status,
"value": isa_display if isa_display else "None detected",
"hint": isa_hint,
}
)
# 6. NUMA topology
numa_detail = []
for node, cpus in sorted(cpu_info.numa_info.items()):
if len(cpus) > 6:
cpu_str = f"{cpus[0]}-{cpus[-1]}"
else:
cpu_str = ",".join(str(c) for c in cpus)
numa_detail.append(f"{node}: {cpu_str}")
numa_value = t("doctor_numa_info", nodes=cpu_info.numa_nodes)
if verbose and numa_detail:
numa_value += " (" + "; ".join(numa_detail) + ")"
checks.append(
{
"name": t("doctor_check_numa"),
"status": "ok",
"value": numa_value,
"hint": f"{cpu_info.threads // cpu_info.numa_nodes} threads per node" if cpu_info.numa_nodes > 1 else None,
}
)
# 7. System memory (with frequency if available)
mem_info = detect_memory_info()
if mem_info.frequency_mhz and mem_info.type:
mem_value = t(
"doctor_memory_freq",
available=f"{mem_info.available_gb}GB",
total=f"{mem_info.total_gb}GB",
freq=mem_info.frequency_mhz,
type=mem_info.type,
)
else:
mem_value = t("doctor_memory_info", available=f"{mem_info.available_gb}GB", total=f"{mem_info.total_gb}GB")
ram_ok = mem_info.total_gb >= 32
checks.append(
{
"name": t("doctor_check_memory"),
"status": "ok" if ram_ok else "warning",
"value": mem_value,
"hint": "32GB+ RAM recommended for large models" if not ram_ok else None,
}
)
# 8. Disk space - check all model paths
settings = get_settings()
model_paths = settings.get_model_paths()
# Check all configured model paths
for i, disk_path in enumerate(model_paths):
available_disk, total_disk = detect_disk_space_gb(str(disk_path))
disk_ok = available_disk >= 100
# For multiple paths, add index to name
path_label = f"Model Path {i+1}" if len(model_paths) > 1 else t("doctor_check_disk")
checks.append(
{
"name": path_label,
"status": "ok" if disk_ok else "warning",
"value": t("doctor_disk_info", available=f"{available_disk}GB", path=str(disk_path)),
"hint": "100GB+ free space recommended for model storage" if not disk_ok else None,
}
)
# 6. Required packages
packages = [
("kt-kernel", ">=0.4.0", False), # name, version_req, required
("ktransformers", ">=0.4.0", False),
("sglang", ">=0.4.0", False),
("torch", ">=2.4.0", True),
("transformers", ">=4.45.0", True),
]
package_issues = []
for pkg_name, version_req, required in packages:
version = get_installed_package_version(pkg_name)
if version:
package_issues.append((pkg_name, version, "ok"))
elif required:
package_issues.append((pkg_name, t("version_not_installed"), "error"))
issues_found = True
else:
package_issues.append((pkg_name, t("version_not_installed"), "warning"))
if verbose:
checks.append(
{
"name": t("doctor_check_packages"),
"status": "ok" if not any(p[2] == "error" for p in package_issues) else "error",
"value": f"{sum(1 for p in package_issues if p[2] == 'ok')}/{len(package_issues)} installed",
"packages": package_issues,
}
)
# 7. SGLang installation source check
from kt_kernel.cli.utils.sglang_checker import check_sglang_installation, check_sglang_kt_kernel_support
sglang_info = check_sglang_installation()
if sglang_info["installed"]:
if sglang_info["from_source"]:
if sglang_info["git_info"]:
git_remote = sglang_info["git_info"].get("remote", "unknown")
git_branch = sglang_info["git_info"].get("branch", "unknown")
sglang_source_value = f"Source (GitHub: {git_remote}, branch: {git_branch})"
sglang_source_status = "ok"
sglang_source_hint = None
else:
sglang_source_value = "Source (editable)"
sglang_source_status = "ok"
sglang_source_hint = None
else:
sglang_source_value = "PyPI (not recommended)"
sglang_source_status = "warning"
sglang_source_hint = t("sglang_pypi_hint")
else:
sglang_source_value = "Not installed"
sglang_source_status = "warning"
sglang_source_hint = t("sglang_install_hint")
checks.append(
{
"name": "SGLang Source",
"status": sglang_source_status,
"value": sglang_source_value,
"hint": sglang_source_hint,
}
)
# 7b. SGLang kt-kernel support check (only if SGLang is installed)
kt_kernel_support = {"supported": True} # Default to True if not checked
if sglang_info["installed"]:
# Use cache=False to force re-check in doctor, but silent=True since we show in table
kt_kernel_support = check_sglang_kt_kernel_support(use_cache=False, silent=True)
if kt_kernel_support["supported"]:
kt_kernel_value = t("sglang_kt_kernel_supported")
kt_kernel_status = "ok"
kt_kernel_hint = None
else:
kt_kernel_value = t("sglang_kt_kernel_not_supported")
kt_kernel_status = "error"
kt_kernel_hint = 'Reinstall SGLang from: git clone https://github.com/kvcache-ai/sglang && cd sglang && pip install -e "python[all]"'
issues_found = True
checks.append(
{
"name": "SGLang kt-kernel",
"status": kt_kernel_status,
"value": kt_kernel_value,
"hint": kt_kernel_hint,
}
)
# 8. Environment managers
env_managers = detect_env_managers()
docker = check_docker()
env_list = [f"{m.name} {m.version}" for m in env_managers]
if docker:
env_list.append(f"docker {docker.version}")
checks.append(
{
"name": "Environment Managers",
"status": "ok" if env_list else "warning",
"value": ", ".join(env_list) if env_list else "None found",
"hint": "conda or docker recommended for installation" if not env_list else None,
}
)
# Display results
_display_results(checks, verbose)
# Show SGLang installation instructions if not installed
if not sglang_info["installed"]:
from kt_kernel.cli.utils.sglang_checker import print_sglang_install_instructions
console.print()
print_sglang_install_instructions()
# Show kt-kernel installation instructions if SGLang is installed but doesn't support kt-kernel
elif sglang_info["installed"] and not kt_kernel_support.get("supported", True):
from kt_kernel.cli.utils.sglang_checker import print_sglang_kt_kernel_instructions
console.print()
print_sglang_kt_kernel_instructions()
# Summary
console.print()
if issues_found:
print_warning(t("doctor_has_issues"))
else:
print_success(t("doctor_all_ok"))
console.print()
def _check_python_version(version: str) -> bool:
"""Check if Python version meets requirements."""
parts = version.split(".")
try:
major, minor = int(parts[0]), int(parts[1])
return major >= 3 and minor >= 10
except (IndexError, ValueError):
return False
def _display_results(checks: list[dict], verbose: bool) -> None:
"""Display diagnostic results."""
table = Table(show_header=True, header_style="bold")
table.add_column("Check", style="bold")
table.add_column("Status", width=8)
table.add_column("Value")
if verbose:
table.add_column("Notes", style="dim")
for check in checks:
status = check["status"]
if status == "ok":
status_str = f"[green]{t('doctor_status_ok')}[/green]"
elif status == "warning":
status_str = f"[yellow]{t('doctor_status_warning')}[/yellow]"
else:
status_str = f"[red]{t('doctor_status_error')}[/red]"
if verbose:
table.add_row(
check["name"],
status_str,
check["value"],
check.get("hint", ""),
)
else:
table.add_row(
check["name"],
status_str,
check["value"],
)
# Show package details if verbose
if verbose and "packages" in check:
for pkg_name, pkg_version, pkg_status in check["packages"]:
if pkg_status == "ok":
pkg_status_str = "[green]✓[/green]"
elif pkg_status == "warning":
pkg_status_str = "[yellow]○[/yellow]"
else:
pkg_status_str = "[red]✗[/red]"
table.add_row(
f" └─ {pkg_name}",
pkg_status_str,
pkg_version,
"",
)
console.print(table)