""" Doctor command for kt-cli. Diagnoses environment issues and provides recommendations. """ import glob import os import platform import shutil from pathlib import Path from typing import Optional import typer from rich.table import Table from kt_kernel.cli.config.settings import get_settings from kt_kernel.cli.i18n import t from kt_kernel.cli.utils.console import console, print_error, print_info, print_success, print_warning from kt_kernel.cli.utils.environment import ( check_docker, detect_available_ram_gb, detect_cpu_info, detect_cuda_version, detect_disk_space_gb, detect_env_managers, detect_gpus, detect_memory_info, detect_ram_gb, get_installed_package_version, ) def _get_kt_kernel_info() -> dict: """Get kt-kernel installation information.""" info = { "installed": False, "version": None, "cpu_variant": None, "install_path": None, "available_variants": [], "extension_file": None, } try: import kt_kernel info["installed"] = True info["version"] = getattr(kt_kernel, "__version__", "unknown") info["cpu_variant"] = getattr(kt_kernel, "__cpu_variant__", "unknown") # Get installation path info["install_path"] = os.path.dirname(kt_kernel.__file__) # Find available .so files kt_kernel_dir = info["install_path"] so_files = glob.glob(os.path.join(kt_kernel_dir, "_kt_kernel_ext_*.so")) so_files.extend(glob.glob(os.path.join(kt_kernel_dir, "kt_kernel_ext*.so"))) # Parse variant names from filenames variants = set() for so_file in so_files: basename = os.path.basename(so_file) if "_kt_kernel_ext_" in basename: # Extract variant from _kt_kernel_ext_amx.cpython-311-x86_64-linux-gnu.so parts = basename.split("_") if len(parts) >= 4: variant = parts[3] # "amx" from "_kt_kernel_ext_amx..." if variant.startswith("avx"): # Normalize avx variants if variant in ["avx512", "avx512_bf16", "avx512_vbmi", "avx512_vnni", "avx512_base"]: variants.add("avx512") else: variants.add(variant) else: variants.add(variant) elif "kt_kernel_ext" in basename: variants.add("default") info["available_variants"] = sorted(list(variants)) # Get current extension file if hasattr(kt_kernel, "kt_kernel_ext"): ext_module = kt_kernel.kt_kernel_ext info["extension_file"] = getattr(ext_module, "__file__", None) except ImportError: info["installed"] = False except Exception as e: info["error"] = str(e) return info def doctor( verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed diagnostics"), ) -> None: """Diagnose environment issues.""" console.print(f"\n[bold]{t('doctor_title')}[/bold]\n") issues_found = False checks = [] # 1. Python version python_version = platform.python_version() python_ok = _check_python_version(python_version) checks.append( { "name": t("doctor_check_python"), "status": "ok" if python_ok else "error", "value": python_version, "hint": "Python 3.10+ required" if not python_ok else None, } ) if not python_ok: issues_found = True # 2. CUDA availability cuda_version = detect_cuda_version() checks.append( { "name": t("doctor_check_cuda"), "status": "ok" if cuda_version else "warning", "value": cuda_version or t("version_cuda_not_found"), "hint": "CUDA is optional but recommended for GPU acceleration" if not cuda_version else None, } ) # 3. GPU detection gpus = detect_gpus() if gpus: gpu_names = ", ".join(g.name for g in gpus) total_vram = sum(g.vram_gb for g in gpus) checks.append( { "name": t("doctor_check_gpu"), "status": "ok", "value": t("doctor_gpu_found", count=len(gpus), names=gpu_names), "hint": f"Total VRAM: {total_vram}GB", } ) else: checks.append( { "name": t("doctor_check_gpu"), "status": "warning", "value": t("doctor_gpu_not_found"), "hint": "GPU recommended for best performance", } ) # 4. CPU information cpu_info = detect_cpu_info() checks.append( { "name": t("doctor_check_cpu"), "status": "ok", "value": t("doctor_cpu_info", name=cpu_info.name, cores=cpu_info.cores, threads=cpu_info.threads), "hint": None, } ) # 5. CPU instruction sets (critical for kt-kernel) isa_list = cpu_info.instruction_sets # Check for recommended instruction sets recommended_isa = {"AVX2", "AVX512F", "AMX-INT8"} has_recommended = bool(set(isa_list) & recommended_isa) has_avx2 = "AVX2" in isa_list has_avx512 = any(isa.startswith("AVX512") for isa in isa_list) has_amx = any(isa.startswith("AMX") for isa in isa_list) # Determine status and build display string if has_amx: isa_status = "ok" isa_hint = "AMX available - best performance for INT4/INT8" elif has_avx512: isa_status = "ok" isa_hint = "AVX512 available - good performance" elif has_avx2: isa_status = "warning" isa_hint = "AVX2 only - consider upgrading CPU for better performance" else: isa_status = "error" isa_hint = "AVX2 required for kt-kernel" # Show top instruction sets (prioritize important ones) display_isa = isa_list[:8] if len(isa_list) > 8 else isa_list isa_display = ", ".join(display_isa) if len(isa_list) > 8: isa_display += f" (+{len(isa_list) - 8} more)" checks.append( { "name": t("doctor_check_cpu_isa"), "status": isa_status, "value": isa_display if isa_display else "None detected", "hint": isa_hint, } ) # 6. NUMA topology numa_detail = [] for node, cpus in sorted(cpu_info.numa_info.items()): if len(cpus) > 6: cpu_str = f"{cpus[0]}-{cpus[-1]}" else: cpu_str = ",".join(str(c) for c in cpus) numa_detail.append(f"{node}: {cpu_str}") numa_value = t("doctor_numa_info", nodes=cpu_info.numa_nodes) if verbose and numa_detail: numa_value += " (" + "; ".join(numa_detail) + ")" checks.append( { "name": t("doctor_check_numa"), "status": "ok", "value": numa_value, "hint": f"{cpu_info.threads // cpu_info.numa_nodes} threads per node" if cpu_info.numa_nodes > 1 else None, } ) # 6b. kt-kernel installation check kt_info = _get_kt_kernel_info() if kt_info["installed"]: # Build display string for kt-kernel variant = kt_info["cpu_variant"] version = kt_info["version"] available_variants = kt_info["available_variants"] # Determine status based on CPU variant if variant == "amx": kt_status = "ok" kt_hint = "AMX variant loaded - optimal performance" elif variant.startswith("avx512"): kt_status = "ok" kt_hint = "AVX512 variant loaded - good performance" elif variant == "avx2": kt_status = "warning" kt_hint = "AVX2 variant - consider upgrading CPU for AMX/AVX512" else: kt_status = "warning" kt_hint = f"Unknown variant: {variant}" kt_value = f"v{version} ({variant.upper()})" if verbose and available_variants: kt_value += f" [dim] - available: {', '.join(available_variants)}[/dim]" checks.append( { "name": "kt-kernel", "status": kt_status, "value": kt_value, "hint": kt_hint, } ) # Show extension file path in verbose mode if verbose and kt_info.get("extension_file"): ext_file = os.path.basename(kt_info["extension_file"]) checks.append( { "name": " └─ Extension", "status": "ok", "value": ext_file, "hint": None, } ) # Show installation path in verbose mode if verbose and kt_info.get("install_path"): checks.append( { "name": " └─ Path", "status": "ok", "value": kt_info["install_path"], "hint": None, } ) else: error_msg = kt_info.get("error", "Not installed") checks.append( { "name": "kt-kernel", "status": "error", "value": error_msg, "hint": "kt-kernel is required - run: pip install kt-kernel", } ) issues_found = True # 7. System memory (with frequency if available) mem_info = detect_memory_info() if mem_info.frequency_mhz and mem_info.type: mem_value = t( "doctor_memory_freq", available=f"{mem_info.available_gb}GB", total=f"{mem_info.total_gb}GB", freq=mem_info.frequency_mhz, type=mem_info.type, ) else: mem_value = t("doctor_memory_info", available=f"{mem_info.available_gb}GB", total=f"{mem_info.total_gb}GB") ram_ok = mem_info.total_gb >= 32 checks.append( { "name": t("doctor_check_memory"), "status": "ok" if ram_ok else "warning", "value": mem_value, "hint": "32GB+ RAM recommended for large models" if not ram_ok else None, } ) # 8. Disk space - check all model paths settings = get_settings() model_paths = settings.get_model_paths() # Check all configured model paths for i, disk_path in enumerate(model_paths): available_disk, total_disk = detect_disk_space_gb(str(disk_path)) disk_ok = available_disk >= 100 # For multiple paths, add index to name path_label = f"Model Path {i+1}" if len(model_paths) > 1 else t("doctor_check_disk") checks.append( { "name": path_label, "status": "ok" if disk_ok else "warning", "value": t("doctor_disk_info", available=f"{available_disk}GB", path=str(disk_path)), "hint": "100GB+ free space recommended for model storage" if not disk_ok else None, } ) # 6. Required packages packages = [ ("kt-kernel", ">=0.4.0", False), # name, version_req, required ("sglang", ">=0.4.0", False), ("torch", ">=2.4.0", True), ("transformers", ">=4.45.0", True), ] package_issues = [] for pkg_name, version_req, required in packages: version = get_installed_package_version(pkg_name) if version: package_issues.append((pkg_name, version, "ok")) elif required: package_issues.append((pkg_name, t("version_not_installed"), "error")) issues_found = True else: package_issues.append((pkg_name, t("version_not_installed"), "warning")) if verbose: checks.append( { "name": t("doctor_check_packages"), "status": "ok" if not any(p[2] == "error" for p in package_issues) else "error", "value": f"{sum(1 for p in package_issues if p[2] == 'ok')}/{len(package_issues)} installed", "packages": package_issues, } ) # 7. SGLang installation source check from kt_kernel.cli.utils.sglang_checker import check_sglang_installation, check_sglang_kt_kernel_support sglang_info = check_sglang_installation() if sglang_info["installed"]: if sglang_info.get("is_kvcache_fork"): # Package name is sglang-kt — this is definitively the kvcache-ai fork if sglang_info["from_source"] and sglang_info["git_info"]: git_remote = sglang_info["git_info"].get("remote", "unknown") git_branch = sglang_info["git_info"].get("branch", "unknown") sglang_source_value = f"sglang-kt (Source: {git_remote}, branch: {git_branch})" elif sglang_info["editable"]: sglang_source_value = "sglang-kt (editable)" else: sglang_source_value = "sglang-kt" sglang_source_status = "ok" sglang_source_hint = None elif sglang_info["from_source"]: if sglang_info["git_info"]: git_remote = sglang_info["git_info"].get("remote", "unknown") git_branch = sglang_info["git_info"].get("branch", "unknown") sglang_source_value = f"Source (GitHub: {git_remote}, branch: {git_branch})" sglang_source_status = "ok" sglang_source_hint = None else: sglang_source_value = "Source (editable)" sglang_source_status = "ok" sglang_source_hint = None else: sglang_source_value = "PyPI sglang (not kvcache-ai fork)" sglang_source_status = "warning" sglang_source_hint = t("sglang_pypi_hint") else: sglang_source_value = "Not installed" sglang_source_status = "warning" sglang_source_hint = t("sglang_install_hint") checks.append( { "name": "SGLang Source", "status": sglang_source_status, "value": sglang_source_value, "hint": sglang_source_hint, } ) # 7b. SGLang kt-kernel support check (only if SGLang is installed) kt_kernel_support = {"supported": True} # Default to True if not checked if sglang_info["installed"]: # Use cache=False to force re-check in doctor, but silent=True since we show in table kt_kernel_support = check_sglang_kt_kernel_support(use_cache=False, silent=True) if kt_kernel_support["supported"]: kt_kernel_value = t("sglang_kt_kernel_supported") kt_kernel_status = "ok" kt_kernel_hint = None else: kt_kernel_value = t("sglang_kt_kernel_not_supported") kt_kernel_status = "error" kt_kernel_hint = "Reinstall SGLang: pip uninstall sglang -y && pip install sglang-kt (or run ./install.sh from ktransformers root)" issues_found = True checks.append( { "name": "SGLang kt-kernel", "status": kt_kernel_status, "value": kt_kernel_value, "hint": kt_kernel_hint, } ) # 8. Environment managers env_managers = detect_env_managers() docker = check_docker() env_list = [f"{m.name} {m.version}" for m in env_managers] if docker: env_list.append(f"docker {docker.version}") checks.append( { "name": "Environment Managers", "status": "ok" if env_list else "warning", "value": ", ".join(env_list) if env_list else "None found", "hint": "conda or docker recommended for installation" if not env_list else None, } ) # Display results _display_results(checks, verbose) # Show SGLang installation instructions if not installed if not sglang_info["installed"]: from kt_kernel.cli.utils.sglang_checker import print_sglang_install_instructions console.print() print_sglang_install_instructions() # Show kt-kernel installation instructions if SGLang is installed but doesn't support kt-kernel elif sglang_info["installed"] and not kt_kernel_support.get("supported", True): from kt_kernel.cli.utils.sglang_checker import print_sglang_kt_kernel_instructions console.print() print_sglang_kt_kernel_instructions() # Summary console.print() if issues_found: print_warning(t("doctor_has_issues")) else: print_success(t("doctor_all_ok")) console.print() def _check_python_version(version: str) -> bool: """Check if Python version meets requirements.""" parts = version.split(".") try: major, minor = int(parts[0]), int(parts[1]) return major >= 3 and minor >= 10 except (IndexError, ValueError): return False def _display_results(checks: list[dict], verbose: bool) -> None: """Display diagnostic results.""" table = Table(show_header=True, header_style="bold") table.add_column("Check", style="bold") table.add_column("Status", width=8) table.add_column("Value") if verbose: table.add_column("Notes", style="dim") for check in checks: status = check["status"] if status == "ok": status_str = f"[green]{t('doctor_status_ok')}[/green]" elif status == "warning": status_str = f"[yellow]{t('doctor_status_warning')}[/yellow]" else: status_str = f"[red]{t('doctor_status_error')}[/red]" if verbose: table.add_row( check["name"], status_str, check["value"], check.get("hint", ""), ) else: table.add_row( check["name"], status_str, check["value"], ) # Show package details if verbose if verbose and "packages" in check: for pkg_name, pkg_version, pkg_status in check["packages"]: if pkg_status == "ok": pkg_status_str = "[green]✓[/green]" elif pkg_status == "warning": pkg_status_str = "[yellow]○[/yellow]" else: pkg_status_str = "[red]✗[/red]" table.add_row( f" └─ {pkg_name}", pkg_status_str, pkg_version, "", ) console.print(table)