mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-03-24 15:27:23 +00:00
* [feat]: redesign kt run interactive configuration with i18n support - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port) - Add configuration save/load system (~/.ktransformers/run_configs.yaml) - Add i18n support for kt chat (en/zh translations) - Add universal input validators with auto-retry and Chinese comma support - Add port availability checker with auto-suggestion - Add parser configuration (--tool-call-parser, --reasoning-parser) - Remove tuna command and clean up redundant files - Fix: variable reference bug in run.py, filter to show only MoE models * [feat]: unify model selection UI and enable shared experts fusion by default - Unify kt run model selection table with kt model list display * Add Total size, MoE Size, Repo, and SHA256 status columns * Use consistent formatting and styling * Improve user decision-making with more information - Enable --disable-shared-experts-fusion by default * Change default value from False to True * Users can still override with --enable-shared-experts-fusion * [feat]: improve kt chat with performance metrics and better CJK support - Add performance metrics display after each response * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token) * Accurate input/output token counts using model tokenizer * Fallback to estimation if tokenizer unavailable * Metrics shown in dim style (not prominent) - Fix Chinese character input issues * Replace Prompt.ask() with console.input() for better CJK support * Fixes backspace deletion showing half-characters - Suppress NumPy subnormal warnings * Filter "The value of the smallest subnormal" warnings * Cleaner CLI output on certain hardware environments * [fix]: correct TTFT measurement in kt chat - Move start_time initialization before API call - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms - Now correctly measures time from request sent to 
first token received * [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案 * [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力 * [docs]: 添加 Clawdbot 飞书接入教程链接 * [feat]: improve CLI table display, model verification, and chat experience - Add sequence number (#) column to all model tables by default - Filter kt edit to show only MoE GPU models (exclude AMX) - Extend kt model verify to check *.json and *.py files in addition to weights - Fix re-verification bug where repaired files caused false failures - Suppress tokenizer debug output in kt chat token counting * [fix]: fix cpu cores. --------- Co-authored-by: skqliao <skqliao@gmail.com>
119 lines
3.7 KiB
Python
119 lines
3.7 KiB
Python
"""Debug utility to inspect saved run configurations.

Reads ~/.ktransformers/run_configs.yaml and pretty-prints every saved
per-model run configuration, then cross-references the model IDs against
the user model registry.

Usage: python -m kt_kernel.cli.utils.debug_configs
"""

from pathlib import Path

import yaml
from rich import box
from rich.console import Console
from rich.table import Table

# Single shared console for all output in this script.
console = Console()
|
|
|
|
|
|
def main():
    """Show all saved configurations.

    Loads ``~/.ktransformers/run_configs.yaml``, prints one table of
    configurations per model, then maps each model ID to its registered
    name via the user model registry. All output goes through the
    module-level rich ``console``; the function returns ``None`` and
    never raises (load failures are reported and swallowed).
    """
    config_file = Path.home() / ".ktransformers" / "run_configs.yaml"

    console.print()
    console.print(f"[bold]Configuration file:[/bold] {config_file}")
    console.print()

    # Guard clause: nothing to show if the file was never written.
    if not config_file.exists():
        console.print("[red]✗ Configuration file does not exist![/red]")
        console.print()
        console.print("No configurations have been saved yet.")
        return

    try:
        with open(config_file, "r", encoding="utf-8") as f:
            # An empty file parses to None; normalize to an empty dict.
            data = yaml.safe_load(f) or {}
    except Exception as e:
        # Best-effort debug tool: report the parse/IO error and stop.
        console.print(f"[red]✗ Failed to load configuration file: {e}[/red]")
        return

    console.print("[green]✓[/green] Configuration file loaded")
    console.print()

    configs = data.get("configs", {})

    if not configs:
        console.print("[yellow]No saved configurations found.[/yellow]")
        return

    console.print(f"[bold]Found configurations for {len(configs)} model(s):[/bold]")
    console.print()

    # One section (header + table) per model ID.
    for model_id, model_configs in configs.items():
        console.print(f"[cyan]Model ID:[/cyan] {model_id}")
        console.print(f"[dim] {len(model_configs)} configuration(s)[/dim]")
        console.print()

        if not model_configs:
            continue

        # Display configs in a table
        table = Table(box=box.ROUNDED, show_header=True, header_style="bold cyan")
        table.add_column("#", justify="right", style="cyan")
        table.add_column("Name", style="white")
        table.add_column("Method", style="yellow")
        table.add_column("TP", justify="right", style="green")
        table.add_column("GPU Experts", justify="right", style="magenta")
        table.add_column("Created", style="dim")

        for i, cfg in enumerate(model_configs, 1):
            method = cfg.get("inference_method", "?")
            kt_method = cfg.get("kt_method", "?")
            # Annotate the method with its sub-variant where applicable.
            method_display = method.upper()
            if method == "raw":
                method_display += f" ({cfg.get('raw_method', '?')})"
            elif method == "amx":
                method_display += f" ({kt_method})"

            table.add_row(
                str(i),
                cfg.get("config_name", f"Config {i}"),
                method_display,
                str(cfg.get("tp_size", "?")),
                str(cfg.get("gpu_experts", "?")),
                # Trim ISO timestamps to "YYYY-MM-DDTHH:MM:SS" (19 chars).
                cfg.get("created_at", "Unknown")[:19] if cfg.get("created_at") else "Unknown",
            )

        console.print(table)
        console.print()

    # Also check user_models.yaml to show model names
    console.print("[bold]Checking model registry...[/bold]")
    console.print()

    # Imported lazily so the config dump above works even if the
    # registry module is broken or missing.
    from kt_kernel.cli.utils.user_model_registry import UserModelRegistry

    try:
        registry = UserModelRegistry()
        all_models = registry.list_models()

        console.print(f"[green]✓[/green] Found {len(all_models)} registered model(s)")
        console.print()

        # Map model IDs to names
        id_to_name = {m.id: m.name for m in all_models}

        console.print("[bold]Model ID → Name mapping:[/bold]")
        console.print()

        for model_id in configs:
            model_name = id_to_name.get(model_id, "[red]Unknown (model not found in registry)[/red]")
            console.print(f" {model_id[:8]}... → {model_name}")

        console.print()

    except Exception as e:
        # Registry lookup is optional; warn and continue.
        console.print(f"[yellow]⚠ Could not load model registry: {e}[/yellow]")
        console.print()
|
|
|
|
# Script entry point: only run when executed directly, not on import.
if __name__ == "__main__":
    main()
|