Files
ktransformers/kt-kernel/python/cli/utils/model_table_builder.py
Oql 56cbd69ac4 kt-cli enhancement (#1834)
* [feat]: redesign kt run interactive configuration with i18n support

- Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port)
- Add configuration save/load system (~/.ktransformers/run_configs.yaml)
- Add i18n support for kt chat (en/zh translations)
- Add universal input validators with auto-retry and Chinese comma support
- Add port availability checker with auto-suggestion
- Add parser configuration (--tool-call-parser, --reasoning-parser)
- Remove tuna command and clean up redundant files
- Fix: variable reference bug in run.py, filter to show only MoE models

* [feat]: unify model selection UI and enable shared experts fusion by default

- Unify kt run model selection table with kt model list display
  * Add Total size, MoE Size, Repo, and SHA256 status columns
  * Use consistent formatting and styling
  * Improve user decision-making with more information

- Enable --disable-shared-experts-fusion by default
  * Change default value from False to True
  * Users can still override with --enable-shared-experts-fusion

* [feat]: improve kt chat with performance metrics and better CJK support

- Add performance metrics display after each response
  * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token)
  * Accurate input/output token counts using model tokenizer
  * Fallback to estimation if tokenizer unavailable
  * Metrics shown in dim style (not prominent)

- Fix Chinese character input issues
  * Replace Prompt.ask() with console.input() for better CJK support
  * Fixes backspace deletion showing half-characters

- Suppress NumPy subnormal warnings
  * Filter "The value of the smallest subnormal" warnings
  * Cleaner CLI output on certain hardware environments

* [fix]: correct TTFT measurement in kt chat

- Move start_time initialization before API call
- Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms
- Now correctly measures time from request sent to first token received

* [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案

* [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力

* [docs]: 添加 Clawdbot 飞书接入教程链接

* [feat]: improve CLI table display, model verification, and chat experience

- Add sequence number (#) column to all model tables by default
- Filter kt edit to show only MoE GPU models (exclude AMX)
- Extend kt model verify to check *.json and *.py files in addition to weights
- Fix re-verification bug where repaired files caused false failures
- Suppress tokenizer debug output in kt chat token counting

* [fix]: fix cpu cores.

---------

Co-authored-by: skqliao <skqliao@gmail.com>
2026-02-04 16:44:54 +08:00

255 lines
8.5 KiB
Python

"""
Shared model table builders for consistent UI across commands.
Provides reusable table construction functions for displaying models
in kt model list, kt quant, kt run, etc.
"""
from typing import List, Optional, Tuple
from pathlib import Path
from rich.table import Table
from rich.console import Console
import json
def format_model_size(model_path: Path, format_type: str) -> str:
    """Return the combined size of a model's weight files as display text.

    Only files matching the format's extension directly inside
    ``model_path`` are counted (the glob is not recursive).

    Args:
        model_path: Directory holding the model files.
        format_type: ``"safetensors"`` or ``"gguf"``; selects which file
            extension is summed.

    Returns:
        The total as rendered by ``format_size``, or the dim ``-``
        placeholder when the format is not recognised or anything goes
        wrong while measuring.
    """
    from kt_kernel.cli.utils.model_scanner import format_size

    placeholder = "[dim]-[/dim]"
    try:
        pattern = {"safetensors": "*.safetensors", "gguf": "*.gguf"}.get(format_type)
        if pattern is None:
            return placeholder

        weight_files = list(model_path.glob(pattern))
        byte_count = 0
        for weight_file in weight_files:
            # Entries that no longer resolve (e.g. dangling links) are skipped.
            if weight_file.exists():
                byte_count += weight_file.stat().st_size
        return format_size(byte_count)
    except Exception:
        # Size is purely informational; never let it break table rendering.
        return placeholder
def format_repo_info(model) -> str:
    """Render a model's repository as ``<prefix>:<repo_id>``.

    The prefix is ``hf`` when ``model.repo_type`` equals ``"huggingface"``
    and ``ms`` for every other repo type. A model without a ``repo_id``
    yields the dim ``-`` placeholder.
    """
    if not model.repo_id:
        return "[dim]-[/dim]"

    if model.repo_type == "huggingface":
        prefix = "hf"
    else:
        prefix = "ms"
    return f"{prefix}:{model.repo_id}"
def format_sha256_status(model, status_map: dict) -> str:
    """Look up the display text for a model's SHA256 verification status.

    A missing or empty ``model.sha256_status`` is treated as
    ``"not_checked"``. Statuses absent from ``status_map`` render as a
    dim ``?``.
    """
    status_key = model.sha256_status or "not_checked"
    return status_map.get(status_key, "[dim]?[/dim]")
def build_moe_gpu_table(
    models: List, status_map: dict, show_index: bool = True, start_index: int = 1
) -> Tuple[Table, List]:
    """
    Build the table listing MoE GPU models.

    Args:
        models: List of MoE GPU model objects
        status_map: Mapping from SHA256 status to its display text
        show_index: Whether to prepend a "#" column for selection (default: True)
        start_index: Number shown for the first row

    Returns:
        Tuple of (Table object, list of models in display order)
    """
    missing = "[dim]-[/dim]"

    # Column headers paired with their rich styling options, in display order.
    column_specs = [
        ("Name", {"style": "cyan", "no_wrap": True}),
        ("Path", {"style": "dim", "overflow": "fold"}),
        ("Total", {"justify": "right"}),
        ("Exps", {"justify": "center", "style": "yellow"}),
        ("Act", {"justify": "center", "style": "green"}),
        ("Repository", {"style": "dim", "overflow": "fold"}),
        ("SHA256", {"justify": "center"}),
    ]
    if show_index:
        column_specs.insert(0, ("#", {"justify": "right", "style": "cyan", "no_wrap": True}))

    table = Table(show_header=True, header_style="bold", show_lines=False)
    for header, options in column_specs:
        table.add_column(header, **options)

    displayed_models = []
    for row_number, entry in enumerate(models, start_index):
        displayed_models.append(entry)

        total_size = format_model_size(Path(entry.path), "safetensors")

        # MoE expert counts; falsy values (None / 0) render as the placeholder.
        if entry.moe_num_experts:
            expert_count = str(entry.moe_num_experts)
        else:
            expert_count = missing
        if entry.moe_num_experts_per_tok:
            active_count = str(entry.moe_num_experts_per_tok)
        else:
            active_count = missing

        repository = format_repo_info(entry)
        checksum = format_sha256_status(entry, status_map)

        cells = [entry.name, entry.path, total_size, expert_count, active_count, repository, checksum]
        if show_index:
            cells.insert(0, str(row_number))
        table.add_row(*cells)

    return table, displayed_models
def build_amx_table(
    models: List,
    status_map: Optional[dict] = None,  # Kept for API compatibility but not used
    show_index: bool = True,
    start_index: int = 1,
    show_linked_gpus: bool = False,
    gpu_models: Optional[List] = None,
) -> Tuple[Table, List]:
    """
    Build AMX models table.

    Note: AMX models are locally quantized, so no SHA256 verification column.

    Args:
        models: List of AMX model objects
        status_map: (Unused - kept for API compatibility)
        show_index: Whether to show # column for selection (default: True)
        start_index: Starting index number
        show_linked_gpus: Whether to show sub-rows for linked GPU models
        gpu_models: List of GPU models (required if show_linked_gpus=True)

    Returns:
        Tuple of (Table object, list of models in display order)
    """
    table = Table(show_header=True, header_style="bold", show_lines=False)
    if show_index:
        table.add_column("#", justify="right", style="cyan", no_wrap=True)
    table.add_column("Name", style="cyan", no_wrap=True)
    table.add_column("Path", style="dim", overflow="fold")
    table.add_column("Total", justify="right")
    table.add_column("Method", justify="center", style="yellow")
    table.add_column("NUMA", justify="center", style="green")
    table.add_column("Source", style="dim", overflow="fold")

    # Map each AMX model id to the names of the GPU models it links to.
    amx_used_by_gpu = {}
    if show_linked_gpus and gpu_models:
        # Index GPU names by id once instead of rescanning gpu_models for
        # every linked id. setdefault keeps the first entry for a given id.
        gpu_name_by_id = {}
        for gpu_model in gpu_models:
            gpu_name_by_id.setdefault(gpu_model.id, gpu_model.name)

        for model in models:
            linked_ids = model.gpu_model_ids or ()
            gpu_names = [gpu_name_by_id[gpu_id] for gpu_id in linked_ids if gpu_id in gpu_name_by_id]
            if gpu_names:
                amx_used_by_gpu[model.id] = gpu_names

    displayed_models = []
    for i, model in enumerate(models, start_index):
        displayed_models.append(model)

        # Calculate size
        size_str = format_model_size(Path(model.path), "safetensors")

        # Priority: UserModel fields > config.json > ?
        method = model.amx_quant_method
        numa = model.amx_numa_nodes
        if not (method and numa):
            # Only touch the filesystem when a UserModel field is missing.
            method_from_config, numa_from_config = _read_amx_quant_metadata(Path(model.path))
            method = method or method_from_config
            numa = numa or numa_from_config

        # str() guards against a non-string "method" value in config.json,
        # which would otherwise raise on .upper() and abort the whole table.
        method_display = str(method).upper() if method else "[dim]?[/dim]"
        numa_display = str(numa) if numa else "[dim]?[/dim]"
        source_display = model.amx_source_model or "[dim]-[/dim]"

        row = []
        if show_index:
            row.append(str(i))
        row.extend([model.name, model.path, size_str, method_display, numa_display, source_display])
        table.add_row(*row)

        # Add sub-row showing linked GPUs
        if show_linked_gpus and model.id in amx_used_by_gpu:
            gpu_names_str = ", ".join(f"[dim]{name}[/dim]" for name in amx_used_by_gpu[model.id])
            sub_row = []
            if show_index:
                sub_row.append("")
            # One label cell plus empty cells so the row matches the column count.
            sub_row.extend([f" [dim]↳ GPU: {gpu_names_str}[/dim]", "", "", "", "", ""])
            table.add_row(*sub_row, style="dim")

    return table, displayed_models


def _read_amx_quant_metadata(model_dir: Path) -> Tuple[object, object]:
    """Best-effort read of ``(method, numa_count)`` from ``config.json``.

    Returns ``(None, None)`` when the file is missing, unreadable, not valid
    JSON, lacks a dict-valued ``amx_quantization`` section, or that section
    is not flagged as ``converted``. The values are returned exactly as
    stored in the file, so callers must not assume a particular type.
    """
    try:
        with open(model_dir / "config.json", "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON or
        # undecodable bytes. The metadata is display-only, so fall back.
        return None, None

    amx_quant = config.get("amx_quantization") if isinstance(config, dict) else None
    if not isinstance(amx_quant, dict) or not amx_quant.get("converted"):
        return None, None
    return amx_quant.get("method"), amx_quant.get("numa_count")
def build_gguf_table(
    models: List, status_map: dict, show_index: bool = True, start_index: int = 1
) -> Tuple[Table, List]:
    """
    Build the table listing GGUF models.

    Args:
        models: List of GGUF model objects
        status_map: Mapping from SHA256 status to its display text
        show_index: Whether to prepend a "#" column for selection (default: True)
        start_index: Number shown for the first row

    Returns:
        Tuple of (Table object, list of models in display order)
    """
    # Column headers paired with their rich styling options, in display order.
    column_specs = [
        ("Name", {"style": "cyan", "no_wrap": True}),
        ("Path", {"style": "dim", "overflow": "fold"}),
        ("Total", {"justify": "right"}),
        ("Repository", {"style": "dim", "overflow": "fold"}),
        ("SHA256", {"justify": "center"}),
    ]
    if show_index:
        column_specs.insert(0, ("#", {"justify": "right", "style": "cyan", "no_wrap": True}))

    table = Table(show_header=True, header_style="bold", show_lines=False)
    for header, options in column_specs:
        table.add_column(header, **options)

    displayed_models = []
    for row_number, entry in enumerate(models, start_index):
        displayed_models.append(entry)

        total_size = format_model_size(Path(entry.path), "gguf")
        repository = format_repo_info(entry)
        checksum = format_sha256_status(entry, status_map)

        cells = [entry.name, entry.path, total_size, repository, checksum]
        if show_index:
            cells.insert(0, str(row_number))
        table.add_row(*cells)

    return table, displayed_models