Files
ktransformers/kt-kernel/python/cli/utils/model_discovery.py
Oql 56cbd69ac4 kt-cli enhancement (#1834)
* [feat]: redesign kt run interactive configuration with i18n support

- Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port)
- Add configuration save/load system (~/.ktransformers/run_configs.yaml)
- Add i18n support for kt chat (en/zh translations)
- Add universal input validators with auto-retry and Chinese comma support
- Add port availability checker with auto-suggestion
- Add parser configuration (--tool-call-parser, --reasoning-parser)
- Remove tuna command and clean up redundant files
- Fix: variable reference bug in run.py, filter to show only MoE models

* [feat]: unify model selection UI and enable shared experts fusion by default

- Unify kt run model selection table with kt model list display
  * Add Total size, MoE Size, Repo, and SHA256 status columns
  * Use consistent formatting and styling
  * Improve user decision-making with more information

- Enable --disable-shared-experts-fusion by default
  * Change default value from False to True
  * Users can still override with --enable-shared-experts-fusion

* [feat]: improve kt chat with performance metrics and better CJK support

- Add performance metrics display after each response
  * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token)
  * Accurate input/output token counts using model tokenizer
  * Fallback to estimation if tokenizer unavailable
  * Metrics shown in dim style (not prominent)

- Fix Chinese character input issues
  * Replace Prompt.ask() with console.input() for better CJK support
  * Fixes backspace deletion showing half-characters

- Suppress NumPy subnormal warnings
  * Filter "The value of the smallest subnormal" warnings
  * Cleaner CLI output on certain hardware environments

* [fix]: correct TTFT measurement in kt chat

- Move start_time initialization before API call
- Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms
- Now correctly measures time from request sent to first token received

* [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案

* [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力

* [docs]: 添加 Clawdbot 飞书接入教程链接

* [feat]: improve CLI table display, model verification, and chat experience

- Add sequence number (#) column to all model tables by default
- Filter kt edit to show only MoE GPU models (exclude AMX)
- Extend kt model verify to check *.json and *.py files in addition to weights
- Fix re-verification bug where repaired files caused false failures
- Suppress tokenizer debug output in kt chat token counting

* [fix]: fix cpu cores.

---------

Co-authored-by: skqliao <skqliao@gmail.com>
2026-02-04 16:44:54 +08:00

251 lines
8.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Model Discovery Utilities
Shared functions for discovering and registering new models across different commands.
"""
from typing import List, Optional, Tuple
from pathlib import Path
from rich.console import Console
from kt_kernel.cli.utils.model_scanner import (
discover_models,
scan_directory_for_models,
ScannedModel,
)
from kt_kernel.cli.utils.user_model_registry import UserModelRegistry, UserModel
# Module-wide Rich console; every function below prints its user-facing messages through it.
console = Console()
def discover_and_register_global(
    min_size_gb: float = 2.0, max_depth: int = 6, show_progress: bool = True, lang: str = "en"
) -> Tuple[int, int, List[UserModel]]:
    """
    Run a system-wide model scan and register every model the registry does not know yet.

    Args:
        min_size_gb: Minimum model size in GB, forwarded to discover_models
        max_depth: Maximum search depth, forwarded to discover_models
        show_progress: When True, print a "scanning" notice before the scan starts
        lang: Message language; "zh" selects Chinese, any other value English

    Returns:
        Tuple of (total_found, new_found, registered_models). registered_models can be
        shorter than new_found because models whose registration fails are left out.
    """
    registry = UserModelRegistry()

    if show_progress:
        notice = (
            "[dim]正在扫描系统中的模型权重这可能需要30-60秒...[/dim]"
            if lang == "zh"
            else "[dim]Scanning system for model weights, this may take 30-60 seconds...[/dim]"
        )
        console.print(notice)

    # mount_points=None requests the global (not path-restricted) scan.
    found = discover_models(mount_points=None, min_size_gb=min_size_gb, max_depth=max_depth)

    # Keep only the models whose path is not in the registry yet. All lookups happen
    # before the first registration, exactly as a separate filtering pass would.
    unseen = [candidate for candidate in found if not registry.find_by_path(candidate.path)]

    # Register in scan order; a falsy result means that registration failed and is skipped.
    attempts = (_create_and_register_model(registry, candidate) for candidate in unseen)
    registered = [entry for entry in attempts if entry]

    return len(found), len(unseen), registered
def discover_and_register_path(
    path: str,
    min_size_gb: float = 2.0,
    existing_paths: Optional[set] = None,
    show_progress: bool = True,
    lang: str = "en",
) -> Tuple[int, int, List[UserModel]]:
    """
    Scan one directory for models and register those that are new.

    Args:
        path: Directory path to scan
        min_size_gb: Minimum model file size in GB, forwarded to scan_directory_for_models
        existing_paths: Optional set of paths already discovered in this session; these are skipped
        show_progress: When True, print a "scanning" notice before the scan starts
        lang: Message language; "zh" selects Chinese, any other value English

    Returns:
        Tuple of (total_found, new_found, registered_models). total_found counts every
        directory reported by the scan; (0, 0, []) is returned when the scan finds nothing.
    """
    registry = UserModelRegistry()

    if show_progress:
        notice = f"[dim]正在扫描 {path}...[/dim]" if lang == "zh" else f"[dim]Scanning {path}...[/dim]"
        console.print(notice)

    scan_result = scan_directory_for_models(path, min_file_size_gb=min_size_gb)
    if not scan_result:
        return 0, 0, []

    # Wrap each fresh scan entry in a ScannedModel, skipping anything already known.
    candidates = []
    for model_dir, (format_type, size_bytes, file_count, files) in scan_result.items():
        # Skip when the registry already has this path, or when the caller reported it
        # as discovered earlier in the session (the registry is consulted first).
        if registry.find_by_path(model_dir) or (existing_paths and model_dir in existing_paths):
            continue
        candidates.append(
            ScannedModel(
                path=model_dir,
                format=format_type,
                size_bytes=size_bytes,
                file_count=file_count,
                files=files,
            )
        )

    # Register in scan order; a falsy result means that registration failed and is skipped.
    registered = []
    for candidate in candidates:
        entry = _create_and_register_model(registry, candidate)
        if entry:
            registered.append(entry)

    return len(scan_result), len(candidates), registered
def _create_and_register_model(registry: UserModelRegistry, scanned_model: ScannedModel) -> Optional[UserModel]:
    """
    Build a UserModel for a scanned model, enrich it with metadata, and add it to the registry.

    The name comes from registry.suggest_name so that it does not clash with an existing
    entry (e.g. model-2, model-3). Repo detection and MoE analysis are best-effort:
    any exception they raise is swallowed and the corresponding fields are left untouched.
    MoE analysis is attempted only for the "safetensors" format.

    Args:
        registry: UserModelRegistry instance that receives the new entry
        scanned_model: ScannedModel to register

    Returns:
        The registered UserModel, or None if registry.add_model raised
    """
    candidate_name = registry.suggest_name(scanned_model.folder_name)
    user_model = UserModel(name=candidate_name, path=scanned_model.path, format=scanned_model.format)

    # Best-effort: fill in repo_id / repo_type when the detector reports them.
    try:
        from kt_kernel.cli.utils.repo_detector import detect_repo_for_model

        detected = detect_repo_for_model(scanned_model.path)
        if detected:
            user_model.repo_id, user_model.repo_type = detected
    except Exception:
        # Detection is optional; a failure must not block registration.
        pass

    # Best-effort: record MoE details, for safetensors models only.
    if scanned_model.format == "safetensors":
        try:
            from kt_kernel.cli.utils.analyze_moe_model import analyze_moe_model

            moe_info = analyze_moe_model(scanned_model.path, use_cache=True)
            if not moe_info or not moe_info.get("is_moe"):
                user_model.is_moe = False
            else:
                user_model.is_moe = True
                user_model.moe_num_experts = moe_info.get("num_experts")
                user_model.moe_num_experts_per_tok = moe_info.get("num_experts_per_tok")
        except Exception:
            # Analysis failed: is_moe is not assigned here and keeps whatever value it had.
            pass

    try:
        registry.add_model(user_model)
    except Exception:
        # Not expected after suggest_name, but report failure instead of raising.
        return None
    return user_model
def format_discovery_summary(
    total_found: int,
    new_found: int,
    registered: List[UserModel],
    lang: str = "en",
    show_models: bool = True,
    max_show: int = 10,
) -> None:
    """
    Print a formatted summary of a discovery run to the module console.

    Args:
        total_found: Total models found by the scan
        new_found: Number of found models that were not known before
        registered: List of UserModel objects that were actually registered
        lang: Message language; "zh" selects Chinese, any other value English
        show_models: Whether to list the registered models (name, format, path)
        max_show: Maximum number of models to list; negative values are treated as 0
    """
    # Local import keeps this block self-contained. escape() stops "[...]" sequences in
    # folder names or paths from being parsed (and swallowed) as Rich markup tags.
    from rich.markup import escape

    use_zh = lang == "zh"
    console.print()

    # Nothing new: report either "everything already listed" or "nothing found", then stop.
    if new_found == 0:
        if total_found > 0:
            if use_zh:
                console.print(f"[green]✓[/green] 扫描完成:找到 {total_found} 个模型,所有模型均已在列表中")
            else:
                console.print(f"[green]✓[/green] Scan complete: found {total_found} models, all already in the list")
        elif use_zh:
            console.print("[yellow]未找到模型[/yellow]")
        else:
            console.print("[yellow]No models found[/yellow]")
        return

    # Overall scan result.
    if use_zh:
        console.print(f"[green]✓[/green] 扫描完成:找到 {total_found} 个模型,其中 {new_found} 个为新模型")
    else:
        console.print(f"[green]✓[/green] Scan complete: found {total_found} models, {new_found} are new")

    # The rest of the output is about registered models only.
    if not registered:
        return

    if use_zh:
        console.print(f"[green]✓[/green] 成功添加 {len(registered)} 个新模型到列表")
    else:
        console.print(f"[green]✓[/green] Successfully added {len(registered)} new models to list")

    if not show_models:
        return

    # A negative limit would slice from the end and inflate the "remaining" count.
    max_show = max(max_show, 0)

    console.print()
    if use_zh:
        console.print(f"[dim]新发现的模型(前{max_show}个):[/dim]")
    else:
        console.print(f"[dim]Newly discovered models (first {max_show}):[/dim]")

    for i, model in enumerate(registered[:max_show], 1):
        console.print(f" {i}. {escape(str(model.name))} ({model.format})")
        console.print(f" [dim]{escape(str(model.path))}[/dim]")

    remaining = len(registered) - max_show
    if remaining > 0:
        if use_zh:
            console.print(f" [dim]... 还有 {remaining} 个新模型[/dim]")
        else:
            console.print(f" [dim]... and {remaining} more new models[/dim]")