mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-19 22:09:10 +00:00
* [feat]: redesign kt run interactive configuration with i18n support - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port) - Add configuration save/load system (~/.ktransformers/run_configs.yaml) - Add i18n support for kt chat (en/zh translations) - Add universal input validators with auto-retry and Chinese comma support - Add port availability checker with auto-suggestion - Add parser configuration (--tool-call-parser, --reasoning-parser) - Remove tuna command and clean up redundant files - Fix: variable reference bug in run.py, filter to show only MoE models * [feat]: unify model selection UI and enable shared experts fusion by default - Unify kt run model selection table with kt model list display * Add Total size, MoE Size, Repo, and SHA256 status columns * Use consistent formatting and styling * Improve user decision-making with more information - Enable --disable-shared-experts-fusion by default * Change default value from False to True * Users can still override with --enable-shared-experts-fusion * [feat]: improve kt chat with performance metrics and better CJK support - Add performance metrics display after each response * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token) * Accurate input/output token counts using model tokenizer * Fallback to estimation if tokenizer unavailable * Metrics shown in dim style (not prominent) - Fix Chinese character input issues * Replace Prompt.ask() with console.input() for better CJK support * Fixes backspace deletion showing half-characters - Suppress NumPy subnormal warnings * Filter "The value of the smallest subnormal" warnings * Cleaner CLI output on certain hardware environments * [fix]: correct TTFT measurement in kt chat - Move start_time initialization before API call - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms - Now correctly measures time from request sent to 
first token received * [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案 * [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力 * [docs]: 添加 Clawdbot 飞书接入教程链接 * [feat]: improve CLI table display, model verification, and chat experience - Add sequence number (#) column to all model tables by default - Filter kt edit to show only MoE GPU models (exclude AMX) - Extend kt model verify to check *.json and *.py files in addition to weights - Fix re-verification bug where repaired files caused false failures - Suppress tokenizer debug output in kt chat token counting * [fix]: fix cpu cores. --------- Co-authored-by: skqliao <skqliao@gmail.com>
303 lines
9.0 KiB
Python
303 lines
9.0 KiB
Python
"""
|
|
User Model Registry
|
|
|
|
Manages user-registered models in ~/.ktransformers/user_models.yaml
|
|
"""
|
|
|
|
import uuid
from dataclasses import dataclass, asdict, field
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Dict, Any

import yaml
|
|
|
|
|
|
# Constants

# Per-user registry file; lives alongside the rest of the kt config
# under ~/.ktransformers/ (created on first use).
USER_MODELS_FILE = Path.home() / ".ktransformers" / "user_models.yaml"

# Schema version stamped into the YAML file, read back on load to
# allow future format migrations.
REGISTRY_VERSION = "1.0"
|
|
|
|
|
|
@dataclass
class UserModel:
    """Represents a user-registered model.

    A record describing one model directory on disk, together with its
    provenance (repo type/id), verification state, and cached analysis
    results.  Instances are serialized to/from plain dicts for storage
    in the YAML registry.
    """

    name: str  # User-editable name (default: folder name)
    path: str  # Absolute path to model directory
    format: str  # "safetensors" | "gguf"
    id: Optional[str] = None  # Unique UUID for this model (auto-generated if None)
    repo_type: Optional[str] = None  # "huggingface" | "modelscope" | None
    repo_id: Optional[str] = None  # e.g., "deepseek-ai/DeepSeek-V3"
    sha256_status: str = "not_checked"  # "not_checked" | "checking" | "passed" | "failed" | "no_repo"
    gpu_model_ids: Optional[List[str]] = None  # For llamafile/AMX: list of GPU model UUIDs to run with
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    last_verified: Optional[str] = None  # ISO format datetime
    # MoE information (cached from analyze_moe_model)
    is_moe: Optional[bool] = None  # True if MoE model, False if non-MoE, None if not analyzed
    moe_num_experts: Optional[int] = None  # Total number of experts (for MoE models)
    moe_num_experts_per_tok: Optional[int] = None  # Number of active experts per token (for MoE models)
    # AMX quantization metadata (for format == "amx")
    amx_source_model: Optional[str] = None  # Name of the source MoE model that was quantized
    amx_quant_method: Optional[str] = None  # "int4" | "int8"
    amx_numa_nodes: Optional[int] = None  # Number of NUMA nodes used for quantization

    def __post_init__(self):
        """Ensure ID is set after initialization.

        Runs even when ``id=None`` is passed explicitly (e.g. by
        ``from_dict`` on a pre-UUID registry entry), which a
        ``default_factory`` alone would not cover.
        """
        if self.id is None:
            self.id = str(uuid.uuid4())

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for YAML serialization."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "UserModel":
        """Create from dictionary loaded from YAML.

        Unknown keys in ``data`` will raise ``TypeError``; missing
        optional keys fall back to the dataclass defaults.
        """
        return cls(**data)

    def path_exists(self) -> bool:
        """Check if the model path still exists on disk."""
        return Path(self.path).exists()
|
|
|
|
|
|
class UserModelRegistry:
    """Manages the user model registry.

    Models are persisted to a YAML file (``USER_MODELS_FILE`` by default)
    and mirrored in memory as a list of :class:`UserModel` instances.
    Every mutating operation saves immediately, so the on-disk file is
    always in sync with memory.
    """

    def __init__(self, registry_file: Optional[Path] = None):
        """
        Initialize the registry.

        Args:
            registry_file: Path to the registry YAML file (default: USER_MODELS_FILE)
        """
        self.registry_file = registry_file or USER_MODELS_FILE
        self.models: List[UserModel] = []
        self.version = REGISTRY_VERSION

        # Ensure the parent directory exists before any read/write.
        self.registry_file.parent.mkdir(parents=True, exist_ok=True)

        # Load existing registry (creates an empty file on first use).
        self.load()

    def load(self) -> None:
        """
        Load models from the YAML file.

        Creates an empty registry file if none exists.  Entries written
        by older versions without an ``id`` field get a fresh UUID (via
        ``UserModel.__post_init__``) which is persisted back to disk.

        Raises:
            RuntimeError: If the file exists but cannot be read or parsed.
        """
        if not self.registry_file.exists():
            # Initialize empty registry and create the file.
            self.models = []
            self.save()
            return

        try:
            with open(self.registry_file, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f)

            if not data:
                self.models = []
                return

            self.version = data.get("version", REGISTRY_VERSION)
            models_data = data.get("models", [])

            # Migration: detect missing UUIDs on the *raw* dicts, before
            # constructing UserModel instances.  After construction,
            # __post_init__ has already filled in model.id, so checking
            # the instances would never trigger — the generated IDs would
            # be silently regenerated (and lost) on every load.
            needs_save = any(m.get("id") is None for m in models_data)

            self.models = [UserModel.from_dict(m) for m in models_data]

            if needs_save:
                self.save()

        except Exception as e:
            raise RuntimeError(f"Failed to load user model registry: {e}") from e

    def save(self) -> None:
        """
        Save models to the YAML file.

        Raises:
            RuntimeError: If the file cannot be written.
        """
        data = {"version": self.version, "models": [m.to_dict() for m in self.models]}

        try:
            with open(self.registry_file, "w", encoding="utf-8") as f:
                # allow_unicode keeps CJK model names readable in the file;
                # sort_keys=False preserves the dataclass field order.
                yaml.safe_dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        except Exception as e:
            raise RuntimeError(f"Failed to save user model registry: {e}") from e

    def add_model(self, model: UserModel) -> None:
        """
        Add a model to the registry and persist immediately.

        Args:
            model: UserModel instance to add

        Raises:
            ValueError: If a model with the same name already exists
        """
        if self.check_name_conflict(model.name):
            raise ValueError(f"Model with name '{model.name}' already exists")

        self.models.append(model)
        self.save()

    def remove_model(self, name: str) -> bool:
        """
        Remove a model from the registry by name.

        Args:
            name: Name of the model to remove

        Returns:
            True if a model was removed, False if not found
        """
        original_count = len(self.models)
        self.models = [m for m in self.models if m.name != name]

        if len(self.models) < original_count:
            self.save()
            return True
        return False

    def update_model(self, name: str, updates: Dict[str, Any]) -> bool:
        """
        Update a model's attributes and persist the change.

        Args:
            name: Name of the model to update
            updates: Dictionary of attributes to update; keys that are not
                existing UserModel attributes are silently ignored.

        Returns:
            True if the model was found (and saved), False if not found
        """
        model = self.get_model(name)
        if not model:
            return False

        # Only set attributes that already exist on the dataclass.
        for key, value in updates.items():
            if hasattr(model, key):
                setattr(model, key, value)

        self.save()
        return True

    def get_model(self, name: str) -> Optional[UserModel]:
        """
        Get a model by name.

        Args:
            name: Name of the model

        Returns:
            UserModel instance or None if not found
        """
        return next((m for m in self.models if m.name == name), None)

    def get_model_by_id(self, model_id: str) -> Optional[UserModel]:
        """
        Get a model by its unique ID.

        Args:
            model_id: UUID of the model

        Returns:
            UserModel instance or None if not found
        """
        return next((m for m in self.models if m.id == model_id), None)

    def list_models(self) -> List[UserModel]:
        """
        List all models.

        Returns:
            A shallow copy of the model list (mutating the returned list
            does not affect the registry; mutating its elements does).
        """
        return self.models.copy()

    def find_by_path(self, path: str) -> Optional[UserModel]:
        """
        Find a model by its directory path.

        Args:
            path: Model directory path (relative or absolute)

        Returns:
            UserModel instance or None if not found
        """
        # Resolve both sides so symlinks / relative paths compare equal.
        search_path = str(Path(path).resolve())

        for model in self.models:
            if str(Path(model.path).resolve()) == search_path:
                return model
        return None

    def check_name_conflict(self, name: str, exclude_name: Optional[str] = None) -> bool:
        """
        Check if a name conflicts with existing models.

        Args:
            name: Name to check
            exclude_name: Optional name to exclude from the check (for
                rename operations — the model being renamed should not
                conflict with itself)

        Returns:
            True if a conflict exists, False otherwise
        """
        # NOTE(review): exclusion is by *name*, not identity — if two
        # registered models ever shared a name, a rename back to that
        # name would not be flagged.  add_model prevents duplicates, so
        # this cannot occur in practice.
        return any(m.name == name and m.name != exclude_name for m in self.models)

    def refresh_status(self) -> Dict[str, List[str]]:
        """
        Check all models and identify ones whose path has gone missing.

        Returns:
            Dictionary with 'valid' and 'missing' lists of model names
        """
        valid: List[str] = []
        missing: List[str] = []

        for model in self.models:
            (valid if model.path_exists() else missing).append(model.name)

        return {"valid": valid, "missing": missing}

    def get_model_count(self) -> int:
        """Get the total number of registered models."""
        return len(self.models)

    def suggest_name(self, base_name: str) -> str:
        """
        Suggest a unique name based on ``base_name``.

        Args:
            base_name: Base name to derive from

        Returns:
            A unique name (may have a suffix like -2, -3, ...)
        """
        if not self.check_name_conflict(base_name):
            return base_name

        # First conflict resolution starts at -2 ("foo" is taken, so
        # the next copy is "foo-2").
        counter = 2
        while True:
            candidate = f"{base_name}-{counter}"
            if not self.check_name_conflict(candidate):
                return candidate
            counter += 1