mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-03-15 02:47:22 +00:00
* [feat]: redesign kt run interactive configuration with i18n support - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port) - Add configuration save/load system (~/.ktransformers/run_configs.yaml) - Add i18n support for kt chat (en/zh translations) - Add universal input validators with auto-retry and Chinese comma support - Add port availability checker with auto-suggestion - Add parser configuration (--tool-call-parser, --reasoning-parser) - Remove tuna command and clean up redundant files - Fix: variable reference bug in run.py, filter to show only MoE models * [feat]: unify model selection UI and enable shared experts fusion by default - Unify kt run model selection table with kt model list display * Add Total size, MoE Size, Repo, and SHA256 status columns * Use consistent formatting and styling * Improve user decision-making with more information - Enable --disable-shared-experts-fusion by default * Change default value from False to True * Users can still override with --enable-shared-experts-fusion * [feat]: improve kt chat with performance metrics and better CJK support - Add performance metrics display after each response * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token) * Accurate input/output token counts using model tokenizer * Fallback to estimation if tokenizer unavailable * Metrics shown in dim style (not prominent) - Fix Chinese character input issues * Replace Prompt.ask() with console.input() for better CJK support * Fixes backspace deletion showing half-characters - Suppress NumPy subnormal warnings * Filter "The value of the smallest subnormal" warnings * Cleaner CLI output on certain hardware environments * [fix]: correct TTFT measurement in kt chat - Move start_time initialization before API call - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms - Now correctly measures time from request sent to 
first token received * [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案 * [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力 * [docs]: 添加 Clawdbot 飞书接入教程链接 * [feat]: improve CLI table display, model verification, and chat experience - Add sequence number (#) column to all model tables by default - Filter kt edit to show only MoE GPU models (exclude AMX) - Extend kt model verify to check *.json and *.py files in addition to weights - Fix re-verification bug where repaired files caused false failures - Suppress tokenizer debug output in kt chat token counting * [fix]: fix cpu cores. --------- Co-authored-by: skqliao <skqliao@gmail.com>
112 lines
3.3 KiB
Python
112 lines
3.3 KiB
Python
"""
|
|
Configuration save/load for kt run command.
|
|
|
|
Manages saved run configurations bound to specific models.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Any
|
|
from datetime import datetime
|
|
import yaml
|
|
|
|
|
|
# Default on-disk location for saved run configurations (YAML, per-user).
CONFIG_FILE = Path.home() / ".ktransformers" / "run_configs.yaml"
|
|
|
|
|
|
class RunConfigManager:
    """Manager for saved run configurations.

    Persists configurations to ``CONFIG_FILE`` as YAML. The stored document
    has the shape ``{"version": "1.0", "configs": {model_id: [config, ...]}}``,
    so each model ID maps to an ordered list of saved config dicts.
    """

    def __init__(self):
        # Kept as an instance attribute so the target file can be overridden
        # (e.g. pointed at a temp path in tests).
        self.config_file = CONFIG_FILE
        self._ensure_config_file()

    def _ensure_config_file(self):
        """Create the config file (and its parent directory) with an empty
        skeleton if it does not exist yet."""
        if not self.config_file.exists():
            self.config_file.parent.mkdir(parents=True, exist_ok=True)
            self._save_data({"version": "1.0", "configs": {}})

    def _load_data(self) -> Dict:
        """Load raw config data.

        Returns:
            The parsed YAML document, or an empty skeleton if the file is
            empty, missing, unreadable, or corrupt.
        """
        try:
            with open(self.config_file, "r", encoding="utf-8") as f:
                # safe_load returns None for an empty file; coalesce to skeleton.
                return yaml.safe_load(f) or {"version": "1.0", "configs": {}}
        except Exception:
            # Deliberate best-effort: a corrupt or unreadable config file must
            # not crash the CLI — treat it as if no configs were saved.
            return {"version": "1.0", "configs": {}}

    def _save_data(self, data: Dict):
        """Write raw config data as UTF-8 YAML (block style, unicode kept)."""
        with open(self.config_file, "w", encoding="utf-8") as f:
            yaml.dump(data, f, allow_unicode=True, default_flow_style=False)

    def list_configs(self, model_id: str) -> List[Dict[str, Any]]:
        """List all saved configs for a model.

        Args:
            model_id: Model ID whose configs to list.

        Returns:
            List of config dicts with 'config_name' and other fields; empty
            list if none are saved.
        """
        data = self._load_data()
        configs = data.get("configs", {}).get(model_id, [])
        # Guard against hand-edited files where the value is not a list.
        return configs if isinstance(configs, list) else []

    def save_config(self, model_id: str, config: Dict[str, Any]):
        """Save a configuration for a model.

        Args:
            model_id: Model ID to bind config to.
            config: Configuration dict with all run parameters. The caller's
                dict is NOT mutated; a shallow copy stamped with a
                ``created_at`` ISO timestamp is what gets stored.
        """
        data = self._load_data()
        # setdefault replaces the two-step "if key not in ..." initialization.
        model_configs = data.setdefault("configs", {}).setdefault(model_id, [])

        # Fix: copy before stamping the timestamp so we do not mutate the
        # caller's argument as a side effect.
        entry = dict(config)
        entry["created_at"] = datetime.now().isoformat()

        model_configs.append(entry)
        self._save_data(data)

    def delete_config(self, model_id: str, config_index: int) -> bool:
        """Delete a saved configuration.

        Args:
            model_id: Model ID.
            config_index: Index of config to delete (0-based).

        Returns:
            True if deleted, False if the model or index was not found.
        """
        data = self._load_data()

        if model_id not in data.get("configs", {}):
            return False

        configs = data["configs"][model_id]
        # Reject negative indices explicitly: Python's negative indexing would
        # otherwise silently delete from the end.
        if config_index < 0 or config_index >= len(configs):
            return False

        configs.pop(config_index)
        self._save_data(data)
        return True

    def get_config(self, model_id: str, config_index: int) -> Optional[Dict[str, Any]]:
        """Get a specific saved configuration.

        Args:
            model_id: Model ID.
            config_index: Index of config to get (0-based).

        Returns:
            Config dict or None if not found.
        """
        configs = self.list_configs(model_id)
        # Same negative-index guard as delete_config, for consistency.
        if config_index < 0 or config_index >= len(configs):
            return None
        return configs[config_index]