mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-19 22:09:10 +00:00
* [feat]: redesign kt run interactive configuration with i18n support - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port) - Add configuration save/load system (~/.ktransformers/run_configs.yaml) - Add i18n support for kt chat (en/zh translations) - Add universal input validators with auto-retry and Chinese comma support - Add port availability checker with auto-suggestion - Add parser configuration (--tool-call-parser, --reasoning-parser) - Remove tuna command and clean up redundant files - Fix: variable reference bug in run.py, filter to show only MoE models * [feat]: unify model selection UI and enable shared experts fusion by default - Unify kt run model selection table with kt model list display * Add Total size, MoE Size, Repo, and SHA256 status columns * Use consistent formatting and styling * Improve user decision-making with more information - Enable --disable-shared-experts-fusion by default * Change default value from False to True * Users can still override with --enable-shared-experts-fusion * [feat]: improve kt chat with performance metrics and better CJK support - Add performance metrics display after each response * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token) * Accurate input/output token counts using model tokenizer * Fallback to estimation if tokenizer unavailable * Metrics shown in dim style (not prominent) - Fix Chinese character input issues * Replace Prompt.ask() with console.input() for better CJK support * Fixes backspace deletion showing half-characters - Suppress NumPy subnormal warnings * Filter "The value of the smallest subnormal" warnings * Cleaner CLI output on certain hardware environments * [fix]: correct TTFT measurement in kt chat - Move start_time initialization before API call - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms - Now correctly measures time from request sent to 
first token received * [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案 * [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力 * [docs]: 添加 Clawdbot 飞书接入教程链接 * [feat]: improve CLI table display, model verification, and chat experience - Add sequence number (#) column to all model tables by default - Filter kt edit to show only MoE GPU models (exclude AMX) - Extend kt model verify to check *.json and *.py files in addition to weights - Fix re-verification bug where repaired files caused false failures - Suppress tokenizer debug output in kt chat token counting * [fix]: fix cpu cores. --------- Co-authored-by: skqliao <skqliao@gmail.com>
147 lines
4.3 KiB
Python
147 lines
4.3 KiB
Python
"""Helper functions for interactive model download."""
|
|
|
|
import fnmatch
from pathlib import Path
from typing import Any, Dict, List, Tuple
|
|
|
|
|
|
def list_remote_files_hf(repo_id: str, use_mirror: bool = False) -> List[Dict[str, Any]]:
    """
    List files in a HuggingFace repository.

    Args:
        repo_id: Repository identifier passed to ``HfApi.list_repo_tree``.
        use_mirror: When True and the ``HF_ENDPOINT`` environment variable is
            not set, query ``https://hf-mirror.com`` instead of the default
            endpoint. An explicitly configured ``HF_ENDPOINT`` always wins.

    Returns:
        List of dicts with keys: 'path', 'size' (in bytes)
    """
    import os

    from huggingface_hub import HfApi
    from huggingface_hub.hf_api import RepoFolder

    # huggingface_hub reads HF_ENDPOINT once, at import time, so setting the
    # variable here would be ignored whenever the package is already loaded.
    # Passing the endpoint to the client applies it to this call only and
    # leaves the process environment untouched.
    endpoint = None
    if use_mirror and not os.environ.get("HF_ENDPOINT"):
        endpoint = "https://hf-mirror.com"

    api = HfApi(endpoint=endpoint)

    result = []
    for item in api.list_repo_tree(repo_id=repo_id, recursive=True):
        # Skip directories. Folder entries are RepoFolder objects; the
        # attribute check is kept for entries that expose a `type` field.
        if isinstance(item, RepoFolder) or getattr(item, "type", None) == "directory":
            continue

        file_path = item.path if hasattr(item, "path") else str(item)
        # A missing or None size is reported as 0 so callers can sum sizes.
        file_size = getattr(item, "size", 0) or 0

        result.append({"path": file_path, "size": file_size})

    return result
|
|
|
|
|
|
def list_remote_files_ms(repo_id: str) -> List[Dict[str, Any]]:
    """
    List files in a ModelScope repository.

    Args:
        repo_id: Model identifier passed to ``HubApi.get_model_files``.

    Returns:
        List of dicts with keys: 'path', 'size' (in bytes)
    """
    from modelscope.hub.api import HubApi

    api = HubApi()
    files_info = api.get_model_files(model_id=repo_id, recursive=True)

    result = []
    for file_info in files_info:
        # NOTE(review): ModelScope appears to mark directory entries with
        # Type "tree"; they are not downloadable files, so leave them out.
        if file_info.get("Type") == "tree":
            continue

        # The listing is recursive, so prefer "Path" (relative to the repo
        # root) and fall back to "Name" only when no path is given.
        file_path = file_info.get("Path") or file_info.get("Name", "")
        # A missing or null size is reported as 0 so callers can sum sizes.
        file_size = file_info.get("Size") or 0

        result.append({"path": file_path, "size": file_size})

    return result
|
|
|
|
|
|
def filter_files_by_pattern(files: List[Dict[str, Any]], pattern: str) -> List[Dict[str, Any]]:
    """Filter files by glob pattern.

    Args:
        files: File dicts, each with at least a 'path' key.
        pattern: ``fnmatch``-style pattern. It is tested against both the
            bare file name and the full path; a match on either keeps the file.

    Returns:
        The matching file dicts in their original order. For the catch-all
        pattern ``"*"`` the input list itself is returned (not a copy).
    """
    if pattern == "*":
        return files

    return [
        file
        for file in files
        if fnmatch.fnmatch(Path(file["path"]).name, pattern)
        or fnmatch.fnmatch(file["path"], pattern)
    ]
|
|
|
|
|
|
def calculate_total_size(files: List[Dict[str, Any]]) -> int:
    """Calculate total size of files in bytes.

    Args:
        files: File dicts carrying a 'size' entry in bytes.

    Returns:
        The sum of all sizes. Entries whose size is missing or None count
        as 0, so incomplete remote metadata does not abort the calculation.
    """
    return sum(f.get("size") or 0 for f in files)
|
|
|
|
|
|
def format_file_list_table(files: List[Dict[str, Any]], max_display: int = 10):
    """Format file list as a table for display.

    Args:
        files: File dicts with 'path' and 'size' keys.
        max_display: Maximum number of files listed individually. Values
            below zero are treated as zero.

    Returns:
        A ``rich.table.Table`` with "File" and "Size" columns. When more
        files exist than are shown, a final summary row gives the count of
        the remaining ones.
    """
    from rich.table import Table
    from kt_kernel.cli.utils.model_scanner import format_size

    # A negative limit would make the slice below drop files from the end
    # and overstate the "more files" count, so clamp it.
    shown = max(max_display, 0)

    table = Table(show_header=True, header_style="bold")
    table.add_column("File", style="cyan", overflow="fold")
    table.add_column("Size", justify="right")

    # Show first `shown` files
    for file in files[:shown]:
        table.add_row(file["path"], format_size(file["size"]))

    hidden = len(files) - shown
    if hidden > 0:
        table.add_row(f"... and {hidden} more files", "[dim]...[/dim]")

    return table
|
|
|
|
|
|
def verify_repo_exists(repo_id: str, repo_type: str, use_mirror: bool = False) -> Tuple[bool, str]:
    """
    Verify if a repository exists.

    Args:
        repo_id: Repository identifier to look up.
        repo_type: "huggingface" queries the HuggingFace Hub; any other value
            is treated as ModelScope.
        use_mirror: HuggingFace only. When True and the ``HF_ENDPOINT``
            environment variable is not set, query ``https://hf-mirror.com``.

    Returns:
        (exists: bool, message: str)
    """
    try:
        if repo_type == "huggingface":
            import os

            from huggingface_hub import HfApi

            # huggingface_hub reads HF_ENDPOINT once, at import time, so
            # toggling the variable around the call is unreliable. Passing
            # the endpoint to the client affects only this lookup.
            endpoint = None
            if use_mirror and not os.environ.get("HF_ENDPOINT"):
                endpoint = "https://hf-mirror.com"

            HfApi(endpoint=endpoint).repo_info(repo_id=repo_id, repo_type="model")
        else:  # modelscope
            from modelscope.hub.api import HubApi

            HubApi().get_model(model_id=repo_id)

        return True, "Repository found"

    except Exception as e:
        # Any failure (missing repo, network error, missing client package)
        # is reported to the caller as "not found" with the error text.
        return False, f"Repository not found: {str(e)}"
|