Files
ktransformers/kt-kernel/python/cli/utils/download_helper.py
Oql 56cbd69ac4 kt-cli enhancement (#1834)
* [feat]: redesign kt run interactive configuration with i18n support

- Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port)
- Add configuration save/load system (~/.ktransformers/run_configs.yaml)
- Add i18n support for kt chat (en/zh translations)
- Add universal input validators with auto-retry and Chinese comma support
- Add port availability checker with auto-suggestion
- Add parser configuration (--tool-call-parser, --reasoning-parser)
- Remove tuna command and clean up redundant files
- Fix: variable reference bug in run.py, filter to show only MoE models

* [feat]: unify model selection UI and enable shared experts fusion by default

- Unify kt run model selection table with kt model list display
  * Add Total size, MoE Size, Repo, and SHA256 status columns
  * Use consistent formatting and styling
  * Improve user decision-making with more information

- Enable --disable-shared-experts-fusion by default
  * Change default value from False to True
  * Users can still override with --enable-shared-experts-fusion

* [feat]: improve kt chat with performance metrics and better CJK support

- Add performance metrics display after each response
  * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token)
  * Accurate input/output token counts using model tokenizer
  * Fallback to estimation if tokenizer unavailable
  * Metrics shown in dim style (not prominent)

- Fix Chinese character input issues
  * Replace Prompt.ask() with console.input() for better CJK support
  * Fixes backspace deletion showing half-characters

- Suppress NumPy subnormal warnings
  * Filter "The value of the smallest subnormal" warnings
  * Cleaner CLI output on certain hardware environments

* [fix]: correct TTFT measurement in kt chat

- Move start_time initialization before API call
- Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms
- Now correctly measures time from request sent to first token received

* [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案

* [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力

* [docs]: 添加 Clawdbot 飞书接入教程链接

* [feat]: improve CLI table display, model verification, and chat experience

- Add sequence number (#) column to all model tables by default
- Filter kt edit to show only MoE GPU models (exclude AMX)
- Extend kt model verify to check *.json and *.py files in addition to weights
- Fix re-verification bug where repaired files caused false failures
- Suppress tokenizer debug output in kt chat token counting

* [fix]: fix cpu cores.

---------

Co-authored-by: skqliao <skqliao@gmail.com>
2026-02-04 16:44:54 +08:00

147 lines
4.3 KiB
Python

"""Helper functions for interactive model download."""
import fnmatch
from pathlib import Path
from typing import Any, Dict, List, Tuple
def list_remote_files_hf(repo_id: str, use_mirror: bool = False) -> List[Dict[str, Any]]:
    """
    List files in a HuggingFace repository.

    Args:
        repo_id: Repository identifier, e.g. "org/model".
        use_mirror: Route requests through hf-mirror.com, but only when the
            user has not already set HF_ENDPOINT themselves.

    Returns:
        List of dicts with keys: 'path', 'size' (in bytes)
    """
    import os

    # Temporarily point the hub client at the mirror; never clobber a
    # user-provided HF_ENDPOINT.
    original_endpoint = os.environ.get("HF_ENDPOINT")
    if use_mirror and not original_endpoint:
        os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
    # NOTE: huggingface_hub reads HF_ENDPOINT at import time, so the import
    # must happen after the environment variable is set.
    from huggingface_hub import HfApi

    try:
        api = HfApi()
        files_info = api.list_repo_tree(repo_id=repo_id, recursive=True)
        result = []
        for item in files_info:
            # Skip directories; only real files matter for a download plan.
            if hasattr(item, "type") and item.type == "directory":
                continue
            file_path = item.path if hasattr(item, "path") else str(item)
            # item.size can be None for some tree entries; normalize to 0 so
            # downstream size arithmetic never sees None.
            file_size = getattr(item, "size", 0) or 0
            result.append({"path": file_path, "size": file_size})
        return result
    finally:
        # Restore the environment exactly as we found it.
        if use_mirror and not original_endpoint:
            os.environ.pop("HF_ENDPOINT", None)
        elif original_endpoint:
            os.environ["HF_ENDPOINT"] = original_endpoint
def list_remote_files_ms(repo_id: str) -> List[Dict[str, Any]]:
    """
    List files in a ModelScope repository.

    Args:
        repo_id: ModelScope model id, e.g. "org/model".

    Returns:
        List of dicts with keys: 'path', 'size' (in bytes)
    """
    from modelscope.hub.api import HubApi

    api = HubApi()
    files_info = api.get_model_files(model_id=repo_id, recursive=True)
    result = []
    for file_info in files_info:
        # ModelScope entries use "Name"; fall back to "Path" for older API shapes.
        file_path = file_info.get("Name", file_info.get("Path", ""))
        # "Size" may be present but None; normalize so size summation never fails.
        file_size = file_info.get("Size", 0) or 0
        result.append({"path": file_path, "size": file_size})
    return result
def filter_files_by_pattern(files: List[Dict[str, Any]], pattern: str) -> List[Dict[str, Any]]:
    """Filter files by glob pattern.

    A file is kept when the pattern matches either its basename or its full
    repo-relative path, so both "*.safetensors" and "subdir/*" work.

    Args:
        files: File dicts with at least a 'path' key.
        pattern: fnmatch-style glob pattern; "*" keeps everything.

    Returns:
        New list with only the matching file dicts, original order preserved.
    """
    if pattern == "*":
        return files
    return [
        file
        for file in files
        if fnmatch.fnmatch(Path(file["path"]).name, pattern)
        or fnmatch.fnmatch(file["path"], pattern)
    ]
def calculate_total_size(files: List[Dict[str, Any]]) -> int:
    """Calculate total size of files in bytes.

    A missing or None 'size' counts as 0, so a single unsized entry
    (some hub APIs omit sizes) cannot crash the summation.
    """
    return sum(f.get("size") or 0 for f in files)
def format_file_list_table(files: List[Dict[str, Any]], max_display: int = 10):
    """Format file list as a rich Table for display.

    Args:
        files: File dicts with 'path' and 'size' keys.
        max_display: Maximum number of individual rows to render; any
            remainder is collapsed into a single "... and N more files" row.

    Returns:
        rich.table.Table ready to be printed with a rich Console.
    """
    from rich.table import Table
    from kt_kernel.cli.utils.model_scanner import format_size

    table = Table(show_header=True, header_style="bold")
    table.add_column("File", style="cyan", overflow="fold")
    table.add_column("Size", justify="right")
    # Show first max_display files, then summarize the rest in one row.
    for file in files[:max_display]:
        table.add_row(file["path"], format_size(file["size"]))
    if len(files) > max_display:
        table.add_row(f"... and {len(files) - max_display} more files", "[dim]...[/dim]")
    return table
def verify_repo_exists(repo_id: str, repo_type: str, use_mirror: bool = False) -> Tuple[bool, str]:
    """
    Verify if a repository exists on the selected hub.

    Args:
        repo_id: Repository identifier.
        repo_type: "huggingface" queries the HF hub; any other value
            queries ModelScope.
        use_mirror: HuggingFace only - route through hf-mirror.com when
            HF_ENDPOINT is not already set by the user.

    Returns:
        (exists: bool, message: str)
    """
    try:
        if repo_type == "huggingface":
            import os

            saved_endpoint = os.environ.get("HF_ENDPOINT")
            switch_to_mirror = use_mirror and not saved_endpoint
            if switch_to_mirror:
                os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
            # Import after the env var is set so the mirror takes effect.
            from huggingface_hub import HfApi

            try:
                HfApi().repo_info(repo_id=repo_id, repo_type="model")
                return True, "Repository found"
            finally:
                # Put the environment back the way we found it.
                if switch_to_mirror:
                    os.environ.pop("HF_ENDPOINT", None)
                elif saved_endpoint:
                    os.environ["HF_ENDPOINT"] = saved_endpoint
        else:  # modelscope
            from modelscope.hub.api import HubApi

            HubApi().get_model(model_id=repo_id)
            return True, "Repository found"
    except Exception as e:
        return False, f"Repository not found: {str(e)}"