mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-19 22:09:10 +00:00
* [feat]: redesign kt run interactive configuration with i18n support - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port) - Add configuration save/load system (~/.ktransformers/run_configs.yaml) - Add i18n support for kt chat (en/zh translations) - Add universal input validators with auto-retry and Chinese comma support - Add port availability checker with auto-suggestion - Add parser configuration (--tool-call-parser, --reasoning-parser) - Remove tuna command and clean up redundant files - Fix: variable reference bug in run.py, filter to show only MoE models * [feat]: unify model selection UI and enable shared experts fusion by default - Unify kt run model selection table with kt model list display * Add Total size, MoE Size, Repo, and SHA256 status columns * Use consistent formatting and styling * Improve user decision-making with more information - Enable --disable-shared-experts-fusion by default * Change default value from False to True * Users can still override with --enable-shared-experts-fusion * [feat]: improve kt chat with performance metrics and better CJK support - Add performance metrics display after each response * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token) * Accurate input/output token counts using model tokenizer * Fallback to estimation if tokenizer unavailable * Metrics shown in dim style (not prominent) - Fix Chinese character input issues * Replace Prompt.ask() with console.input() for better CJK support * Fixes backspace deletion showing half-characters - Suppress NumPy subnormal warnings * Filter "The value of the smallest subnormal" warnings * Cleaner CLI output on certain hardware environments * [fix]: correct TTFT measurement in kt chat - Move start_time initialization before API call - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms - Now correctly measures time from request sent to 
first token received * [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案 * [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力 * [docs]: 添加 Clawdbot 飞书接入教程链接 * [feat]: improve CLI table display, model verification, and chat experience - Add sequence number (#) column to all model tables by default - Filter kt edit to show only MoE GPU models (exclude AMX) - Extend kt model verify to check *.json and *.py files in addition to weights - Fix re-verification bug where repaired files caused false failures - Suppress tokenizer debug output in kt chat token counting * [fix]: fix cpu cores. --------- Co-authored-by: skqliao <skqliao@gmail.com>
78 lines
3.1 KiB
Bash
#!/bin/bash
# Bash completion for the `kt` command.
#
# This is a static completion script that does not require Python startup.
# Source it directly, or install it into the system bash-completion.d
# directory, to enable tab completion for `kt`.
#######################################
# Completion handler for the `kt` CLI.
# Globals:
#   COMP_WORDS (read)  - words of the current command line
#   COMP_CWORD (read)  - index of the word being completed
#   COMPREPLY  (write) - candidate completions
# Returns:
#   0 always
#######################################
_kt_completion() {
    local cur
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"

    # Top-level subcommands
    local commands="version run chat quant edit bench microbench doctor model config sft"

    # Global options accepted before any subcommand
    local global_opts="--help --version"

    case "${COMP_CWORD}" in
        1)
            # First argument: suggest commands and global options.
            # mapfile avoids unquoted word-splitting of the compgen output (SC2207).
            mapfile -t COMPREPLY < <(compgen -W "${commands} ${global_opts}" -- "${cur}")
            return 0
            ;;
        *)
            # Later arguments: complete options for the chosen subcommand.
            case "${COMP_WORDS[1]}" in
                run)
                    local run_opts="--host --port --gpu-experts --cpu-threads --tensor-parallel-size --kt-method --attention-backend --max-total-tokens --dry-run --help"
                    mapfile -t COMPREPLY < <(compgen -W "${run_opts}" -- "${cur}")
                    ;;
                chat)
                    local chat_opts="--host --port --model --temperature --max-tokens --system --save-history --no-save-history --history-file --stream --no-stream --help"
                    mapfile -t COMPREPLY < <(compgen -W "${chat_opts}" -- "${cur}")
                    ;;
                quant)
                    local quant_opts="--method --output --help"
                    mapfile -t COMPREPLY < <(compgen -W "${quant_opts}" -- "${cur}")
                    ;;
                edit)
                    local edit_opts="--help"
                    mapfile -t COMPREPLY < <(compgen -W "${edit_opts}" -- "${cur}")
                    ;;
                bench|microbench)
                    local bench_opts="--model --config --help"
                    mapfile -t COMPREPLY < <(compgen -W "${bench_opts}" -- "${cur}")
                    ;;
                doctor)
                    local doctor_opts="--verbose --help"
                    mapfile -t COMPREPLY < <(compgen -W "${doctor_opts}" -- "${cur}")
                    ;;
                model)
                    # `kt model` takes its own sub-subcommands plus options.
                    local model_cmds="download list path-list path-add path-remove search"
                    local model_opts="--help"
                    mapfile -t COMPREPLY < <(compgen -W "${model_cmds} ${model_opts}" -- "${cur}")
                    ;;
                config)
                    local config_cmds="show get set reset path init model-path-list model-path-add model-path-remove"
                    local config_opts="--help"
                    mapfile -t COMPREPLY < <(compgen -W "${config_cmds} ${config_opts}" -- "${cur}")
                    ;;
                sft)
                    local sft_cmds="train chat export"
                    local sft_opts="--help"
                    mapfile -t COMPREPLY < <(compgen -W "${sft_cmds} ${sft_opts}" -- "${cur}")
                    ;;
                version)
                    mapfile -t COMPREPLY < <(compgen -W "--help" -- "${cur}")
                    ;;
                *)
                    # Unknown subcommand: offer nothing.
                    COMPREPLY=()
                    ;;
            esac
            ;;
    esac
}
# Register the completion handler for the `kt` command.
complete -F _kt_completion kt