From 12949c8acd9120a5c9d75a6b53ce667f586a82de Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 01:47:14 +0800 Subject: [PATCH 01/17] fix default options --- .github/workflows/install.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index c8a87a49..d8f199a0 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -6,7 +6,7 @@ on: job_to_run: description: "Which job to run?" required: true - default: "install&test" + default: "install-test" type: choice options: - create-install-test From 0f1684c28d306aed40198dcc959fbc63977cef09 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 02:31:19 +0800 Subject: [PATCH 02/17] local chat for cicd test --- ktransformers/local_chat_test.py | 171 +++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 ktransformers/local_chat_test.py diff --git a/ktransformers/local_chat_test.py b/ktransformers/local_chat_test.py new file mode 100644 index 00000000..2927fe03 --- /dev/null +++ b/ktransformers/local_chat_test.py @@ -0,0 +1,171 @@ +""" +Description : +Author : Boxin Zhang, Azure-Tang +Version : 0.1.0 +Copyright (c) 2024 by KVCache.AI, All Rights Reserved. +""" + +import os +import platform +import sys + +project_dir = os.path.dirname(os.path.dirname(__file__)) +sys.path.insert(0, project_dir) +import torch +import logging +from transformers import ( + AutoTokenizer, + AutoConfig, + AutoModelForCausalLM, + GenerationConfig, + TextStreamer, +) +import json +import fire +from ktransformers.optimize.optimize import optimize_and_load_gguf +from ktransformers.models.modeling_deepseek import DeepseekV2ForCausalLM +from ktransformers.models.modeling_qwen2_moe import Qwen2MoeForCausalLM +from ktransformers.models.modeling_deepseek_v3 import DeepseekV3ForCausalLM +from ktransformers.models.modeling_llama import LlamaForCausalLM +from ktransformers.models.modeling_mixtral import MixtralForCausalLM +from ktransformers.util.utils import prefill_and_generate, get_compute_capability +from ktransformers.server.config.config import Config +from ktransformers.operators.flashinfer_wrapper import flashinfer_enabled + +custom_models = { + "DeepseekV2ForCausalLM": DeepseekV2ForCausalLM, + "DeepseekV3ForCausalLM": DeepseekV3ForCausalLM, + "Qwen2MoeForCausalLM": Qwen2MoeForCausalLM, + "LlamaForCausalLM": LlamaForCausalLM, + "MixtralForCausalLM": MixtralForCausalLM, +} + +ktransformer_rules_dir = ( + os.path.dirname(os.path.abspath(__file__)) + "/optimize/optimize_rules/" +) +default_optimize_rules = { + "DeepseekV2ForCausalLM": ktransformer_rules_dir + "DeepSeek-V2-Chat.yaml", + "DeepseekV3ForCausalLM": ktransformer_rules_dir + "DeepSeek-V3-Chat.yaml", + "Qwen2MoeForCausalLM": ktransformer_rules_dir + "Qwen2-57B-A14B-Instruct.yaml", + "LlamaForCausalLM": ktransformer_rules_dir + "Internlm2_5-7b-Chat-1m.yaml", + "MixtralForCausalLM": ktransformer_rules_dir + "Mixtral.yaml", +} + + +def local_chat( + model_path: str | None = None, + optimize_config_path: str = None, + gguf_path: str | None = None, + max_new_tokens: int = 1000, + cpu_infer: int = Config().cpu_infer, + use_cuda_graph: bool = True, + prompt_file : str | None = None, + mode: str = "normal", + force_think: bool = False, + chunk_prefill_size: int = 8192 +): + + torch.set_grad_enabled(False) + + Config().cpu_infer = cpu_infer + + tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) + config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) + if mode == 'long_context': + assert config.architectures[0] == "LlamaForCausalLM", "only LlamaForCausalLM support long_context mode" + torch.set_default_dtype(torch.float16) + else: + torch.set_default_dtype(config.torch_dtype) + + with torch.device("meta"): + if config.architectures[0] in custom_models: + print("using custom modeling_xxx.py.") + if ( + "Qwen2Moe" in config.architectures[0] + ): # Qwen2Moe must use flash_attention_2 to avoid overflow. + config._attn_implementation = "flash_attention_2" + if "Llama" in config.architectures[0]: + config._attn_implementation = "eager" + if "Mixtral" in config.architectures[0]: + config._attn_implementation = "flash_attention_2" + + model = custom_models[config.architectures[0]](config) + else: + model = AutoModelForCausalLM.from_config( + config, trust_remote_code=True, attn_implementation="flash_attention_2" + ) + + if optimize_config_path is None: + if config.architectures[0] in default_optimize_rules: + print("using default_optimize_rule for", config.architectures[0]) + optimize_config_path = default_optimize_rules[config.architectures[0]] + else: + optimize_config_path = input( + "please input the path of your rule file(yaml file containing optimize rules):" + ) + + if gguf_path is None: + gguf_path = input( + "please input the path of your gguf file(gguf file in the dir containing input gguf file must all belong to current model):" + ) + optimize_and_load_gguf(model, optimize_config_path, gguf_path, config) + + try: + model.generation_config = GenerationConfig.from_pretrained(model_path) + except Exception as e: + print(f"generation config can't auto create, make default. Message: {e}") + gen_config = GenerationConfig( + temperature=0.6, + top_p=0.95, + do_sample=True + ) + model.generation_config = gen_config + # model.generation_config = GenerationConfig.from_pretrained(model_path) + if model.generation_config.pad_token_id is None: + model.generation_config.pad_token_id = model.generation_config.eos_token_id + model.eval() + logging.basicConfig(level=logging.INFO) + + system = platform.system() + if system == "Windows": + os.system("cls") + else: + os.system("clear") + + if prompt_file != None: + assert os.path.isfile(prompt_file), "prompt file not exist" + print(f"prompt file is {prompt_file}") + content = open(prompt_file, "r").read() + else: + content = "Please write a piece of quicksort code in C++." + + print('Start Testing...(1 round)') + print('Prompt:', content) + + while True: + messages = [{"role": "user", "content": content}] + input_tensor = tokenizer.apply_chat_template( + messages, add_generation_prompt=True, return_tensors="pt" + ) + if force_think: + token_thinks = torch.tensor([tokenizer.encode("\\n",add_special_tokens=False)],device=input_tensor.device) + input_tensor = torch.cat( + [input_tensor, token_thinks], dim=1 + ) + if mode == 'long_context': + assert Config().long_context_config['max_seq_len'] > input_tensor.shape[1] + max_new_tokens, \ + "please change max_seq_len in ~/.ktransformers/config.yaml" + + if system != "Windows" and (config.architectures[0] == "DeepseekV2ForCausalLM" or config.architectures[0] == "DeepseekV3ForCausalLM") and flashinfer_enabled and get_compute_capability() >= 8: + generated = prefill_and_generate( + model, tokenizer, input_tensor.cuda(), max_new_tokens, use_cuda_graph, mode = mode, force_think = force_think, chunk_prefill_size = chunk_prefill_size, + use_flashinfer_mla = True, num_heads = config.num_attention_heads, head_dim_ckv = config.kv_lora_rank, head_dim_kpe = config.qk_rope_head_dim, q_head_dim = config.qk_rope_head_dim + config.qk_nope_head_dim + ) + else: + generated = prefill_and_generate( + model, tokenizer, input_tensor.cuda(), max_new_tokens, use_cuda_graph, mode = mode, force_think = force_think, chunk_prefill_size = chunk_prefill_size, + ) + break + +if __name__ == "__main__": + fire.Fire(local_chat) From 9812d57c1196856f9f6c526d0ad0191cad403f6b Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 02:31:49 +0800 Subject: [PATCH 03/17] fix typo, logging to file --- .github/workflows/install.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index d8f199a0..c16dda83 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -60,6 +60,11 @@ jobs: export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH export CUDA_HOME=/usr/local/cuda-12.4 cd ${{ github.workspace }} - python ktransformers/local_chat.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 100 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/promptsbook.txt - DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 100 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt + echo "Running Local Chat 1" + python ktransformers/local_chat.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt + sed -n '/Prompt:,$p' log1.txt + echo "Running Local Chat 2" + python ktransformers/local_chat.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt + sed -n '/Prompt:,$p' log2.txt + - run: echo "This job's status is ${{ job.status }}." From 57cf449a97097045b560001d1dcb9ce5754be42f Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 02:35:56 +0800 Subject: [PATCH 04/17] fix command --- .github/workflows/install.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index c16dda83..cce443b2 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -61,10 +61,10 @@ jobs: export CUDA_HOME=/usr/local/cuda-12.4 cd ${{ github.workspace }} echo "Running Local Chat 1" - python ktransformers/local_chat.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt + python ktransformers/local_chat-test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt sed -n '/Prompt:,$p' log1.txt echo "Running Local Chat 2" - python ktransformers/local_chat.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt + python ktransformers/local_chat-test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt sed -n '/Prompt:,$p' log2.txt - run: echo "This job's status is ${{ job.status }}." From 129e013b41133e9bf236642fa43362e68623716a Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 02:36:37 +0800 Subject: [PATCH 05/17] rename cicd --- .github/workflows/install.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index cce443b2..fce549cb 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -1,5 +1,5 @@ -name: Install and Test KTransformers -run-name: Install and Test KTransformers +name: Install / Test KTransformers +run-name: Install / Test KTransformers on: workflow_dispatch: inputs: From a31e09969f8423fd9cd93130e00a0376eeffe024 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 02:37:08 +0800 Subject: [PATCH 06/17] fix typo --- .github/workflows/install.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index fce549cb..a58b426c 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -61,10 +61,10 @@ jobs: export CUDA_HOME=/usr/local/cuda-12.4 cd ${{ github.workspace }} echo "Running Local Chat 1" - python ktransformers/local_chat-test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt + python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt sed -n '/Prompt:,$p' log1.txt echo "Running Local Chat 2" - python ktransformers/local_chat-test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt + python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt sed -n '/Prompt:,$p' log2.txt - run: echo "This job's status is ${{ job.status }}." From 0be19c39e97fc81e3c5113dacfee7ae0832b0833 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 02:37:54 +0800 Subject: [PATCH 07/17] change cicd option default --- .github/workflows/install.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index a58b426c..924feb2f 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -6,7 +6,7 @@ on: job_to_run: description: "Which job to run?" required: true - default: "install-test" + default: "test" type: choice options: - create-install-test From f21ea700f30979dcd9ffb3150264316e9085a65a Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 02:45:35 +0800 Subject: [PATCH 08/17] fix term --- .github/workflows/install.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 924feb2f..9b1522e2 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -53,6 +53,8 @@ jobs: git submodule update bash install.sh - name: Test Local Chat + env: + TERM: xterm-256color run: | source /home/qujing3/anaconda3/etc/profile.d/conda.sh conda activate ktransformers-dev From 2ed4dff85d70a357b055446839eaa0ecbfc15c6c Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 02:51:03 +0800 Subject: [PATCH 09/17] fix command typo --- .github/workflows/install.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 9b1522e2..610368e6 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -64,9 +64,9 @@ jobs: cd ${{ github.workspace }} echo "Running Local Chat 1" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt - sed -n '/Prompt:,$p' log1.txt + sed -n '/Prompt:/,$p' log1.txt echo "Running Local Chat 2" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt - sed -n '/Prompt:,$p' log2.txt + sed -n '/Prompt:/,$p' log2.txt - run: echo "This job's status is ${{ job.status }}." From 336b5dd59024ee5434fe8daabfa0762a68b63e60 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 02:55:36 +0800 Subject: [PATCH 10/17] fix sed command --- .github/workflows/install.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 610368e6..29caecab 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -62,11 +62,11 @@ jobs: export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH export CUDA_HOME=/usr/local/cuda-12.4 cd ${{ github.workspace }} - echo "Running Local Chat 1" + echo "Running Local Chat 1...(book.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt - sed -n '/Prompt:/,$p' log1.txt - echo "Running Local Chat 2" + echo $(sed -n '/Prompt:/,$p' log1.txt) + echo "Running Local Chat 2...(chinese.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt - sed -n '/Prompt:/,$p' log2.txt + echo $(sed -n '/Prompt:/,$p' log2.txt) - run: echo "This job's status is ${{ job.status }}." From 9d19b7b4d4c03029fca2048638b37eea791fbc68 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 03:03:38 +0800 Subject: [PATCH 11/17] fix sed --- .github/workflows/install.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 29caecab..42714f3f 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -56,6 +56,7 @@ jobs: env: TERM: xterm-256color run: | + set -e source /home/qujing3/anaconda3/etc/profile.d/conda.sh conda activate ktransformers-dev export PATH=/usr/local/cuda-12.4/bin:$PATH @@ -64,9 +65,11 @@ jobs: cd ${{ github.workspace }} echo "Running Local Chat 1...(book.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt - echo $(sed -n '/Prompt:/,$p' log1.txt) + output=$(sed -n '/Prompt:/,$p' log1.txt) + echo "$output" echo "Running Local Chat 2...(chinese.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt - echo $(sed -n '/Prompt:/,$p' log2.txt) + output=$(sed -n '/Prompt:/,$p' log2.txt) + echo "$output" - run: echo "This job's status is ${{ job.status }}." From 6385308ff049b231b5c544993de38585af8c01d0 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 03:11:26 +0800 Subject: [PATCH 12/17] replace sed with awk --- .github/workflows/install.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 42714f3f..e0c89aea 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -65,11 +65,11 @@ jobs: cd ${{ github.workspace }} echo "Running Local Chat 1...(book.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt - output=$(sed -n '/Prompt:/,$p' log1.txt) + output=$(awk '/Prompt:/ {found=1} found' log1.txt) || exit_code=$? echo "$output" echo "Running Local Chat 2...(chinese.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt - output=$(sed -n '/Prompt:/,$p' log2.txt) + output=$(awk '/Prompt:/ {found=1} found' log2.txt) || exit_code=$? echo "$output" - run: echo "This job's status is ${{ job.status }}." From 570c98c52db6ff0b25700e630f14c6f1e68e68c8 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 03:17:17 +0800 Subject: [PATCH 13/17] remove output test --- .github/workflows/install.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index e0c89aea..4595418d 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -65,9 +65,6 @@ jobs: cd ${{ github.workspace }} echo "Running Local Chat 1...(book.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt - output=$(awk '/Prompt:/ {found=1} found' log1.txt) || exit_code=$? - echo "$output" - echo "Running Local Chat 2...(chinese.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt output=$(awk '/Prompt:/ {found=1} found' log2.txt) || exit_code=$? echo "$output" From 0899b7dde6ef371bd0e047c3b2e6630d3c7b41df Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 03:17:35 +0800 Subject: [PATCH 14/17] remove file output est --- .github/workflows/install.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 4595418d..6b839340 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -64,9 +64,7 @@ jobs: export CUDA_HOME=/usr/local/cuda-12.4 cd ${{ github.workspace }} echo "Running Local Chat 1...(book.txt)" - python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt - python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt > log2.txt - output=$(awk '/Prompt:/ {found=1} found' log2.txt) || exit_code=$? - echo "$output" + python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt + python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt - run: echo "This job's status is ${{ job.status }}." From 4e23a4c02452e7dc90adb83e2096b17ab17ebeb7 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 11:32:43 +0800 Subject: [PATCH 15/17] split two test --- .github/workflows/install.yml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 6b839340..ae3fc951 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -52,9 +52,7 @@ jobs: git submodule init git submodule update bash install.sh - - name: Test Local Chat - env: - TERM: xterm-256color + - name: Test Local Chat 1 run: | set -e source /home/qujing3/anaconda3/etc/profile.d/conda.sh @@ -65,6 +63,16 @@ jobs: cd ${{ github.workspace }} echo "Running Local Chat 1...(book.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt + - name: Test Local Chat 2 + run: | + set -e + source /home/qujing3/anaconda3/etc/profile.d/conda.sh + conda activate ktransformers-dev + export PATH=/usr/local/cuda-12.4/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH + export CUDA_HOME=/usr/local/cuda-12.4 + cd ${{ github.workspace }} + echo "Running Local Chat 2...(chinese.txt)" python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt - run: echo "This job's status is ${{ job.status }}." From a1891b845d6739fc27cbb964184767e83c6cf3c8 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 17:04:42 +0800 Subject: [PATCH 16/17] remove unsupprted paramters, add force think --- .github/workflows/install.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index ae3fc951..4a0accce 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -62,7 +62,7 @@ jobs: export CUDA_HOME=/usr/local/cuda-12.4 cd ${{ github.workspace }} echo "Running Local Chat 1...(book.txt)" - python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt + python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt - name: Test Local Chat 2 run: | set -e @@ -72,7 +72,7 @@ jobs: export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH export CUDA_HOME=/usr/local/cuda-12.4 cd ${{ github.workspace }} - echo "Running Local Chat 2...(chinese.txt)" - python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cache_len 1536 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt + echo "Running Local Chat 2 [force think]...(chinese.txt)" + python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt -f - run: echo "This job's status is ${{ job.status }}." From c66ca65778156b939b754f42b965ebc664a0d6f0 Mon Sep 17 00:00:00 2001 From: SkqLiao Date: Sat, 15 Mar 2025 17:10:44 +0800 Subject: [PATCH 17/17] write to log --- .github/workflows/install.yml | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 4a0accce..3c2b9cf3 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -61,18 +61,11 @@ jobs: export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH export CUDA_HOME=/usr/local/cuda-12.4 cd ${{ github.workspace }} - echo "Running Local Chat 1...(book.txt)" - python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt - - name: Test Local Chat 2 - run: | - set -e - source /home/qujing3/anaconda3/etc/profile.d/conda.sh - conda activate ktransformers-dev - export PATH=/usr/local/cuda-12.4/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH - export CUDA_HOME=/usr/local/cuda-12.4 - cd ${{ github.workspace }} - echo "Running Local Chat 2 [force think]...(chinese.txt)" - python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt -f + echo "Running Local Chat 1 (book.txt) ..." + python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/book.txt > log1.txt + sed -n '/Prompt:/,$p' log1.txt + echo "Running Local Chat 2 [force think] (chinese.txt) ..." + python ktransformers/local_chat_test.py --model_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/config --gguf_path /home/qujing3/models/DeepSeek-R1-Q4_K_M/ --max_new_tokens 256 --cpu_infer 64 --prompt_file /home/qujing3/prompts/chinese.txt -f > log2.txt + sed -n '/Prompt:/,$p' log2.txt - run: echo "This job's status is ${{ job.status }}."