From bc369b256cdbc80e20622efae9e80fe3c6448bc1 Mon Sep 17 00:00:00 2001
From: SkqLiao
Date: Wed, 19 Mar 2025 21:25:21 +0800
Subject: [PATCH] add CI/CD for human eval score benchmarking

---
 .github/workflows/score.yml  |  27 +++++++
 ktransformers/tests/score.py | 135 +++++++++++++++++++++++++++++++++++
 2 files changed, 162 insertions(+)
 create mode 100644 .github/workflows/score.yml
 create mode 100644 ktransformers/tests/score.py

diff --git a/.github/workflows/score.yml b/.github/workflows/score.yml
new file mode 100644
index 0000000..dce6a9f
--- /dev/null
+++ b/.github/workflows/score.yml
@@ -0,0 +1,27 @@
+# Manually dispatched workflow that runs the HumanEval scoring script on the self-hosted runner.
+name: Human Eval Score KTransformers
+run-name: Human Eval Score KTransformers
+on: workflow_dispatch
+jobs:
+  Human-Eval-Score-KTransformers:
+    runs-on: self-hosted
+    steps:
+      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
+      - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
+      - name: Check out repository code
+        uses: actions/checkout@v4
+      - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
+      # Activates the conda env and points PATH/LD_LIBRARY_PATH/CUDA_HOME at CUDA 12.4 before scoring.
+      - name: Human Eval Run
+        run: |
+          set -e
+          source /home/qujing3/anaconda3/etc/profile.d/conda.sh
+          conda activate ktransformers-dev
+          export PATH=/usr/local/cuda-12.4/bin:$PATH
+          export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH
+          export CUDA_HOME=/usr/local/cuda-12.4
+          cd ${{ github.workspace }}
+          # The script is created at ktransformers/tests/score.py, relative to the workspace root.
+          python ktransformers/tests/score.py
+
+      - run: echo "This job's status is ${{ job.status }}."
diff --git a/ktransformers/tests/score.py b/ktransformers/tests/score.py
new file mode 100644
index 0000000..6be19d9
--- /dev/null
+++ b/ktransformers/tests/score.py
@@ -0,0 +1,154 @@
+"""Run the HumanEval scoring pipeline against a local ktransformers server.
+
+Starts the server, waits for it to answer on /v1/models, runs eval_api.py,
+scores the results with evaluate_functional_correctness, and always stops
+the server on the way out.
+"""
+import os
+import subprocess
+import sys
+import threading
+import time
+
+import requests
+
+
+def wait_for_server(base_url: str, timeout: int = None, process: subprocess.Popen = None) -> None:
+    """Block until ``GET {base_url}/v1/models`` answers with HTTP 200.
+
+    Args:
+        base_url: Scheme, host and port of the server (no trailing slash).
+        timeout: Overall deadline in seconds; ``None`` or 0 waits forever.
+        process: Optional handle of the server process. When given, the wait
+            is abandoned as soon as that process has exited.
+
+    Raises:
+        TimeoutError: The server was not ready within ``timeout`` seconds.
+        RuntimeError: ``process`` exited before the server became ready.
+    """
+    start_time = time.monotonic()
+    while True:
+        # A crashed server can never become ready; fail fast instead of polling.
+        if process is not None and process.poll() is not None:
+            raise RuntimeError(
+                f"Server process exited with code {process.returncode} before becoming ready"
+            )
+        try:
+            response = requests.get(
+                f"{base_url}/v1/models",
+                headers={"Authorization": "Bearer None"},
+                timeout=10,  # keep one stuck request from defeating the deadline
+            )
+            if response.status_code == 200:
+                # Settle delay kept from the original script (presumably lets
+                # the server finish warming up -- TODO confirm it is needed).
+                time.sleep(5)
+                print("Server is ready.")
+                return
+        except requests.exceptions.RequestException:
+            pass  # not accepting connections yet; retry below
+        if timeout and time.monotonic() - start_time > timeout:
+            raise TimeoutError("Server did not become ready within timeout period")
+        # Back off on every unsuccessful attempt, including non-200 replies.
+        time.sleep(1)
+
+
+def _forward_lines(pipe, sink) -> None:
+    """Copy ``pipe`` to ``sink`` line by line until EOF, then close ``pipe``."""
+    with pipe:
+        for line in pipe:
+            print(line, end="", file=sink, flush=True)
+
+
+def _run_streaming(cmd, env=None) -> int:
+    """Run ``cmd``, echo its stdout and stderr live, and return its exit code.
+
+    Each pipe is drained by its own thread, so the child cannot block on a
+    full pipe buffer while the other stream is being read.
+    """
+    process = subprocess.Popen(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        bufsize=1,
+        env=env,
+    )
+    readers = [
+        threading.Thread(target=_forward_lines, args=(process.stdout, sys.stdout), daemon=True),
+        threading.Thread(target=_forward_lines, args=(process.stderr, sys.stderr), daemon=True),
+    ]
+    try:
+        for reader in readers:
+            reader.start()
+        # The readers finish at EOF, i.e. once the child has closed its pipes.
+        for reader in readers:
+            reader.join()
+        return process.wait()
+    finally:
+        # Do not leave the child running if we are interrupted mid-stream.
+        if process.poll() is None:
+            process.kill()
+            process.wait()
+
+
+server_cmd = [
+    "numactl", "-N", "1", "-m", "1",
+    "/home/qujing3/anaconda3/envs/ktransformers-dev/bin/ktransformers",
+    "--model_path", "/home/qujing3/models/DeepSeek-R1-Q4_K_M/config",
+    "--gguf_path", "/home/qujing3/models/DeepSeek-R1-Q4_K_M/",
+    "--port", "10002",
+    "--cpu_infer", "64"
+]
+
+
+def main() -> int:
+    """Start the server, run both evaluation stages, and return an exit code."""
+    print("Starting ktransformers server...")
+    print(" ".join(server_cmd))
+    # The child inherits the log file descriptor, so closing our handle right
+    # after Popen is safe.
+    with open("/tmp/server_log.txt", "w") as f:
+        server_process = subprocess.Popen(server_cmd, stdout=f, stderr=f)
+
+    try:
+        wait_for_server("http://localhost:10002", timeout=300, process=server_process)
+
+        eval_cmd = ["python", "ktransformers/tests/humaneval/eval_api.py"]
+        print("Running eval_api.py...")
+        print(f"Command: {' '.join(eval_cmd)}")
+
+        env = os.environ.copy()
+        env["PYTHONUNBUFFERED"] = "1"  # make the child flush each line promptly
+
+        eval_returncode = _run_streaming(eval_cmd, env=env)
+        print(f"eval_api.py completed with exit code: {eval_returncode}")
+        if eval_returncode != 0:
+            # Scoring a missing or stale results file would be misleading.
+            return eval_returncode
+
+        evaluate_cmd = [
+            "evaluate_functional_correctness",
+            "ktransformers/tests/humaneval/results/api/eval_b.jsonl"
+        ]
+        print("Running evaluate_functional_correctness...")
+        print(f"Command: {' '.join(evaluate_cmd)}")
+
+        evaluate_returncode = _run_streaming(evaluate_cmd)
+        print(f"evaluate_functional_correctness completed with exit code: {evaluate_returncode}")
+        if evaluate_returncode != 0:
+            print(f"evaluate_functional_correctness exited with code {evaluate_returncode}")
+        return evaluate_returncode
+    finally:
+        print("Stopping ktransformers server...")
+        server_process.terminate()
+        try:
+            server_process.wait(timeout=30)
+        except subprocess.TimeoutExpired:
+            print("Server did not terminate gracefully, forcing...")
+            server_process.kill()
+            server_process.wait()  # reap the killed process
+
+
+if __name__ == "__main__":
+    sys.exit(main())