mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-22 15:28:59 +00:00
add ci (#1642)
This commit is contained in:
5
kt-kernel/test/__init__.py
Normal file
5
kt-kernel/test/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""KT-Kernel Test Suite
|
||||
|
||||
This test suite is adapted from SGLang's CI testing framework.
|
||||
It provides hardware-aware test registration and execution with timeout control.
|
||||
"""
|
||||
1
kt-kernel/test/ci/__init__.py
Normal file
1
kt-kernel/test/ci/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""CI test registration and execution utilities."""
|
||||
112
kt-kernel/test/ci/ci_register.py
Normal file
112
kt-kernel/test/ci/ci_register.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import ast
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum, auto
|
||||
from typing import List
|
||||
|
||||
|
||||
class HWBackend(Enum):
    """Hardware backends a CI test can be registered against."""

    CPU = auto()
    CUDA = auto()
    AMD = auto()
|
||||
|
||||
|
||||
@dataclass
class CIRegistry:
    """One CI test registration discovered by statically parsing a test file.

    Fields:
        backend: hardware backend the test targets.
        filename: path of the file the registration was found in.
        est_time: estimated runtime in seconds.
        suite: name of the CI suite the test belongs to.
    """

    backend: HWBackend
    filename: str
    est_time: float
    suite: str
|
||||
|
||||
|
||||
def register_cpu_ci(est_time: float, suite: str):
    """Declare the calling file as a CPU CI test.

    A runtime no-op: registrations are discovered statically by parsing
    the call site with ``ast`` (see ``RegistryVisitor``).
    """
    return None
|
||||
|
||||
|
||||
def register_cuda_ci(est_time: float, suite: str):
    """Declare the calling file as a CUDA CI test.

    A runtime no-op: registrations are discovered statically by parsing
    the call site with ``ast`` (see ``RegistryVisitor``).
    """
    return None
|
||||
|
||||
|
||||
def register_amd_ci(est_time: float, suite: str):
    """Declare the calling file as an AMD/ROCm CI test.

    A runtime no-op: registrations are discovered statically by parsing
    the call site with ``ast`` (see ``RegistryVisitor``).
    """
    return None
|
||||
|
||||
|
||||
# Function name of each register_* helper -> backend it registers for.
# ``RegistryVisitor`` uses this table to recognize registration calls in
# parsed ASTs.
REGISTER_MAPPING = {
    "register_cpu_ci": HWBackend.CPU,
    "register_cuda_ci": HWBackend.CUDA,
    "register_amd_ci": HWBackend.AMD,
}
|
||||
|
||||
|
||||
class RegistryVisitor(ast.NodeVisitor):
    """AST visitor that collects CI registrations from one test file.

    A registration is a top-level expression statement that calls one of
    the functions named in ``REGISTER_MAPPING`` (e.g. ``register_cpu_ci``)
    with literal-constant ``est_time`` and ``suite`` arguments.
    """

    def __init__(self, filename: str):
        self.filename = filename
        self.registries: list[CIRegistry] = []

    def _collect_ci_registry(self, func_call: ast.Call):
        """Return a ``CIRegistry`` for ``func_call``, or None if it is not one.

        Raises:
            ValueError: the call is a registration but ``est_time`` or
                ``suite`` is missing or not a literal constant.
        """
        if not isinstance(func_call.func, ast.Name):
            return None

        if func_call.func.id not in REGISTER_MAPPING:
            return None

        hw = REGISTER_MAPPING[func_call.func.id]
        est_time, suite = None, None
        # Keyword form: register_cpu_ci(est_time=30, suite="default")
        for kw in func_call.keywords:
            if kw.arg == "est_time":
                if isinstance(kw.value, ast.Constant):
                    est_time = kw.value.value
            elif kw.arg == "suite":
                if isinstance(kw.value, ast.Constant):
                    suite = kw.value.value

        # Positional form: register_cpu_ci(30, "default")
        for i, arg in enumerate(func_call.args):
            if isinstance(arg, ast.Constant):
                if i == 0:
                    est_time = arg.value
                elif i == 1:
                    suite = arg.value

        # Explicit raises (not assert) so validation survives ``python -O``;
        # also fixes the "esimation_time" typo in the old message.
        if est_time is None:
            raise ValueError("est_time is required and should be a constant")
        if suite is None:
            raise ValueError("suite is required and should be a constant")
        return CIRegistry(
            backend=hw, filename=self.filename, est_time=est_time, suite=suite
        )

    def visit_Module(self, node):
        """Scan the module's top-level expression statements for registrations."""
        for stmt in node.body:
            if not isinstance(stmt, ast.Expr) or not isinstance(stmt.value, ast.Call):
                continue

            cr = self._collect_ci_registry(stmt.value)
            if cr is not None:
                self.registries.append(cr)

        self.generic_visit(node)
|
||||
|
||||
|
||||
def ut_parse_one_file(filename: str) -> List[CIRegistry]:
    """Parse ``filename`` and return every CI registration it declares."""
    with open(filename, "r") as fp:
        source = fp.read()
    module = ast.parse(source, filename=filename)
    visitor = RegistryVisitor(filename=filename)
    visitor.visit(module)
    return visitor.registries
|
||||
|
||||
|
||||
def collect_tests(files: list[str], sanity_check: bool = True) -> List[CIRegistry]:
    """Gather CI registrations from all of ``files``.

    Args:
        files: test file paths to scan.
        sanity_check: when True, a file without any registration is a hard
            error; when False it only triggers a warning and is skipped.
    """
    ci_tests: List[CIRegistry] = []
    for path in files:
        found = ut_parse_one_file(path)
        if not found:
            msg = f"No CI registry found in {path}"
            if sanity_check:
                raise ValueError(msg)
            warnings.warn(msg)
            continue
        ci_tests.extend(found)
    return ci_tests
|
||||
171
kt-kernel/test/ci/ci_utils.py
Normal file
171
kt-kernel/test/ci/ci_utils.py
Normal file
@@ -0,0 +1,171 @@
|
||||
import os
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import psutil, signal, sys
|
||||
def kill_process_tree(parent_pid: Optional[int], include_parent: bool = True, skip_pid: Optional[int] = None):
    """Kill the process and all its child processes.

    Args:
        parent_pid: root of the process tree to kill. ``None`` means the
            current process, in which case only its children are killed.
        include_parent: also kill ``parent_pid`` itself.
        skip_pid: PID of one child to leave running, if any.
    """
    # Remove sigchld handler to avoid spammy logs.
    if threading.current_thread() is threading.main_thread():
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)

    # Default to our own tree, but never kill ourselves implicitly:
    # we are still executing this function.
    if parent_pid is None:
        parent_pid = os.getpid()
        include_parent = False

    try:
        itself = psutil.Process(parent_pid)
    except psutil.NoSuchProcess:
        # Target already exited; nothing to clean up.
        return

    # Kill descendants first so they cannot respawn under a new parent.
    children = itself.children(recursive=True)
    for child in children:
        if child.pid == skip_pid:
            continue
        try:
            child.kill()
        except psutil.NoSuchProcess:
            pass

    if include_parent:
        try:
            if parent_pid == os.getpid():
                # Killing ourselves: exit immediately after SIGKILL is queued.
                itself.kill()
                sys.exit(0)

            itself.kill()

            # Sometime processes cannot be killed with SIGKILL (e.g, PID=1 launched by kubernetes),
            # so we send an additional signal to kill them.
            itself.send_signal(signal.SIGQUIT)
        except psutil.NoSuchProcess:
            pass
|
||||
|
||||
|
||||
@dataclass
class TestFile:
    """A test file to execute plus its expected runtime.

    ``estimated_time`` is in seconds and is informational only: the runner
    prints it next to the measured elapsed time when the file finishes.
    """

    name: str
    estimated_time: float = 60
|
||||
|
||||
|
||||
def run_with_timeout(
    func: Callable,
    args: tuple = (),
    kwargs: Optional[dict] = None,
    timeout: float = None,
):
    """Run ``func(*args, **kwargs)`` in a worker thread with a timeout.

    Args:
        func: callable to execute.
        args: positional arguments for ``func``.
        kwargs: keyword arguments for ``func`` (None means none).
        timeout: seconds to wait; None waits forever.

    Returns:
        Whatever ``func`` returns.

    Raises:
        TimeoutError: ``func`` did not finish within ``timeout`` seconds.
        BaseException: re-raises whatever ``func`` itself raised.
    """
    ret_value = []
    raised = []

    def _target_func():
        try:
            ret_value.append(func(*args, **(kwargs or {})))
        except BaseException as e:
            # Capture instead of dying silently in the worker thread.
            raised.append(e)

    # daemon=True: a timed-out worker must not keep the interpreter alive.
    t = threading.Thread(target=_target_func, daemon=True)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError(f"function did not finish within {timeout} seconds")

    if raised:
        # Surface the real failure rather than masking it as RuntimeError.
        raise raised[0]

    if not ret_value:
        raise RuntimeError("function produced no return value")

    return ret_value[0]
|
||||
|
||||
|
||||
def run_unittest_files(
    files: List[TestFile], timeout_per_file: float, continue_on_error: bool = False
):
    """
    Run a list of test files.

    Args:
        files: List of TestFile objects to run
        timeout_per_file: Timeout in seconds for each test file
        continue_on_error: If True, continue running remaining tests even if one fails.
            If False, stop at first failure (default behavior for PR tests).

    Returns:
        0 if every file passed, -1 otherwise.
    """
    tic = time.perf_counter()
    success = True
    passed_tests = []
    failed_tests = []

    for i, file in enumerate(files):
        filename, estimated_time = file.name, file.estimated_time
        process = None

        def run_one_file(filename):
            nonlocal process

            filename = os.path.join(os.getcwd(), filename)
            print(
                f".\n.\nBegin ({i}/{len(files) - 1}):\npython3 {filename}\n.\n.\n",
                flush=True,
            )
            # Per-file timer; deliberately distinct from the run-wide `tic`.
            file_tic = time.perf_counter()

            process = subprocess.Popen(
                ["python3", filename], stdout=None, stderr=None, env=os.environ
            )
            process.wait()
            elapsed = time.perf_counter() - file_tic

            print(
                f".\n.\nEnd ({i}/{len(files) - 1}):\n{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n",
                flush=True,
            )
            return process.returncode

        try:
            ret_code = run_with_timeout(
                run_one_file, args=(filename,), timeout=timeout_per_file
            )
            if ret_code != 0:
                print(
                    f"\n✗ FAILED: {filename} returned exit code {ret_code}\n",
                    flush=True,
                )
                success = False
                failed_tests.append((filename, f"exit code {ret_code}"))
                if not continue_on_error:
                    # Stop at first failure for PR tests
                    break
                # Otherwise continue to next test for nightly tests
            else:
                passed_tests.append(filename)
        except TimeoutError:
            # `process` can still be None if the timeout fired before Popen.
            if process is not None:
                kill_process_tree(process.pid)
            time.sleep(5)
            print(
                f"\n✗ TIMEOUT: {filename} after {timeout_per_file} seconds\n",
                flush=True,
            )
            success = False
            failed_tests.append((filename, f"timeout after {timeout_per_file}s"))
            if not continue_on_error:
                # Stop at first timeout for PR tests
                break
            # Otherwise continue to next test for nightly tests

    if success:
        print(f"Success. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
    else:
        print(f"Fail. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)

    # Print summary
    print(f"\n{'='*60}", flush=True)
    print(f"Test Summary: {len(passed_tests)}/{len(files)} passed", flush=True)
    print(f"{'='*60}", flush=True)
    if passed_tests:
        print("✓ PASSED:", flush=True)
        for test in passed_tests:
            print(f"  {test}", flush=True)
    if failed_tests:
        print("\n✗ FAILED:", flush=True)
        for test, reason in failed_tests:
            print(f"  {test} ({reason})", flush=True)
    print(f"{'='*60}\n", flush=True)

    return 0 if success else -1
|
||||
4
kt-kernel/test/per_commit/__init__.py
Normal file
4
kt-kernel/test/per_commit/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
"""Per-commit tests for KT-Kernel.
|
||||
|
||||
Tests in this directory are run on every commit in CI.
|
||||
"""
|
||||
36
kt-kernel/test/per_commit/test_amd_placeholder.py
Normal file
36
kt-kernel/test/per_commit/test_amd_placeholder.py
Normal file
@@ -0,0 +1,36 @@
|
||||
"""AMD/ROCm backend tests for KT-Kernel (Placeholder).
|
||||
|
||||
This file is a placeholder for future AMD/ROCm backend tests.
|
||||
Currently, KT-Kernel focuses on CPU optimizations (Intel AMX/AVX512).
|
||||
|
||||
To implement AMD tests:
|
||||
1. Add actual test functions with @pytest.mark.amd
|
||||
2. Update the estimated time in register_amd_ci()
|
||||
3. Implement AMD/ROCm-specific initialization and validation tests
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add parent directory to path for CI registration
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
from ci.ci_register import register_amd_ci
|
||||
|
||||
# Register this test for AMD CI (estimated time: 10 seconds, placeholder)
# Update suite name when implementing: currently using "stage-a-test-1"
# NOTE: the call itself is a runtime no-op; the CI runner discovers it by
# statically parsing this file's AST (see ci/ci_register.py).
register_amd_ci(est_time=10, suite="stage-a-test-1")
|
||||
|
||||
|
||||
def test_amd_placeholder():
    """Placeholder test for AMD/ROCm backend.

    TODO: Implement actual AMD/ROCm tests when AMD support is added to kt-kernel.
    """
    # Deliberately does nothing until AMD support lands.
    return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running standalone (required by test runner)
    # The runner executes each test file via `python3 <file>` and treats a
    # zero exit code as success, so printing and falling off the end passes.
    print("⚠ AMD/ROCm tests are not yet implemented (placeholder)")
    print("✓ Placeholder test passed")
|
||||
80
kt-kernel/test/per_commit/test_basic_cpu.py
Normal file
80
kt-kernel/test/per_commit/test_basic_cpu.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Basic CPU backend tests for KT-Kernel.
|
||||
|
||||
These tests verify basic functionality without requiring model files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
# Add parent directory to path for CI registration
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
from ci.ci_register import register_cpu_ci
|
||||
|
||||
# Register this test for CPU CI with estimated runtime of 30 seconds
# (runtime no-op; discovered statically by the AST scan in ci/ci_register.py)
register_cpu_ci(est_time=30, suite="default")


# Check if kt_kernel_ext is available
try:
    import kt_kernel_ext

    HAS_KT_KERNEL = True
except ImportError:
    # Extension not built in this environment; tests below skip instead of failing.
    HAS_KT_KERNEL = False
    kt_kernel_ext = None
|
||||
|
||||
|
||||
@pytest.mark.cpu
def test_kt_kernel_import():
    """The kt_kernel_ext extension module must be importable."""
    if HAS_KT_KERNEL:
        assert kt_kernel_ext is not None, "kt_kernel_ext module should be importable"
    else:
        pytest.skip("kt_kernel_ext not built or available")
|
||||
|
||||
|
||||
@pytest.mark.cpu
def test_cpu_infer_initialization():
    """CPUInfer should construct cleanly with a small thread count."""
    if not HAS_KT_KERNEL:
        pytest.skip("kt_kernel_ext not built or available")

    thread_count = 4
    cpuinfer = kt_kernel_ext.CPUInfer(thread_count)
    assert cpuinfer is not None, "CPUInfer should be initialized successfully"
|
||||
|
||||
|
||||
@pytest.mark.cpu
def test_basic_module_attributes():
    """kt_kernel_ext must expose its key entry points."""
    if not HAS_KT_KERNEL:
        pytest.skip("kt_kernel_ext not built or available")

    # CPUInfer is the primary class exported by the extension.
    has_cpu_infer = hasattr(kt_kernel_ext, 'CPUInfer')
    assert has_cpu_infer, "kt_kernel_ext should have CPUInfer class"
|
||||
|
||||
|
||||
def run_all_tests():
    """Run all tests in this file (for standalone execution)."""
    if not HAS_KT_KERNEL:
        print("⚠ kt_kernel_ext not available, skipping tests")
        return

    checks = [
        (test_kt_kernel_import, "✓ test_kt_kernel_import passed"),
        (test_cpu_infer_initialization, "✓ test_cpu_infer_initialization passed"),
        (test_basic_module_attributes, "✓ test_basic_module_attributes passed"),
    ]
    try:
        for check, ok_message in checks:
            check()
            print(ok_message)

        print("\n✓ All tests passed!")
    except Exception as e:
        print(f"\n✗ Test failed: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running standalone (required by test runner)
    # The runner invokes `python3 <file>` and checks the exit code;
    # run_all_tests() exits non-zero via sys.exit(1) on any failure.
    run_all_tests()
|
||||
36
kt-kernel/test/per_commit/test_cuda_placeholder.py
Normal file
36
kt-kernel/test/per_commit/test_cuda_placeholder.py
Normal file
@@ -0,0 +1,36 @@
|
||||
"""CUDA backend tests for KT-Kernel (Placeholder).
|
||||
|
||||
This file is a placeholder for future CUDA backend tests.
|
||||
Currently, KT-Kernel focuses on CPU optimizations (Intel AMX/AVX512).
|
||||
|
||||
To implement CUDA tests:
|
||||
1. Add actual test functions with @pytest.mark.cuda
|
||||
2. Update the estimated time in register_cuda_ci()
|
||||
3. Implement CUDA-specific initialization and validation tests
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add parent directory to path for CI registration
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
from ci.ci_register import register_cuda_ci
|
||||
|
||||
# Register this test for CUDA CI (estimated time: 10 seconds, placeholder)
# Update suite name when implementing: currently using "stage-a-test-1"
# NOTE: the call itself is a runtime no-op; the CI runner discovers it by
# statically parsing this file's AST (see ci/ci_register.py).
register_cuda_ci(est_time=10, suite="stage-a-test-1")
|
||||
|
||||
|
||||
def test_cuda_placeholder():
    """Placeholder test for CUDA backend.

    TODO: Implement actual CUDA tests when CUDA support is added to kt-kernel.
    """
    # Deliberately does nothing until CUDA support lands.
    return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running standalone (required by test runner)
    # The runner executes each test file via `python3 <file>` and treats a
    # zero exit code as success, so printing and falling off the end passes.
    print("⚠ CUDA tests are not yet implemented (placeholder)")
    print("✓ Placeholder test passed")
|
||||
68
kt-kernel/test/run_suite.py
Normal file
68
kt-kernel/test/run_suite.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import argparse
|
||||
import glob
|
||||
from typing import List
|
||||
|
||||
from ci.ci_register import HWBackend, CIRegistry, collect_tests
|
||||
from ci.ci_utils import TestFile, run_unittest_files
|
||||
|
||||
# CLI value accepted by --hw -> backend enum member.
HW_MAPPING = {
    "cpu": HWBackend.CPU,
    "cuda": HWBackend.CUDA,
    "amd": HWBackend.AMD,
}
|
||||
|
||||
# Suites each backend is allowed to declare; _filter_tests rejects any
# registration whose suite is not listed here.
LABEL_MAPPING = {
    HWBackend.CPU: ["default"],
    HWBackend.AMD: ["stage-a-test-1"],
    HWBackend.CUDA: ["stage-a-test-1"],
}
|
||||
|
||||
|
||||
def _filter_tests(
    ci_tests: List[CIRegistry], hw: HWBackend, suite: str
) -> List[CIRegistry]:
    """Select registrations for backend ``hw`` that belong to ``suite``.

    Every registration on the backend must name a suite listed in
    ``LABEL_MAPPING``; anything else is treated as a registration error.
    """
    known_suites = LABEL_MAPPING[hw]
    selected = []
    for test in ci_tests:
        if test.backend != hw:
            continue
        assert test.suite in known_suites, f"Unknown stage {test.suite} for backend {hw}"
        if test.suite == suite:
            selected.append(test)
    return selected
|
||||
|
||||
|
||||
def run_per_commit(hw: HWBackend, suite: str):
    """Discover per-commit tests for ``hw``/``suite`` and execute them."""
    candidates = glob.glob("per_commit/**/*.py", recursive=True)
    # Exclude __init__.py files as they don't contain test registrations
    candidates = [path for path in candidates if not path.endswith("__init__.py")]
    selected = _filter_tests(collect_tests(candidates), hw, suite)
    test_files = [TestFile(t.filename, t.est_time) for t in selected]

    run_unittest_files(
        test_files,
        timeout_per_file=1200,
        continue_on_error=False,
    )
|
||||
|
||||
|
||||
def main():
    """CLI entry point: choose a backend and suite, then run its tests."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--hw",
        type=str,
        choices=["cpu", "cuda", "amd"],
        required=True,
        help="Hardware backend to run tests on.",
    )
    parser.add_argument(
        "--suite",
        type=str,
        required=True,
        help="Test suite to run.",
    )
    args = parser.parse_args()
    run_per_commit(HW_MAPPING[args.hw], args.suite)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point; see main() for the --hw/--suite CLI.
    main()
|
||||
Reference in New Issue
Block a user