This commit is contained in:
Jianwei Dong
2025-11-25 20:52:08 +08:00
committed by GitHub
parent 2cffdf7033
commit 51745a9ea1
14 changed files with 845 additions and 48 deletions

View File

@@ -0,0 +1,5 @@
"""KT-Kernel Test Suite
This test suite is adapted from SGLang's CI testing framework.
It provides hardware-aware test registration and execution with timeout control.
"""

View File

@@ -0,0 +1 @@
"""CI test registration and execution utilities."""

View File

@@ -0,0 +1,112 @@
import ast
import warnings
from dataclasses import dataclass
from enum import Enum, auto
from typing import List
class HWBackend(Enum):
    """Hardware backends that a CI test can be registered for."""

    CPU = auto()
    CUDA = auto()
    AMD = auto()
@dataclass
class CIRegistry:
    """One CI registration extracted from a ``register_*_ci(...)`` call."""

    # Backend implied by which register_*_ci function was called.
    backend: HWBackend
    # Path of the test file the registration call was found in.
    filename: str
    # Value given as ``est_time`` in the registration call.
    est_time: float
    # Value given as ``suite`` in the registration call.
    suite: str
def register_cpu_ci(est_time: float, suite: str):
    """Mark the calling test file as belonging to a CPU CI suite.

    This function deliberately does nothing when executed. The registration
    is picked up by ``RegistryVisitor``, which parses the test file's source
    and reads the call's arguments, so both arguments must be literal
    constants.

    Args:
        est_time: Estimated runtime of the test file.
        suite: Name of the suite the test file belongs to.
    """
    return None
def register_cuda_ci(est_time: float, suite: str):
    """Mark the calling test file as belonging to a CUDA CI suite.

    This function deliberately does nothing when executed. The registration
    is picked up by ``RegistryVisitor``, which parses the test file's source
    and reads the call's arguments, so both arguments must be literal
    constants.

    Args:
        est_time: Estimated runtime of the test file.
        suite: Name of the suite the test file belongs to.
    """
    return None
def register_amd_ci(est_time: float, suite: str):
    """Mark the calling test file as belonging to an AMD CI suite.

    This function deliberately does nothing when executed. The registration
    is picked up by ``RegistryVisitor``, which parses the test file's source
    and reads the call's arguments, so both arguments must be literal
    constants.

    Args:
        est_time: Estimated runtime of the test file.
        suite: Name of the suite the test file belongs to.
    """
    return None
# Maps the name of each registration function to the backend it registers for.
# RegistryVisitor looks up call names in this table, so a new register_*_ci
# function must be added here to be recognised.
REGISTER_MAPPING = {
    "register_cpu_ci": HWBackend.CPU,
    "register_cuda_ci": HWBackend.CUDA,
    "register_amd_ci": HWBackend.AMD,
}
class RegistryVisitor(ast.NodeVisitor):
    """Collect ``register_*_ci(...)`` calls from a parsed test module.

    Only calls that are expression statements directly in the module body and
    whose callee is a bare name listed in ``REGISTER_MAPPING`` are recognised.
    The results accumulate in ``self.registries``.
    """

    def __init__(self, filename: str):
        self.filename = filename
        self.registries: list[CIRegistry] = []

    def _collect_ci_registry(self, func_call: ast.Call):
        """Turn one call node into a ``CIRegistry``, or ``None`` if unrelated.

        Args:
            func_call: The call expression to inspect.

        Returns:
            A ``CIRegistry`` when the call targets a known registration
            function, otherwise ``None``.

        Raises:
            ValueError: If ``est_time`` or ``suite`` is missing or is not a
                literal constant. (Raised explicitly rather than asserted so
                the check also runs under ``python -O``.)
        """
        if not isinstance(func_call.func, ast.Name):
            return None
        if func_call.func.id not in REGISTER_MAPPING:
            return None

        hw = REGISTER_MAPPING[func_call.func.id]
        est_time, suite = None, None

        # Keyword form: register_x_ci(est_time=..., suite=...)
        for kw in func_call.keywords:
            if not isinstance(kw.value, ast.Constant):
                continue
            if kw.arg == "est_time":
                est_time = kw.value.value
            elif kw.arg == "suite":
                suite = kw.value.value

        # Positional form: register_x_ci(<est_time>, <suite>)
        for i, arg in enumerate(func_call.args):
            if not isinstance(arg, ast.Constant):
                continue
            if i == 0:
                est_time = arg.value
            elif i == 1:
                suite = arg.value

        location = f"{self.filename}:{getattr(func_call, 'lineno', '?')}"
        if est_time is None:
            raise ValueError(
                f"{location}: est_time is required and should be a constant"
            )
        if suite is None:
            raise ValueError(
                f"{location}: suite is required and should be a constant"
            )

        return CIRegistry(
            backend=hw, filename=self.filename, est_time=est_time, suite=suite
        )

    def visit_Module(self, node):
        """Scan the module's top-level statements for registration calls."""
        for stmt in node.body:
            if not isinstance(stmt, ast.Expr) or not isinstance(stmt.value, ast.Call):
                continue
            cr = self._collect_ci_registry(stmt.value)
            if cr is not None:
                self.registries.append(cr)
        self.generic_visit(node)
def ut_parse_one_file(filename: str) -> List[CIRegistry]:
    """Parse one test file and return the CI registrations it declares.

    Args:
        filename: Path of the Python source file to parse.

    Returns:
        The ``CIRegistry`` entries found by ``RegistryVisitor``; empty if the
        file contains no registration call.

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the file is not valid Python.
        ValueError: If a registration call has missing or non-constant
            arguments.
    """
    # Read as UTF-8 explicitly: test files contain non-ASCII characters, and
    # the platform default encoding is not guaranteed to be UTF-8.
    with open(filename, "r", encoding="utf-8") as f:
        file_content = f.read()
    tree = ast.parse(file_content, filename=filename)
    visitor = RegistryVisitor(filename=filename)
    visitor.visit(tree)
    return visitor.registries
def collect_tests(files: list[str], sanity_check: bool = True) -> List[CIRegistry]:
    """Gather the CI registrations declared across ``files``.

    Args:
        files: Paths of the test files to parse.
        sanity_check: When true, a file without any registration is an error;
            when false, it only triggers a warning and the file is skipped.

    Returns:
        All registrations found, in the order the files were given.

    Raises:
        ValueError: If ``sanity_check`` is true and a file has no registration.
    """
    collected = []
    for path in files:
        found = ut_parse_one_file(path)
        if found:
            collected.extend(found)
            continue
        message = f"No CI registry found in {path}"
        if sanity_check:
            raise ValueError(message)
        warnings.warn(message)
    return collected

View File

@@ -0,0 +1,171 @@
import os
import subprocess
import threading
import time
from dataclasses import dataclass
from typing import Callable, List, Optional
import psutil, signal, sys
def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: Optional[int] = None):
    """Kill the process and all its child processes.

    Args:
        parent_pid: PID whose descendants should be killed. ``None`` means the
            current process, in which case the parent itself is never killed.
        include_parent: Whether to also kill ``parent_pid`` after its children.
        skip_pid: PID of a descendant to leave alive, if any.
    """
    # Remove sigchld handler to avoid spammy logs.
    # Only done on the main thread: signal.signal() may not be called from
    # other threads.
    if threading.current_thread() is threading.main_thread():
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)

    if parent_pid is None:
        parent_pid = os.getpid()
        include_parent = False

    try:
        itself = psutil.Process(parent_pid)
    except psutil.NoSuchProcess:
        # Nothing to do if the process is already gone.
        return

    children = itself.children(recursive=True)
    for child in children:
        if child.pid == skip_pid:
            continue
        try:
            child.kill()
        except psutil.NoSuchProcess:
            # The child exited between enumeration and kill.
            pass

    if include_parent:
        try:
            if parent_pid == os.getpid():
                # Killing ourselves; sys.exit is a fallback in case the kill
                # does not take effect immediately.
                itself.kill()
                sys.exit(0)

            itself.kill()

            # Sometimes processes cannot be killed with SIGKILL (e.g., PID=1 launched by kubernetes),
            # so we send an additional signal to kill them.
            itself.send_signal(signal.SIGQUIT)
        except psutil.NoSuchProcess:
            pass
@dataclass
class TestFile:
    """A test script to execute, together with its expected runtime."""

    # Path of the test script; run_unittest_files joins it onto os.getcwd().
    name: str
    # Expected runtime; run_unittest_files only echoes it in its log output.
    estimated_time: float = 60
def run_with_timeout(
    func: Callable,
    args: tuple = (),
    kwargs: Optional[dict] = None,
    timeout: Optional[float] = None,
):
    """Run ``func(*args, **kwargs)`` in a worker thread, waiting up to ``timeout``.

    Args:
        func: Callable to execute.
        args: Positional arguments for ``func``.
        kwargs: Keyword arguments for ``func``; ``None`` means none.
        timeout: Maximum seconds to wait; ``None`` waits indefinitely.

    Returns:
        Whatever ``func`` returned.

    Raises:
        TimeoutError: If ``func`` is still running after ``timeout`` seconds.
            The worker thread is not stopped; the caller must arrange for the
            work to end (e.g. by killing a subprocess it is waiting on).
        RuntimeError: If ``func`` raised. The original exception is attached
            as ``__cause__`` so the failure reason is not lost.
    """
    ret_value = []
    errors = []

    def _target_func():
        try:
            ret_value.append(func(*args, **(kwargs or {})))
        except BaseException as exc:
            # Captured here and re-raised (chained) in the calling thread.
            errors.append(exc)

    t = threading.Thread(target=_target_func)
    t.start()
    t.join(timeout=timeout)

    func_name = getattr(func, "__name__", repr(func))
    if t.is_alive():
        raise TimeoutError(f"{func_name} did not finish within {timeout} seconds")
    if errors:
        raise RuntimeError(f"{func_name} raised {errors[0]!r}") from errors[0]
    if not ret_value:
        raise RuntimeError(f"{func_name} produced no result")
    return ret_value[0]
def _print_test_summary(passed_tests, failed_tests, total):
    """Print the pass/fail summary for a finished run of ``total`` files."""
    print(f"\n{'='*60}", flush=True)
    print(f"Test Summary: {len(passed_tests)}/{total} passed", flush=True)
    print(f"{'='*60}", flush=True)
    if passed_tests:
        print("✓ PASSED:", flush=True)
        for test in passed_tests:
            print(f" {test}", flush=True)
    if failed_tests:
        print("\n✗ FAILED:", flush=True)
        for test, reason in failed_tests:
            print(f" {test} ({reason})", flush=True)
    print(f"{'='*60}\n", flush=True)


def run_unittest_files(
    files: List[TestFile], timeout_per_file: float, continue_on_error: bool = False
):
    """
    Run a list of test files, each as ``python3 <file>`` in a subprocess.

    Args:
        files: List of TestFile objects to run
        timeout_per_file: Timeout in seconds for each test file
        continue_on_error: If True, continue running remaining tests even if one fails.
                           If False, stop at first failure (default behavior for PR tests).

    Returns:
        0 if every executed file exited with status 0, otherwise -1.
    """
    tic = time.perf_counter()
    success = True
    passed_tests = []
    failed_tests = []

    for i, file in enumerate(files):
        filename, estimated_time = file.name, file.estimated_time
        process = None

        def run_one_file(filename):
            nonlocal process

            filename = os.path.join(os.getcwd(), filename)
            print(
                f".\n.\nBegin ({i}/{len(files) - 1}):\npython3 {filename}\n.\n.\n",
                flush=True,
            )
            start = time.perf_counter()

            process = subprocess.Popen(
                ["python3", filename], stdout=None, stderr=None, env=os.environ
            )
            process.wait()
            elapsed = time.perf_counter() - start

            print(
                f".\n.\nEnd ({i}/{len(files) - 1}):\n{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n",
                flush=True,
            )
            return process.returncode

        try:
            ret_code = run_with_timeout(
                run_one_file, args=(filename,), timeout=timeout_per_file
            )
            if ret_code != 0:
                print(
                    f"\n✗ FAILED: {filename} returned exit code {ret_code}\n",
                    flush=True,
                )
                success = False
                failed_tests.append((filename, f"exit code {ret_code}"))
                if not continue_on_error:
                    # Stop at first failure for PR tests
                    break
                # Otherwise continue to next test for nightly tests
            else:
                passed_tests.append(filename)
        except TimeoutError:
            # The timeout can fire before Popen has assigned `process`
            # (e.g. with a very small timeout), so only kill what exists.
            if process is not None:
                kill_process_tree(process.pid)
            time.sleep(5)
            print(
                f"\n✗ TIMEOUT: {filename} after {timeout_per_file} seconds\n",
                flush=True,
            )
            success = False
            failed_tests.append((filename, f"timeout after {timeout_per_file}s"))
            if not continue_on_error:
                # Stop at first timeout for PR tests
                break
            # Otherwise continue to next test for nightly tests

    if success:
        print(f"Success. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
    else:
        print(f"Fail. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)

    # Print summary
    _print_test_summary(passed_tests, failed_tests, len(files))

    return 0 if success else -1

View File

@@ -0,0 +1,4 @@
"""Per-commit tests for KT-Kernel.
Tests in this directory are run on every commit in CI.
"""

View File

@@ -0,0 +1,36 @@
"""AMD/ROCm backend tests for KT-Kernel (Placeholder).
This file is a placeholder for future AMD/ROCm backend tests.
Currently, KT-Kernel focuses on CPU optimizations (Intel AMX/AVX512).
To implement AMD tests:
1. Add actual test functions with @pytest.mark.amd
2. Update the estimated time in register_amd_ci()
3. Implement AMD/ROCm-specific initialization and validation tests
"""
import os
import sys
# Add parent directory to path for CI registration
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from ci.ci_register import register_amd_ci
# Register this test for AMD CI (estimated time: 10 seconds, placeholder)
# Update suite name when implementing: currently using "stage-a-test-1"
# NOTE: the CI collector finds this call by parsing this file's source, so it
# must stay a top-level statement whose arguments are literal constants.
register_amd_ci(est_time=10, suite="stage-a-test-1")
def test_amd_placeholder():
    """No-op stand-in for the AMD/ROCm backend tests.

    TODO: Implement actual AMD/ROCm tests when AMD support is added to kt-kernel.
    """
    # Nothing to verify yet.
    return None
if __name__ == "__main__":
    # Allow running standalone (required by test runner, which launches each
    # test file as `python3 <file>`).
    # NOTE: this only prints; test_amd_placeholder() is not invoked here.
    print("⚠ AMD/ROCm tests are not yet implemented (placeholder)")
    print("✓ Placeholder test passed")

View File

@@ -0,0 +1,80 @@
"""Basic CPU backend tests for KT-Kernel.
These tests verify basic functionality without requiring model files.
"""
import os
import sys
import pytest
# Add parent directory to path for CI registration
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from ci.ci_register import register_cpu_ci
# Register this test for CPU CI with estimated runtime of 30 seconds
# NOTE: the CI collector finds this call by parsing this file's source, so it
# must stay a top-level statement whose arguments are literal constants.
register_cpu_ci(est_time=30, suite="default")
# Check if kt_kernel_ext is available
try:
    import kt_kernel_ext

    HAS_KT_KERNEL = True
except ImportError:
    # Keep the name bound so the tests below can reference it; they check
    # HAS_KT_KERNEL first and skip when the extension is missing.
    HAS_KT_KERNEL = False
    kt_kernel_ext = None
@pytest.mark.cpu
def test_kt_kernel_import():
    """Check that the kt_kernel_ext extension module was imported.

    Skipped when the extension is not built or not installed.
    """
    if HAS_KT_KERNEL:
        assert kt_kernel_ext is not None, "kt_kernel_ext module should be importable"
    else:
        pytest.skip("kt_kernel_ext not built or available")
@pytest.mark.cpu
def test_cpu_infer_initialization():
    """Check that a CPUInfer object can be constructed.

    Skipped when the extension is not built or not installed.
    """
    if not HAS_KT_KERNEL:
        pytest.skip("kt_kernel_ext not built or available")

    # CPUInfer is constructed with a thread count; 4 is used here.
    num_threads = 4
    engine = kt_kernel_ext.CPUInfer(num_threads)
    assert engine is not None, "CPUInfer should be initialized successfully"
@pytest.mark.cpu
def test_basic_module_attributes():
    """Check that kt_kernel_ext exposes the attributes this suite relies on.

    Skipped when the extension is not built or not installed.
    """
    if not HAS_KT_KERNEL:
        pytest.skip("kt_kernel_ext not built or available")

    # CPUInfer is the only attribute currently required.
    exposes_cpuinfer = hasattr(kt_kernel_ext, "CPUInfer")
    assert exposes_cpuinfer, "kt_kernel_ext should have CPUInfer class"
def run_all_tests():
    """Execute every test in this file in order, for standalone runs.

    Returns without running anything when kt_kernel_ext is unavailable.
    On the first failing test, prints the error and exits with status 1.
    """
    if not HAS_KT_KERNEL:
        print("⚠ kt_kernel_ext not available, skipping tests")
        return

    # Each entry pairs a test with the line printed once it has passed.
    checks = (
        (test_kt_kernel_import, "✓ test_kt_kernel_import passed"),
        (test_cpu_infer_initialization, "✓ test_cpu_infer_initialization passed"),
        (test_basic_module_attributes, "✓ test_basic_module_attributes passed"),
    )
    try:
        for check, passed_message in checks:
            check()
            print(passed_message)
        print("\n✓ All tests passed!")
    except Exception as e:
        print(f"\n✗ Test failed: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Allow running standalone (required by test runner, which launches each
    # test file as `python3 <file>` and checks its exit status).
    run_all_tests()

View File

@@ -0,0 +1,36 @@
"""CUDA backend tests for KT-Kernel (Placeholder).
This file is a placeholder for future CUDA backend tests.
Currently, KT-Kernel focuses on CPU optimizations (Intel AMX/AVX512).
To implement CUDA tests:
1. Add actual test functions with @pytest.mark.cuda
2. Update the estimated time in register_cuda_ci()
3. Implement CUDA-specific initialization and validation tests
"""
import os
import sys
# Add parent directory to path for CI registration
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from ci.ci_register import register_cuda_ci
# Register this test for CUDA CI (estimated time: 10 seconds, placeholder)
# Update suite name when implementing: currently using "stage-a-test-1"
# NOTE: the CI collector finds this call by parsing this file's source, so it
# must stay a top-level statement whose arguments are literal constants.
register_cuda_ci(est_time=10, suite="stage-a-test-1")
def test_cuda_placeholder():
    """No-op stand-in for the CUDA backend tests.

    TODO: Implement actual CUDA tests when CUDA support is added to kt-kernel.
    """
    # Nothing to verify yet.
    return None
if __name__ == "__main__":
    # Allow running standalone (required by test runner, which launches each
    # test file as `python3 <file>`).
    # NOTE: this only prints; test_cuda_placeholder() is not invoked here.
    print("⚠ CUDA tests are not yet implemented (placeholder)")
    print("✓ Placeholder test passed")

View File

@@ -0,0 +1,68 @@
import argparse
import glob
from typing import List
from ci.ci_register import HWBackend, CIRegistry, collect_tests
from ci.ci_utils import TestFile, run_unittest_files
# Maps the --hw command-line choice to its backend enum member.
HW_MAPPING = {
    "cpu": HWBackend.CPU,
    "cuda": HWBackend.CUDA,
    "amd": HWBackend.AMD,
}
# Suite names that tests of each backend are allowed to register under;
# _filter_tests rejects any registration whose suite is not listed here.
LABEL_MAPPING = {
    HWBackend.CPU: ["default"],
    HWBackend.AMD: ["stage-a-test-1"],
    HWBackend.CUDA: ["stage-a-test-1"],
}
def _filter_tests(
    ci_tests: List[CIRegistry], hw: HWBackend, suite: str
) -> List[CIRegistry]:
    """Select the registrations for backend ``hw`` that belong to ``suite``.

    Every registration for ``hw`` is validated against ``LABEL_MAPPING``,
    including those that are not in the requested suite.

    Args:
        ci_tests: All collected registrations.
        hw: Backend to keep.
        suite: Suite name to keep.

    Returns:
        The matching registrations, in their original order.

    Raises:
        ValueError: If a registration for ``hw`` names a suite that is not
            listed for that backend. (Raised explicitly rather than asserted
            so the check also runs under ``python -O``.)
    """
    ret = []
    for t in ci_tests:
        if t.backend != hw:
            continue
        if t.suite not in LABEL_MAPPING[hw]:
            raise ValueError(
                f"Unknown stage {t.suite} for backend {hw} (registered in {t.filename})"
            )
        if t.suite == suite:
            ret.append(t)
    return ret
def run_per_commit(hw: HWBackend, suite: str):
    """Collect and run the per-commit tests registered for ``hw`` and ``suite``.

    Test files are discovered under ``per_commit/`` relative to the current
    working directory.

    Args:
        hw: Backend whose tests should run.
        suite: Suite name whose tests should run.

    Returns:
        The status returned by ``run_unittest_files``: 0 when every executed
        file succeeded, non-zero otherwise.
    """
    # Sorted so the execution order does not depend on directory listing order.
    files = sorted(glob.glob("per_commit/**/*.py", recursive=True))
    # Exclude __init__.py files as they don't contain test registrations
    files = [f for f in files if not f.endswith("__init__.py")]
    ci_tests = _filter_tests(collect_tests(files), hw, suite)
    test_files = [TestFile(t.filename, t.est_time) for t in ci_tests]
    return run_unittest_files(
        test_files,
        timeout_per_file=1200,
        continue_on_error=False,
    )


def main():
    """Parse command-line arguments and run the selected suite.

    Raises:
        SystemExit: With status 1 when any test file failed or timed out, so
            that CI sees the failure in the process exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--hw",
        type=str,
        choices=["cpu", "cuda", "amd"],
        required=True,
        help="Hardware backend to run tests on.",
    )
    parser.add_argument(
        "--suite",
        type=str,
        required=True,
        help="Test suite to run.",
    )
    args = parser.parse_args()
    hw = HW_MAPPING[args.hw]
    exit_code = run_per_commit(hw, args.suite)
    if exit_code != 0:
        raise SystemExit(1)


if __name__ == "__main__":
    main()