diff --git a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py index f6d5500..5ad4ee2 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py +++ b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py @@ -99,7 +99,7 @@ def test_moe_amx_int4_accuracy(): data=range(expert_num), device="cpu", dtype=torch.int64 ).contiguous() - CPUInfer = kt_kernel_ext.CPUInfer(90) + CPUInfer = kt_kernel_ext.CPUInfer(60) with torch.inference_mode(mode=True): # Initialize MoE layers diff --git a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py index 49d551e..42026ee 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py +++ b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py @@ -99,7 +99,7 @@ def test_moe_amx_int4_1_accuracy(): data=range(expert_num), device="cpu", dtype=torch.int64 ).contiguous() - CPUInfer = kt_kernel_ext.CPUInfer(90) + CPUInfer = kt_kernel_ext.CPUInfer(60) with torch.inference_mode(mode=True): # Initialize MoE layers diff --git a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py index 1eac2a4..4afdbe8 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py +++ b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py @@ -100,7 +100,7 @@ def test_moe_amx_int4_1k_accuracy(): data=range(expert_num), device="cpu", dtype=torch.int64 ).contiguous() - CPUInfer = kt_kernel_ext.CPUInfer(90) + CPUInfer = kt_kernel_ext.CPUInfer(60) with torch.inference_mode(mode=True): # Initialize MoE layers diff --git a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py index 66c71f5..b6527c8 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py +++ b/kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py @@ -99,7 +99,7 @@ def test_moe_amx_int8_accuracy(): data=range(expert_num), device="cpu", dtype=torch.int64 ).contiguous() - CPUInfer = kt_kernel_ext.CPUInfer(90) + CPUInfer = kt_kernel_ext.CPUInfer(60) with torch.inference_mode(mode=True): # Initialize MoE layers diff --git a/kt-kernel/test/per_commit/test_moe_amx_bench_int4.py b/kt-kernel/test/per_commit/test_moe_amx_bench_int4.py index e6267ab..504060d 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_bench_int4.py +++ b/kt-kernel/test/per_commit/test_moe_amx_bench_int4.py @@ -45,9 +45,9 @@ test_iter = 2000 worker_config_dict = { "subpool_count": 2, "subpool_numa_map": [0, 1], - "subpool_thread_count": [45, 45], + "subpool_thread_count": [30, 30], } -CPUINFER_PARAM = 90 +CPUINFER_PARAM = 60 def get_git_commit(): diff --git a/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1.py b/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1.py deleted file mode 100644 index 796863a..0000000 --- a/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -"""AMX MOE INT4_1 benchmark tests for KT-Kernel. - -Benchmarks performance (bandwidth and FLOPS) of AMX-accelerated INT4_1 MOE operations. -""" - -import os -import sys -import time -import json -import subprocess -import platform -import pytest - -# Add parent directory to path for CI registration -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -from ci.ci_register import register_cpu_ci - -# Register this test for CPU CI with estimated runtime of 300 seconds -register_cpu_ci(est_time=300, suite="default") - -# Check if dependencies are available -try: - import torch - import kt_kernel_ext - from tqdm import tqdm - HAS_DEPS = True -except ImportError as e: - HAS_DEPS = False - import_error = str(e) - -# Test parameters (from original bench_moe_amx.py) -expert_num = 16 -hidden_size = 7168 -intermediate_size = 2048 -max_len = 25600 -num_experts_per_tok = 8 -layer_num = 2 -qlen = 2048 -warm_up_iter = 1000 -test_iter = 2000 - -# Worker configuration -worker_config_dict = { - "subpool_count": 2, - "subpool_numa_map": [0, 1], - "subpool_thread_count": [45, 45], -} -CPUINFER_PARAM = 90 - - -def get_git_commit(): - """Get current git commit information.""" - result = {} - try: - commit = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip() - commit_msg = subprocess.check_output(["git", "log", "-1", "--pretty=%B"]).decode("utf-8").strip() - result["commit"] = commit - result["commit_message"] = commit_msg - - dirty_output = subprocess.check_output(["git", "status", "--porcelain"]).decode("utf-8").strip() - if dirty_output: - result["dirty"] = True - result["dirty_files"] = dirty_output.splitlines() - else: - result["dirty"] = False - except Exception as e: - result["commit"] = None - result["commit_message"] = None - result["dirty"] = None - result["error"] = str(e) - return result - - -def get_system_info(): - """Get system information including CPU model, memory, cores, and sockets.""" - info = {} - uname = platform.uname() - info["system_name"] = uname.system - info["node_name"] = uname.node - - # Get CPU model (Linux only) - cpu_model = None - if os.path.exists("/proc/cpuinfo"): - try: - with open("/proc/cpuinfo", "r") as f: - for line in f: - if "model name" in line: - cpu_model = line.split(":", 1)[1].strip() - break - except Exception as e: - cpu_model = f"Error: {e}" - info["cpu_model"] = cpu_model - - # Get memory size in GB (Linux only) - mem_total_gb = None - if os.path.exists("/proc/meminfo"): - try: - with open("/proc/meminfo", "r") as f: - for line in f: - if "MemTotal" in line: - mem_kb = float(line.split(":", 1)[1].split()[0]) - mem_total_gb = round(mem_kb / (1024 * 1024), 2) - break - except Exception as e: - mem_total_gb = f"Error: {e}" - info["memory_size_GB"] = mem_total_gb - - # Get CPU core count - info["cpu_core_count"] = os.cpu_count() - - # Get socket count - sockets = set() - if os.path.exists("/proc/cpuinfo"): - try: - with open("/proc/cpuinfo", "r") as f: - for line in f: - if "physical id" in line: - sockets.add(line.split(":", 1)[1].strip()) - except Exception as e: - sockets = set() - info["cpu_socket_count"] = len(sockets) if len(sockets) > 0 else 1 - - return info - - -def record_results(result, filename): - """Append results to JSONL file.""" - with open(filename, "a") as f: - f.write(json.dumps(result) + "\n") - - -@pytest.mark.cpu -def test_moe_amx_int4_1_benchmark(): - """Benchmark AMX INT4_1 MOE performance.""" - if not HAS_DEPS: - pytest.skip(f"Dependencies not available: {import_error}") - - quant_mode = "int4_1" - bytes_per_elem = 0.5 - - # Setup output file - script_dir = os.path.dirname(os.path.abspath(__file__)) - json_path = os.path.join(script_dir, "bench_moe_amx_int4_1.jsonl") - - with torch.inference_mode(): - # Initialize CPUInfer with worker config - worker_config = kt_kernel_ext.WorkerPoolConfig() - worker_config.subpool_count = worker_config_dict["subpool_count"] - worker_config.subpool_numa_map = worker_config_dict["subpool_numa_map"] - worker_config.subpool_thread_count = worker_config_dict["subpool_thread_count"] - CPUInfer = kt_kernel_ext.CPUInfer(worker_config) - - # Initialize MOE layers - moes = [] - for layer_index in range(layer_num): - gate_proj = ( - torch.randn((expert_num, intermediate_size, hidden_size), dtype=torch.float32, device="cuda") - .to("cpu") - .contiguous() - ) - up_proj = ( - torch.randn((expert_num, intermediate_size, hidden_size), dtype=torch.float32, device="cuda") - .to("cpu") - .contiguous() - ) - down_proj = ( - torch.randn((expert_num, hidden_size, intermediate_size), dtype=torch.float32, device="cuda") - .to("cpu") - .contiguous() - ) - config = kt_kernel_ext.moe.MOEConfig(expert_num, num_experts_per_tok, hidden_size, intermediate_size, 0) - config.max_len = max_len - config.gate_proj = gate_proj.data_ptr() - config.up_proj = up_proj.data_ptr() - config.down_proj = down_proj.data_ptr() - config.pool = CPUInfer.backend_ - - moe = kt_kernel_ext.moe.AMXInt4_1_MOE(config) - CPUInfer.submit(moe.load_weights_task()) - CPUInfer.sync() - moes.append(moe) - - # Generate test data - gen_iter = 3000 - expert_ids = ( - torch.rand(gen_iter * qlen, expert_num, device="cpu") - .argsort(dim=-1)[:, :num_experts_per_tok] - .reshape(gen_iter, qlen * num_experts_per_tok) - .to("cpu") - .contiguous() - ) - weights = ( - torch.rand((gen_iter, qlen, num_experts_per_tok), dtype=torch.float32, device="cpu").to("cpu").contiguous() - ) - input_tensor = ( - torch.randn((layer_num, qlen, hidden_size), dtype=torch.bfloat16, device="cuda").to("cpu").contiguous() - ) - output_tensor = ( - torch.empty((layer_num, qlen, hidden_size), dtype=torch.bfloat16, device="cuda").to("cpu").contiguous() - ) - bsz_tensor = torch.tensor([qlen], device="cpu") - - # Warm-up iterations - print(f"Running warm-up for {warm_up_iter} iterations...") - for i in tqdm(range(warm_up_iter), desc="Warm-up"): - CPUInfer.submit( - moes[i % layer_num].forward_task( - bsz_tensor.data_ptr(), - num_experts_per_tok, - expert_ids[i % gen_iter].data_ptr(), - weights[i % gen_iter].data_ptr(), - input_tensor[i % layer_num].data_ptr(), - output_tensor[i % layer_num].data_ptr(), - False, - ) - ) - CPUInfer.sync() - - # Test iterations - print(f"Running test for {test_iter} iterations...") - start = time.perf_counter() - for i in tqdm(range(test_iter), desc="Testing"): - CPUInfer.submit( - moes[i % layer_num].forward_task( - bsz_tensor.data_ptr(), - num_experts_per_tok, - expert_ids[i % gen_iter].data_ptr(), - weights[i % gen_iter].data_ptr(), - input_tensor[i % layer_num].data_ptr(), - output_tensor[i % layer_num].data_ptr(), - False, - ) - ) - CPUInfer.sync() - end = time.perf_counter() - total_time = end - start - - # Calculate performance metrics - time_per_iter_us = total_time / test_iter * 1e6 - bandwidth = ( - hidden_size - * intermediate_size - * 3 - * num_experts_per_tok - * (1 / 8 * 256 * (1 - (31 / 32) ** qlen)) - * bytes_per_elem - * test_iter - / total_time - / 1e9 - ) # GB/s - flops = ( - hidden_size * intermediate_size * qlen * 3 * num_experts_per_tok * 2 * test_iter / total_time / 1e12 - ) # TFLOPS - - print("Quant mode: ", quant_mode) - print("Time(s): ", total_time) - print("Iteration: ", test_iter) - print("Time(us) per iteration: ", time_per_iter_us) - print("Bandwidth: ", bandwidth, "GB/s") - print("Flops: ", flops, "TFLOPS") - - # Record results - result = { - "quant_mode": quant_mode, - "total_time_seconds": total_time, - "iterations": test_iter, - "time_per_iteration_us": time_per_iter_us, - "bandwidth_GBs": bandwidth, - "flops_TFLOPS": flops, - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), - "test_parameters": { - "expert_num": expert_num, - "hidden_size": hidden_size, - "intermediate_size": intermediate_size, - "max_len": max_len, - "num_experts_per_tok": num_experts_per_tok, - "layer_num": layer_num, - "qlen": qlen, - "warm_up_iter": warm_up_iter, - "test_iter": test_iter, - "CPUInfer_parameter": CPUINFER_PARAM, - }, - } - result.update(get_git_commit()) - result.update(get_system_info()) - record_results(result, json_path) - - print(f"Results saved to {json_path}") - - -def run_all_tests(): - """Run all tests in this file (for standalone execution).""" - if not HAS_DEPS: - print(f"⚠ Dependencies not available: {import_error}") - print("Skipping AMX MOE INT4_1 benchmark tests") - return - - try: - print("Running AMX MOE INT4_1 benchmark test...") - test_moe_amx_int4_1_benchmark() - print("✓ AMX MOE INT4_1 benchmark test passed") - print("\n✓ All tests passed!") - except Exception as e: - print(f"\n✗ Test failed: {e}") - import traceback - traceback.print_exc() - sys.exit(1) - - -if __name__ == "__main__": - run_all_tests() diff --git a/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1k.py b/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1k.py deleted file mode 100644 index 764f0b1..0000000 --- a/kt-kernel/test/per_commit/test_moe_amx_bench_int4_1k.py +++ /dev/null @@ -1,320 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -"""AMX MOE INT4_1K benchmark tests for KT-Kernel. - -Benchmarks performance (bandwidth and FLOPS) of AMX-accelerated INT4_1K group quantization MOE operations. -""" - -import os -import sys -import time -import json -import subprocess -import platform -import pytest - -# Add parent directory to path for CI registration -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -from ci.ci_register import register_cpu_ci - -# Register this test for CPU CI with estimated runtime of 300 seconds -register_cpu_ci(est_time=300, suite="default") - -# Check if dependencies are available -try: - import torch - import kt_kernel_ext - from tqdm import tqdm - HAS_DEPS = True -except ImportError as e: - HAS_DEPS = False - import_error = str(e) - -# Test parameters (from original bench_moe_amx.py) -expert_num = 16 -hidden_size = 7168 -intermediate_size = 2048 -max_len = 25600 -num_experts_per_tok = 8 -layer_num = 2 -qlen = 2048 -warm_up_iter = 1000 -test_iter = 2000 -k_group_size = 64 - -# Worker configuration -worker_config_dict = { - "subpool_count": 2, - "subpool_numa_map": [0, 1], - "subpool_thread_count": [45, 45], -} -CPUINFER_PARAM = 90 - - -def get_git_commit(): - """Get current git commit information.""" - result = {} - try: - commit = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip() - commit_msg = subprocess.check_output(["git", "log", "-1", "--pretty=%B"]).decode("utf-8").strip() - result["commit"] = commit - result["commit_message"] = commit_msg - - dirty_output = subprocess.check_output(["git", "status", "--porcelain"]).decode("utf-8").strip() - if dirty_output: - result["dirty"] = True - result["dirty_files"] = dirty_output.splitlines() - else: - result["dirty"] = False - except Exception as e: - result["commit"] = None - result["commit_message"] = None - result["dirty"] = None - result["error"] = str(e) - return result - - -def get_system_info(): - """Get system information including CPU model, memory, cores, and sockets.""" - info = {} - uname = platform.uname() - info["system_name"] = uname.system - info["node_name"] = uname.node - - # Get CPU model (Linux only) - cpu_model = None - if os.path.exists("/proc/cpuinfo"): - try: - with open("/proc/cpuinfo", "r") as f: - for line in f: - if "model name" in line: - cpu_model = line.split(":", 1)[1].strip() - break - except Exception as e: - cpu_model = f"Error: {e}" - info["cpu_model"] = cpu_model - - # Get memory size in GB (Linux only) - mem_total_gb = None - if os.path.exists("/proc/meminfo"): - try: - with open("/proc/meminfo", "r") as f: - for line in f: - if "MemTotal" in line: - mem_kb = float(line.split(":", 1)[1].split()[0]) - mem_total_gb = round(mem_kb / (1024 * 1024), 2) - break - except Exception as e: - mem_total_gb = f"Error: {e}" - info["memory_size_GB"] = mem_total_gb - - # Get CPU core count - info["cpu_core_count"] = os.cpu_count() - - # Get socket count - sockets = set() - if os.path.exists("/proc/cpuinfo"): - try: - with open("/proc/cpuinfo", "r") as f: - for line in f: - if "physical id" in line: - sockets.add(line.split(":", 1)[1].strip()) - except Exception as e: - sockets = set() - info["cpu_socket_count"] = len(sockets) if len(sockets) > 0 else 1 - - return info - - -def record_results(result, filename): - """Append results to JSONL file.""" - with open(filename, "a") as f: - f.write(json.dumps(result) + "\n") - - -@pytest.mark.cpu -def test_moe_amx_int4_1k_benchmark(): - """Benchmark AMX INT4_1K MOE performance.""" - if not HAS_DEPS: - pytest.skip(f"Dependencies not available: {import_error}") - - quant_mode = "int4_1k" - bytes_per_elem = 0.5 - - # Setup output file - script_dir = os.path.dirname(os.path.abspath(__file__)) - json_path = os.path.join(script_dir, "bench_moe_amx_int4_1k.jsonl") - - with torch.inference_mode(): - # Initialize CPUInfer with worker config - worker_config = kt_kernel_ext.WorkerPoolConfig() - worker_config.subpool_count = worker_config_dict["subpool_count"] - worker_config.subpool_numa_map = worker_config_dict["subpool_numa_map"] - worker_config.subpool_thread_count = worker_config_dict["subpool_thread_count"] - CPUInfer = kt_kernel_ext.CPUInfer(worker_config) - - # Initialize MOE layers - moes = [] - for layer_index in range(layer_num): - gate_proj = ( - torch.randn((expert_num, intermediate_size, hidden_size), dtype=torch.float32, device="cuda") - .to("cpu") - .contiguous() - ) - up_proj = ( - torch.randn((expert_num, intermediate_size, hidden_size), dtype=torch.float32, device="cuda") - .to("cpu") - .contiguous() - ) - down_proj = ( - torch.randn((expert_num, hidden_size, intermediate_size), dtype=torch.float32, device="cuda") - .to("cpu") - .contiguous() - ) - config = kt_kernel_ext.moe.MOEConfig(expert_num, num_experts_per_tok, hidden_size, intermediate_size, 0) - config.max_len = max_len - config.gate_proj = gate_proj.data_ptr() - config.up_proj = up_proj.data_ptr() - config.down_proj = down_proj.data_ptr() - config.pool = CPUInfer.backend_ - - # Configure INT4_1K quantization settings - config.quant_config.bits = 4 - config.quant_config.group_size = k_group_size - config.quant_config.zero_point = True - - moe = kt_kernel_ext.moe.AMXInt4_1KGroup_MOE(config) - CPUInfer.submit(moe.load_weights_task()) - CPUInfer.sync() - moes.append(moe) - - # Generate test data - gen_iter = 3000 - expert_ids = ( - torch.rand(gen_iter * qlen, expert_num, device="cpu") - .argsort(dim=-1)[:, :num_experts_per_tok] - .reshape(gen_iter, qlen * num_experts_per_tok) - .to("cpu") - .contiguous() - ) - weights = ( - torch.rand((gen_iter, qlen, num_experts_per_tok), dtype=torch.float32, device="cpu").to("cpu").contiguous() - ) - input_tensor = ( - torch.randn((layer_num, qlen, hidden_size), dtype=torch.bfloat16, device="cuda").to("cpu").contiguous() - ) - output_tensor = ( - torch.empty((layer_num, qlen, hidden_size), dtype=torch.bfloat16, device="cuda").to("cpu").contiguous() - ) - bsz_tensor = torch.tensor([qlen], device="cpu") - - # Warm-up iterations - print(f"Running warm-up for {warm_up_iter} iterations...") - for i in tqdm(range(warm_up_iter), desc="Warm-up"): - CPUInfer.submit( - moes[i % layer_num].forward_task( - bsz_tensor.data_ptr(), - num_experts_per_tok, - expert_ids[i % gen_iter].data_ptr(), - weights[i % gen_iter].data_ptr(), - input_tensor[i % layer_num].data_ptr(), - output_tensor[i % layer_num].data_ptr(), - False, - ) - ) - CPUInfer.sync() - - # Test iterations - print(f"Running test for {test_iter} iterations...") - start = time.perf_counter() - for i in tqdm(range(test_iter), desc="Testing"): - CPUInfer.submit( - moes[i % layer_num].forward_task( - bsz_tensor.data_ptr(), - num_experts_per_tok, - expert_ids[i % gen_iter].data_ptr(), - weights[i % gen_iter].data_ptr(), - input_tensor[i % layer_num].data_ptr(), - output_tensor[i % layer_num].data_ptr(), - False, - ) - ) - CPUInfer.sync() - end = time.perf_counter() - total_time = end - start - - # Calculate performance metrics - time_per_iter_us = total_time / test_iter * 1e6 - bandwidth = ( - hidden_size - * intermediate_size - * 3 - * num_experts_per_tok - * (1 / 8 * 256 * (1 - (31 / 32) ** qlen)) - * bytes_per_elem - * test_iter - / total_time - / 1e9 - ) # GB/s - flops = ( - hidden_size * intermediate_size * qlen * 3 * num_experts_per_tok * 2 * test_iter / total_time / 1e12 - ) # TFLOPS - - print("Quant mode: ", quant_mode) - print("Time(s): ", total_time) - print("Iteration: ", test_iter) - print("Time(us) per iteration: ", time_per_iter_us) - print("Bandwidth: ", bandwidth, "GB/s") - print("Flops: ", flops, "TFLOPS") - - # Record results - result = { - "quant_mode": quant_mode, - "total_time_seconds": total_time, - "iterations": test_iter, - "time_per_iteration_us": time_per_iter_us, - "bandwidth_GBs": bandwidth, - "flops_TFLOPS": flops, - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), - "test_parameters": { - "expert_num": expert_num, - "hidden_size": hidden_size, - "intermediate_size": intermediate_size, - "max_len": max_len, - "num_experts_per_tok": num_experts_per_tok, - "layer_num": layer_num, - "qlen": qlen, - "warm_up_iter": warm_up_iter, - "test_iter": test_iter, - "k_group_size": k_group_size, - "CPUInfer_parameter": CPUINFER_PARAM, - }, - } - result.update(get_git_commit()) - result.update(get_system_info()) - record_results(result, json_path) - - print(f"Results saved to {json_path}") - - -def run_all_tests(): - """Run all tests in this file (for standalone execution).""" - if not HAS_DEPS: - print(f"⚠ Dependencies not available: {import_error}") - print("Skipping AMX MOE INT4_1K benchmark tests") - return - - try: - print("Running AMX MOE INT4_1K benchmark test...") - test_moe_amx_int4_1k_benchmark() - print("✓ AMX MOE INT4_1K benchmark test passed") - print("\n✓ All tests passed!") - except Exception as e: - print(f"\n✗ Test failed: {e}") - import traceback - traceback.print_exc() - sys.exit(1) - - -if __name__ == "__main__": - run_all_tests() diff --git a/kt-kernel/test/per_commit/test_moe_amx_bench_int8.py b/kt-kernel/test/per_commit/test_moe_amx_bench_int8.py index ef3d7f0..2ad7423 100644 --- a/kt-kernel/test/per_commit/test_moe_amx_bench_int8.py +++ b/kt-kernel/test/per_commit/test_moe_amx_bench_int8.py @@ -45,9 +45,10 @@ test_iter = 2000 worker_config_dict = { "subpool_count": 2, "subpool_numa_map": [0, 1], - "subpool_thread_count": [45, 45], + "subpool_thread_count": [30, 30], } -CPUINFER_PARAM = 90 +CPUINFER_PARAM = 60 + def get_git_commit(): diff --git a/kt-kernel/test/run_suite.py b/kt-kernel/test/run_suite.py index 3de70af..ec8c81a 100644 --- a/kt-kernel/test/run_suite.py +++ b/kt-kernel/test/run_suite.py @@ -1,5 +1,6 @@ import argparse import glob +import sys from typing import List from ci.ci_register import HWBackend, CIRegistry, collect_tests @@ -37,7 +38,7 @@ def run_per_commit(hw: HWBackend, suite: str): ci_tests = _filter_tests(collect_tests(files), hw, suite) test_files = [TestFile(t.filename, t.est_time) for t in ci_tests] - run_unittest_files( + return run_unittest_files( test_files, timeout_per_file=1200, continue_on_error=False, @@ -61,7 +62,10 @@ def main(): ) args = parser.parse_args() hw = HW_MAPPING[args.hw] - run_per_commit(hw, args.suite) + exit_code = run_per_commit(hw, args.suite) + # run_unittest_files returns 0 for success, -1 for failure + # Convert to standard exit codes: 0 for success, 1 for failure + sys.exit(0 if exit_code == 0 else 1) if __name__ == "__main__":