mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 17:55:48 +00:00
Introduce dependency-based CI test selection. (#2377)
* Selective test filter initial commit. * Expanded folder paths for parsing ninja dependencies. * Fixing default branch name in the test evaluation script. * Fixing paths for robustness and adding ctest command to the launch script. * change jenkins file and few tests to upgrade CI * Setting ninja build path. * Fixing typo in Jenkinsfile, and wrong paths. * Fixing typo in launch script. * add few more tests to check CI logic * Fixing header for shell script. * turn off performance test by default, add option to run all unit tests * revert dummy changes in source code to trigger tests * make sure develop branch runs all unit tests --------- Co-authored-by: Vidyasagar Ananthan <vidyasagar.ananthan@amd.com>
This commit is contained in:
315
script/dependency-parser/src/enhanced_ninja_parser.py
Normal file
315
script/dependency-parser/src/enhanced_ninja_parser.py
Normal file
@@ -0,0 +1,315 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enhanced Ninja Dependency Parser
|
||||
|
||||
This script combines ninja build file parsing with ninja -t deps to create a comprehensive
|
||||
mapping that includes both source files AND header files, and properly handles files
|
||||
used by multiple executables.
|
||||
"""
|
||||
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
import json
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import threading
|
||||
|
||||
class EnhancedNinjaDependencyParser:
    """Build a file -> executables dependency map from a Ninja build tree.

    The parser reads ``build.ninja`` to learn which object files are linked
    into each executable, asks ``ninja -t deps`` for the sources and headers
    recorded for every object file, and combines both into
    ``file_to_executables`` so that a header shared by many executables is
    attributed to all of them.
    """

    # Link rule: "build bin/<exe>: <RULE> <inputs...> [| implicit deps]".
    _EXE_RULE = re.compile(r'^build (bin/[^:]+):\s+\S+\s+([^|]+)')
    # Compile rule: "build <obj>.(cpp|cu|hip).o: <RULE> <primary source> ...".
    _OBJ_RULE = re.compile(r'^build ([^:]+\.(?:cpp|cu|hip)\.o):\s+\S+\s+([^\s|]+)')

    # Workspace-relative prefixes that always count as project files.
    _PROJECT_PREFIXES = (
        'include/', 'library/', 'test/', 'example/', 'src/', 'profiler/',
        'build/include/', 'build/_deps/gtest', 'client_example', 'codegen', 'tile_engine',
    )
    # Absolute prefixes that are treated as system/toolchain files.
    _SYSTEM_PREFIXES = ('/usr/', '/opt/rocm', '/lib/', '/system/', '/local/')
    # Extensions accepted for files that match neither prefix list.
    _SOURCE_SUFFIXES = ('.cpp', '.hpp', '.h', '.c', '.cc', '.cxx', '.cu', '.hip', '.inc')

    def __init__(self, build_file_path, ninja_executable="ninja", workspace_root=".."):
        """Create a parser.

        Args:
            build_file_path: Path to the ``build.ninja`` file to analyse.
            ninja_executable: Name or path of the ninja binary to invoke.
            workspace_root: Prefix stripped from dependency paths reported by
                ninja so that keys become workspace-relative. It may also be
                assigned on the instance after construction.
        """
        self.build_file_path = build_file_path
        # dirname() is "" for a bare file name, and subprocess rejects cwd="",
        # which would make every ninja call fail; use the current directory.
        self.build_dir = os.path.dirname(build_file_path) or "."
        self.ninja_executable = ninja_executable
        self.workspace_root = workspace_root

        # Core data structures
        self.executable_to_objects = {}  # exe -> [object_files]
        self.object_to_source = {}  # object -> primary_source
        self.object_to_all_deps = {}  # object -> [all_dependencies]
        self.file_to_executables = defaultdict(set)  # file -> {executables}

        # Kept for callers that rely on the attribute; results are only stored
        # from the coordinating thread, so the parser itself does not need it.
        self.lock = threading.Lock()

    def parse_dependencies(self):
        """Run the full pipeline: parse build file, query deps, build mapping."""
        print(f"Parsing ninja dependencies from: {self.build_file_path}")

        # Step 1: Parse build file for executable -> object mappings
        self._parse_build_file()

        # Step 2: Get all object files and their dependencies
        print(f"Found {len(self.object_to_source)} object files")
        print("Extracting detailed dependencies for all object files...")
        self._extract_object_dependencies()

        # Step 3: Build the final file -> executables mapping
        self._build_file_to_executable_mapping()

    def _parse_build_file(self):
        """Fill ``executable_to_objects`` and ``object_to_source`` from build.ninja."""
        print("Parsing ninja build file...")

        with open(self.build_file_path, 'r', encoding='utf-8', errors='replace') as f:
            for raw_line in f:
                line = raw_line.rstrip('\n')

                exe_match = self._EXE_RULE.match(line)
                if exe_match:
                    exe = exe_match.group(1)
                    # Anything under bin/ only counts as an executable if the
                    # rule name says so or the target follows the naming scheme.
                    if 'EXECUTABLE' in line or 'test_' in exe or 'example_' in exe:
                        # Only build-tree-relative objects are tracked;
                        # absolute paths are skipped.
                        self.executable_to_objects[exe] = [
                            dep for dep in exe_match.group(2).split()
                            if dep.endswith('.o') and not dep.startswith('/')
                        ]
                        continue

                obj_match = self._OBJ_RULE.match(line)
                if obj_match:
                    self.object_to_source[obj_match.group(1)] = obj_match.group(2)

        print(f"Found {len(self.executable_to_objects)} executables")
        print(f"Found {len(self.object_to_source)} object-to-source mappings")

    def _extract_object_dependencies(self):
        """Query ``ninja -t deps`` for every known object file in parallel."""
        object_files = list(self.object_to_source)
        if not object_files:
            print("No object files found - skipping dependency extraction")
            return

        max_workers = min(16, len(object_files))  # Limit concurrent processes

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_obj = {
                executor.submit(self._get_object_dependencies, obj): obj
                for obj in object_files
            }

            completed = 0
            for future in as_completed(future_to_obj):
                obj_file = future_to_obj[future]
                try:
                    dependencies = future.result()
                except Exception as e:
                    # Best effort: one failing object must not abort the run.
                    print(f"Error processing {obj_file}: {e}")
                    continue
                self.object_to_all_deps[obj_file] = dependencies
                completed += 1
                if completed % 100 == 0:
                    print(f"Processed {completed}/{len(object_files)} object files...")

        print(f"Completed dependency extraction for {len(self.object_to_all_deps)} object files")

    def _get_object_dependencies(self, object_file):
        """Return the dependency list ninja recorded for one object file.

        Returns an empty list when ninja exits non-zero, times out, or cannot
        be started; failures to run are reported on stdout.
        """
        cmd = [self.ninja_executable, "-t", "deps", object_file]
        try:
            result = subprocess.run(
                cmd,
                cwd=self.build_dir,
                capture_output=True,
                text=True,
                timeout=30,
            )
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError) as e:
            print(f"Error getting dependencies for {object_file}: {e}")
            return []

        if result.returncode != 0:
            return []
        return self._parse_deps_output(result.stdout)

    def _parse_deps_output(self, stdout):
        """Turn the text printed by ``ninja -t deps <obj>`` into a path list.

        The first line is the per-object header and is skipped; blank lines and
        lines starting with ``#`` are ignored. Paths that start with the
        workspace root are made workspace-relative.
        """
        ws_prefix = self.workspace_root.rstrip("/") + "/"
        dependencies = []
        for raw_line in stdout.strip().split('\n')[1:]:
            dep_file = raw_line.strip()
            if not dep_file or dep_file.startswith('#'):
                continue
            if dep_file.startswith(ws_prefix):
                dep_file = dep_file[len(ws_prefix):]
            dependencies.append(dep_file)
        return dependencies

    def _build_file_to_executable_mapping(self):
        """Populate ``file_to_executables`` from the per-object dependency lists."""
        print("Building file-to-executable mapping...")

        for exe, object_files in self.executable_to_objects.items():
            for obj_file in object_files:
                # Objects without recorded deps simply contribute nothing.
                for dep_file in self.object_to_all_deps.get(obj_file, ()):
                    # Filter out system files and focus on project files
                    if self._is_project_file(dep_file):
                        self.file_to_executables[dep_file].add(exe)

        print(f"Built mapping for {len(self.file_to_executables)} files")

        # Show statistics
        multi_exe_files = {f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1}
        print(f"Files used by multiple executables: {len(multi_exe_files)}")

        if multi_exe_files:
            print("Sample files with multiple dependencies:")
            for f, exes in sorted(multi_exe_files.items())[:5]:
                print(f"  {f}: {len(exes)} executables")

    def _is_project_file(self, file_path):
        """Return True if ``file_path`` should be tracked as a project file.

        Known project prefixes win, then known system prefixes exclude, and
        anything else is accepted only if it has a source/header extension.
        """
        if file_path.startswith(self._PROJECT_PREFIXES):
            return True
        if file_path.startswith(self._SYSTEM_PREFIXES):
            return False
        return file_path.endswith(self._SOURCE_SUFFIXES)

    def export_to_csv(self, output_file):
        """Write ``source_file,executables`` rows; executables are ';'-joined."""
        import csv  # local import keeps this class self-contained

        print(f"Exporting mapping to {output_file}")

        with open(output_file, 'w', encoding='utf-8', newline='') as f:
            f.write("source_file,executables\n")
            # QUOTE_ALL reproduces the always-quoted fields while correctly
            # escaping any quote character that appears inside a path.
            writer = csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator='\n')
            for file_path in sorted(self.file_to_executables):
                # Use semicolon to separate multiple executables within the field
                exe_list = ';'.join(sorted(self.file_to_executables[file_path]))
                writer.writerow([file_path, exe_list])

    def export_to_json(self, output_file):
        """Write both mapping directions plus summary statistics as JSON."""
        print(f"Exporting complete mapping to {output_file}")

        # Build reverse mapping (executable -> files)
        exe_to_files = defaultdict(set)
        for file_path, exes in self.file_to_executables.items():
            for exe in exes:
                exe_to_files[exe].add(file_path)

        mapping_data = {
            'file_to_executables': {
                # Sorted so repeated runs produce identical files.
                file_path: sorted(exes)
                for file_path, exes in self.file_to_executables.items()
            },
            'executable_to_files': {
                exe: sorted(files) for exe, files in exe_to_files.items()
            },
            'statistics': {
                'total_files': len(self.file_to_executables),
                'total_executables': len(self.executable_to_objects),
                'total_object_files': len(self.object_to_source),
                'files_with_multiple_executables': sum(
                    1 for exes in self.file_to_executables.values() if len(exes) > 1
                ),
            },
        }

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(mapping_data, f, indent=2)

    def print_summary(self):
        """Print counts of executables, files, file types and shared files."""
        print("\n=== Enhanced Dependency Mapping Summary ===")
        print(f"Total executables: {len(self.executable_to_objects)}")
        print(f"Total files mapped: {len(self.file_to_executables)}")
        print(f"Total object files processed: {len(self.object_to_all_deps)}")

        # Files by type
        cpp_files = sum(1 for f in self.file_to_executables if f.endswith('.cpp'))
        hpp_files = sum(1 for f in self.file_to_executables if f.endswith('.hpp'))
        h_files = sum(1 for f in self.file_to_executables if f.endswith('.h'))

        print("\nFile types:")
        print(f"  .cpp files: {cpp_files}")
        print(f"  .hpp files: {hpp_files}")
        print(f"  .h files: {h_files}")

        # Multi-executable files
        multi_exe_files = {f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1}
        print(f"\nFiles used by multiple executables: {len(multi_exe_files)}")

        if multi_exe_files:
            print("\nTop files with most dependencies:")
            sorted_multi = sorted(multi_exe_files.items(), key=lambda x: len(x[1]), reverse=True)
            for file_path, exes in sorted_multi[:10]:
                print(f"  {file_path}: {len(exes)} executables")
|
||||
|
||||
def main():
    """Command-line entry point for the enhanced ninja dependency parser.

    Positional arguments (all optional, in this order):
        build_file      Path to build.ninja (default: ../build/build.ninja)
        ninja_path      Ninja executable to use (default: "ninja")
        workspace_root  Prefix stripped from dependency paths (default: "..")

    Writes ``enhanced_file_executable_mapping.csv`` and
    ``enhanced_dependency_mapping.json`` next to the build file. Exits with
    status 1 if the build file is missing or ninja cannot be run.
    """
    default_workspace_root = ".."
    args = sys.argv[1:]
    build_file = args[0] if len(args) > 0 else f"{default_workspace_root}/build/build.ninja"
    ninja_path = args[1] if len(args) > 1 else "ninja"
    workspace_root = args[2] if len(args) > 2 else default_workspace_root

    if not os.path.exists(build_file):
        print(f"Error: Build file not found: {build_file}")
        sys.exit(1)

    try:
        subprocess.run([ninja_path, "--version"], capture_output=True, check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError also covers a path that exists but is not executable
        # (PermissionError), not only a missing binary (FileNotFoundError).
        print(f"Error: ninja executable not found: {ninja_path}")
        sys.exit(1)

    parser = EnhancedNinjaDependencyParser(build_file, ninja_path)
    parser.workspace_root = workspace_root  # Read when dependency paths are made relative
    parser.parse_dependencies()
    parser.print_summary()

    # Export results
    output_dir = os.path.dirname(build_file)
    csv_file = os.path.join(output_dir, 'enhanced_file_executable_mapping.csv')
    json_file = os.path.join(output_dir, 'enhanced_dependency_mapping.json')

    parser.export_to_csv(csv_file)
    parser.export_to_json(json_file)

    print("\nResults exported to:")
    print(f"  CSV: {csv_file}")
    print(f"  JSON: {json_file}")


if __name__ == "__main__":
    main()
|
||||
136
script/dependency-parser/src/selective_test_filter.py
Normal file
136
script/dependency-parser/src/selective_test_filter.py
Normal file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Selective Test Filter Tool
|
||||
|
||||
Given two git refs (branches or commit IDs), this tool:
|
||||
- Identifies changed files between the refs
|
||||
- Loads the enhanced dependency mapping JSON (from enhanced_ninja_parser.py)
|
||||
- Maps changed files to affected test executables (optionally filtering for "test_" prefix)
|
||||
- Exports the list of tests to run to tests_to_run.json
|
||||
|
||||
Usage:
|
||||
python selective_test_filter.py <depmap_json> <ref1> <ref2> [--all | --test-prefix] [--output <output_json>]
|
||||
|
||||
Arguments:
|
||||
<depmap_json> Path to enhanced_dependency_mapping.json
|
||||
<ref1> Source git ref (branch or commit)
|
||||
<ref2> Target git ref (branch or commit)
|
||||
|
||||
Options:
|
||||
--all Include all executables (default)
|
||||
--test-prefix Only include executables starting with "test_"
|
||||
--output Output JSON file (default: tests_to_run.json)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import subprocess
|
||||
import json
|
||||
import os
|
||||
|
||||
def get_changed_files(ref1, ref2):
    """Return the set of file paths that differ between two git refs.

    Runs ``git diff --name-only ref1 ref2`` in the current working directory.

    Args:
        ref1: Source git ref (branch or commit).
        ref2: Target git ref (branch or commit).

    Returns:
        A set of non-empty path strings as printed by git.

    Exits the process with status 1 if git fails or cannot be started.
    """
    try:
        result = subprocess.run(
            ["git", "diff", "--name-only", ref1, ref2],
            capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Error running git diff: {e}")
        # stderr is captured, so without this git's own explanation
        # (unknown ref, not a repository, ...) would be lost.
        if e.stderr and e.stderr.strip():
            print(e.stderr.strip())
        sys.exit(1)
    except OSError as e:
        # git is not installed or is not executable.
        print(f"Error running git diff: {e}")
        sys.exit(1)

    return {line.strip() for line in result.stdout.splitlines() if line.strip()}
|
||||
|
||||
def load_depmap(depmap_json):
    """Read the dependency-map JSON and return its file -> executables table.

    Two layouts are accepted: a document that nests the table under the
    ``"file_to_executables"`` key, and a document that is the table itself.
    """
    with open(depmap_json, "r") as handle:
        payload = json.load(handle)

    # Nested layout if the key is present, otherwise the payload is the table.
    return payload["file_to_executables"] if "file_to_executables" in payload else payload
|
||||
|
||||
def select_tests(file_to_executables, changed_files, filter_mode):
    """Return the sorted list of executables affected by the changed files.

    Args:
        file_to_executables: Mapping of file path -> iterable of executables.
        changed_files: Iterable of changed file paths; paths missing from the
            mapping are ignored.
        filter_mode: ``"all"`` keeps every affected executable;
            ``"test_prefix"`` keeps only those whose file name starts with
            ``test_``. Any other value selects nothing.

    Returns:
        A sorted list of unique executable names.
    """
    affected = set()
    for changed in changed_files:
        for exe in file_to_executables.get(changed, ()):
            if filter_mode == "all":
                affected.add(exe)
            # Executables may be recorded with a directory component such as
            # "bin/test_foo", so the prefix is checked on the file name only.
            elif filter_mode == "test_prefix" and os.path.basename(exe).startswith("test_"):
                affected.add(exe)
    return sorted(affected)
|
||||
|
||||
def main():
    """Command-line entry point for the selective test filter.

    Modes:
        <depmap_json> --audit
            Print every mapped file with its executables.
        <depmap_json> --optimize-build <changed_file> [...]
            Print the executables affected by the listed files.
        <depmap_json> <ref1> <ref2> [--all | --test-prefix] [--output <json>]
            Diff two git refs and write the affected tests to a JSON file
            (default: tests_to_run.json).

    Exits with status 1 on usage errors or when the dependency map is missing.
    """
    argv = sys.argv

    if "--audit" in argv:
        # The first positional argument must be the map, not the flag itself.
        if len(argv) < 3 or argv[1].startswith("--"):
            print("Usage: python selective_test_filter.py <depmap_json> --audit")
            sys.exit(1)
        depmap_json = argv[1]
        if not os.path.exists(depmap_json):
            print(f"Dependency map JSON not found: {depmap_json}")
            sys.exit(1)
        file_to_executables = load_depmap(depmap_json)
        for f, exes in file_to_executables.items():
            print(f"{f}: {', '.join(exes)}")
        print(f"Total files: {len(file_to_executables)}")
        sys.exit(0)

    if "--optimize-build" in argv:
        if len(argv) < 3 or argv[1].startswith("--"):
            print("Usage: python selective_test_filter.py <depmap_json> --optimize-build <changed_file1> [<changed_file2> ...]")
            sys.exit(1)
        depmap_json = argv[1]
        # Everything after the flag is treated as a changed file path.
        changed_files = set(argv[argv.index("--optimize-build") + 1:])
        if not os.path.exists(depmap_json):
            print(f"Dependency map JSON not found: {depmap_json}")
            sys.exit(1)
        file_to_executables = load_depmap(depmap_json)
        affected_executables = set()
        for f in changed_files:
            if f in file_to_executables:
                affected_executables.update(file_to_executables[f])
        print("Affected executables:")
        for exe in sorted(affected_executables):
            print(exe)
        print(f"Total affected executables: {len(affected_executables)}")
        sys.exit(0)

    if len(argv) < 4:
        print("Usage: python selective_test_filter.py <depmap_json> <ref1> <ref2> [--all | --test-prefix] [--output <output_json>]")
        sys.exit(1)

    depmap_json = argv[1]
    ref1 = argv[2]
    ref2 = argv[3]
    filter_mode = "all"
    output_json = "tests_to_run.json"

    if "--test-prefix" in argv:
        filter_mode = "test_prefix"
    if "--all" in argv:
        # --all wins when both flags are given.
        filter_mode = "all"
    if "--output" in argv:
        idx = argv.index("--output")
        if idx + 1 < len(argv):
            output_json = argv[idx + 1]

    if not os.path.exists(depmap_json):
        print(f"Dependency map JSON not found: {depmap_json}")
        sys.exit(1)

    changed_files = get_changed_files(ref1, ref2)
    if not changed_files:
        print("No changed files detected.")
        tests = []
    else:
        file_to_executables = load_depmap(depmap_json)
        tests = select_tests(file_to_executables, changed_files, filter_mode)

    with open(output_json, "w") as f:
        json.dump({"tests_to_run": tests, "changed_files": sorted(changed_files)}, f, indent=2)

    print(f"Exported {len(tests)} tests to run to {output_json}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user