Introduce dependency-based CI test selection. (#2377)

* Selective test filter initial commit. * Expanded folder paths for parsing ninja dependencies. * Fixing default branch name in the test evaluation script. * Fixing paths for robustness and adding ctest command to the launch script. * Change Jenkinsfile and a few tests to upgrade CI. * Setting ninja build path. * Fixing typo in Jenkinsfile, and wrong paths. * Fixing typo in launch script. * Add a few more tests to check CI logic. * Fixing header for shell script. * Turn off performance tests by default; add an option to run all unit tests. * Revert dummy changes in source code that were made to trigger tests. * Make sure the develop branch runs all unit tests. --------- Co-authored-by: Vidyasagar Ananthan <vidyasagar.ananthan@amd.com>
2026-05-13 17:55:48 +00:00 · 2025-06-20 12:48:00 -07:00
parent 107e3623c7
commit c3c8c6a10f
6 changed files with 786 additions and 18 deletions
--- a/script/dependency-parser/src/enhanced_ninja_parser.py
+++ b/script/dependency-parser/src/enhanced_ninja_parser.py
@@ -0,0 +1,315 @@
+#!/usr/bin/env python3
+"""
+Enhanced Ninja Dependency Parser
+
+This script combines ninja build file parsing with ninja -t deps to create a comprehensive
+mapping that includes both source files AND header files, and properly handles files
+used by multiple executables.
+"""
+
+import re
+import os
+import sys
+import subprocess
+from pathlib import Path
+from collections import defaultdict
+import json
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import threading
+
+class EnhancedNinjaDependencyParser:
+    def __init__(self, build_file_path, ninja_executable="ninja"):
+        """Create an empty parser bound to one ninja build file.
+
+        Args:
+            build_file_path: Path to the ninja build file to parse.
+            ninja_executable: Name or path of the ninja binary used for
+                the ``ninja -t deps`` queries.
+        """
+        # Inputs; ninja is later run from the directory of the build file.
+        self.ninja_executable = ninja_executable
+        self.build_file_path = build_file_path
+        self.build_dir = os.path.dirname(build_file_path)
+
+        # Held while per-object dependency results are recorded.
+        self.lock = threading.Lock()
+
+        # Lookup tables, all empty until parse_dependencies() fills them.
+        self.file_to_executables = defaultdict(set)  # file -> {executables}
+        self.object_to_all_deps = {}                 # object -> [all_dependencies]
+        self.object_to_source = {}                   # object -> primary_source
+        self.executable_to_objects = {}              # exe -> [object_files]
+        
+    def parse_dependencies(self):
+        """Run the whole pipeline: build file, per-object deps, final mapping."""
+        print(f"Parsing ninja dependencies from: {self.build_file_path}")
+
+        # Phase 1: executable -> objects and object -> source, from the build file.
+        self._parse_build_file()
+
+        # Phase 2: ask ninja for the recorded dependencies of every object.
+        object_count = len(self.object_to_source)
+        print(f"Found {object_count} object files")
+        print("Extracting detailed dependencies for all object files...")
+        self._extract_object_dependencies()
+
+        # Phase 3: invert everything into file -> executables.
+        self._build_file_to_executable_mapping()
+        
+    def _parse_build_file(self):
+        """Scan the build file for link rules and object compilation rules.
+
+        Fills ``executable_to_objects`` from ``build bin/...`` lines and
+        ``object_to_source`` from ``build <x>.(cpp|cu|hip).o`` lines.
+        """
+        print("Parsing ninja build file...")
+
+        with open(self.build_file_path, 'r') as f:
+            content = f.read()
+
+        # "build bin/<target>: <rule> <inputs...>" up to the first '|'
+        link_rule = re.compile(r'^build (bin/[^:]+):\s+\S+\s+([^|]+)')
+        # "build <object>.o: <rule> <first input>"
+        compile_rule = re.compile(r'^build ([^:]+\.(?:cpp|cu|hip)\.o):\s+\S+\s+([^\s|]+)')
+
+        for line in content.split('\n'):
+            linked = link_rule.match(line)
+            if linked:
+                target = linked.group(1)
+                if 'EXECUTABLE' in line or 'test_' in target or 'example_' in target:
+                    # Keep only relative object inputs; absolute paths are skipped.
+                    self.executable_to_objects[target] = [
+                        token for token in linked.group(2).split()
+                        if token.endswith('.o') and not token.startswith('/')
+                    ]
+                    continue
+
+            # A bin/ line that is not treated as an executable still gets
+            # a chance to match as an object rule.
+            compiled = compile_rule.match(line)
+            if compiled:
+                object_file, source_file = compiled.groups()
+                self.object_to_source[object_file] = source_file
+
+        print(f"Found {len(self.executable_to_objects)} executables")
+        print(f"Found {len(self.object_to_source)} object-to-source mappings")
+        
+    def _extract_object_dependencies(self):
+        """Query ninja for every known object file using a thread pool.
+
+        Results are stored in ``object_to_all_deps``; an object whose query
+        raises is reported and left out of the table.
+        """
+        targets = list(self.object_to_source.keys())
+        if not targets:
+            print("No object files found - skipping dependency extraction")
+            return
+
+        # Never start more than 16 ninja processes at once.
+        pool_size = min(16, len(targets))
+
+        with ThreadPoolExecutor(max_workers=pool_size) as pool:
+            pending = {}
+            for target in targets:
+                pending[pool.submit(self._get_object_dependencies, target)] = target
+
+            finished_count = 0
+            for done in as_completed(pending):
+                target = pending[done]
+                try:
+                    deps = done.result()
+                    with self.lock:
+                        self.object_to_all_deps[target] = deps
+                        finished_count += 1
+                        # Progress line after every 100 recorded results.
+                        if finished_count % 100 == 0:
+                            print(f"Processed {finished_count}/{len(targets)} object files...")
+                except Exception as e:
+                    print(f"Error processing {target}: {e}")
+
+        print(f"Completed dependency extraction for {len(self.object_to_all_deps)} object files")
+        
+    def _get_object_dependencies(self, object_file):
+        """Return the dependency paths ninja recorded for one object file.
+
+        Runs ``<ninja> -t deps <object_file>`` from the build directory and
+        collects every non-empty line after the first output line that does
+        not start with ``#``. A path that starts with the workspace-root
+        prefix has that prefix removed; other paths are returned unchanged.
+
+        Args:
+            object_file: Object path as it appears in the build file.
+
+        Returns:
+            A list of dependency paths. The list is empty when ninja exits
+            with a non-zero status or when running it raises (the error is
+            printed in that case).
+        """
+        try:
+            # The prefix is identical for every output line, so build it once.
+            ws_root = getattr(self, "workspace_root", "..")
+            ws_prefix = ws_root.rstrip("/") + "/"
+
+            result = subprocess.run(
+                [self.ninja_executable, "-t", "deps", object_file],
+                # dirname() of a bare "build.ninja" is "", which is not a
+                # usable working directory; None keeps the current one.
+                cwd=self.build_dir or None,
+                capture_output=True,
+                text=True,
+                timeout=30,
+            )
+
+            if result.returncode != 0:
+                return []
+
+            dependencies = []
+            # The first output line is the per-object header, not a dependency.
+            for line in result.stdout.strip().split('\n')[1:]:
+                dep_file = line.strip()
+                if not dep_file or dep_file.startswith('#'):
+                    continue
+                if dep_file.startswith(ws_prefix):
+                    dep_file = dep_file[len(ws_prefix):]
+                dependencies.append(dep_file)
+
+            return dependencies
+
+        except Exception as e:
+            # Best effort: one failing object must not abort the whole run.
+            print(f"Error getting dependencies for {object_file}: {e}")
+            return []
+    
+    def _build_file_to_executable_mapping(self):
+        """Invert the executable/object/dependency tables into file -> executables."""
+        print("Building file-to-executable mapping...")
+
+        for exe, linked_objects in self.executable_to_objects.items():
+            for obj in linked_objects:
+                # Objects without recorded dependencies contribute nothing.
+                for dep in self.object_to_all_deps.get(obj, ()):
+                    # Only project files are kept; system files are dropped.
+                    if self._is_project_file(dep):
+                        self.file_to_executables[dep].add(exe)
+
+        print(f"Built mapping for {len(self.file_to_executables)} files")
+
+        # Statistics: files that feed more than one executable.
+        shared = {
+            path: users
+            for path, users in self.file_to_executables.items()
+            if len(users) > 1
+        }
+        print(f"Files used by multiple executables: {len(shared)}")
+
+        if not shared:
+            return
+
+        print("Sample files with multiple dependencies:")
+        for path in sorted(shared)[:5]:
+            print(f"  {path}: {len(shared[path])} executables")
+                
+    def _is_project_file(self, file_path):
+        """Return True when ``file_path`` counts as a project file.
+
+        Checks run in order: known project prefixes accept, known system
+        prefixes reject, and anything else is accepted only when it has a
+        source or header extension.
+        """
+        project_prefixes = (
+            'include/', 'library/', 'test/', 'example/', 'src/', 'profiler/',
+            'build/include/', 'build/_deps/gtest', 'client_example', 'codegen', 'tile_engine',
+        )
+        system_prefixes = ('/usr/', '/opt/rocm', '/lib/', '/system/', '/local/')
+        source_suffixes = ('.cpp', '.hpp', '.h', '.c', '.cc', '.cxx', '.cu', '.hip', '.inc')
+
+        # str.startswith accepts a tuple, so each group is a single call.
+        if file_path.startswith(project_prefixes):
+            return True
+        if file_path.startswith(system_prefixes):
+            return False
+        return file_path.endswith(source_suffixes)
+          
+    def export_to_csv(self, output_file):
+        """Write the file-to-executable mapping as a two-column CSV file.
+
+        The first row is the header ``source_file,executables``. Each later
+        row holds one file path and its executables, sorted and joined with
+        semicolons. Both fields are always wrapped in double quotes.
+
+        Args:
+            output_file: Path of the CSV file to create or overwrite.
+        """
+        print(f"Exporting mapping to {output_file}")
+
+        def quoted(value):
+            # A double quote inside a quoted CSV field must be doubled,
+            # otherwise the row cannot be parsed back.
+            return '"' + value.replace('"', '""') + '"'
+
+        with open(output_file, 'w') as f:
+            f.write("source_file,executables\n")
+            for file_path in sorted(self.file_to_executables):
+                # Semicolons separate multiple executables within the field.
+                exe_list = ';'.join(sorted(self.file_to_executables[file_path]))
+                f.write(f'{quoted(file_path)},{quoted(exe_list)}\n')
+                
+    def export_to_json(self, output_file):
+        """Write the mapping, its inverse, and summary statistics as JSON.
+
+        The document has three keys: ``file_to_executables``,
+        ``executable_to_files`` and ``statistics``. Every list is sorted so
+        that repeated runs over the same build produce the same lists.
+
+        Args:
+            output_file: Path of the JSON file to create or overwrite.
+        """
+        print(f"Exporting complete mapping to {output_file}")
+
+        # Build reverse mapping (executable -> files)
+        exe_to_files = defaultdict(set)
+        for file_path, exes in self.file_to_executables.items():
+            for exe in exes:
+                exe_to_files[exe].add(file_path)
+
+        mapping_data = {
+            'file_to_executables': {
+                # Sets have no stable order; sort like executable_to_files does.
+                file_path: sorted(exes)
+                for file_path, exes in self.file_to_executables.items()
+            },
+            'executable_to_files': {
+                exe: sorted(files) for exe, files in exe_to_files.items()
+            },
+            'statistics': {
+                'total_files': len(self.file_to_executables),
+                'total_executables': len(self.executable_to_objects),
+                'total_object_files': len(self.object_to_source),
+                'files_with_multiple_executables': sum(
+                    1 for exes in self.file_to_executables.values() if len(exes) > 1
+                ),
+            },
+        }
+
+        with open(output_file, 'w') as f:
+            json.dump(mapping_data, f, indent=2)
+            
+    def print_summary(self):
+        """Print headline counts, a per-extension breakdown, and the most shared files."""
+        mapping = self.file_to_executables
+
+        print("\n=== Enhanced Dependency Mapping Summary ===")
+        print(f"Total executables: {len(self.executable_to_objects)}")
+        print(f"Total files mapped: {len(mapping)}")
+        print(f"Total object files processed: {len(self.object_to_all_deps)}")
+
+        # One line per extension of interest.
+        print(f"\nFile types:")
+        for ext in ('.cpp', '.hpp', '.h'):
+            count = sum(1 for path in mapping if path.endswith(ext))
+            print(f"  {ext} files: {count}")
+
+        # Files that feed more than one executable.
+        shared = {path: users for path, users in mapping.items() if len(users) > 1}
+        print(f"\nFiles used by multiple executables: {len(shared)}")
+
+        if not shared:
+            return
+
+        print("\nTop files with most dependencies:")
+        ranked = sorted(shared.items(), key=lambda entry: len(entry[1]), reverse=True)
+        for path, users in ranked[:10]:
+            print(f"  {path}: {len(users)} executables")
+
+def main():
+    """Parse a ninja build file and export the dependency mapping.
+
+    Positional arguments, all optional: build file, ninja executable,
+    workspace root. Exits with status 1 when the build file is missing or
+    the ninja executable cannot be run.
+    """
+    default_workspace_root = ".."
+    cli_args = sys.argv[1:]
+
+    # Each positional argument falls back to its default when absent.
+    if len(cli_args) > 0:
+        build_file = cli_args[0]
+    else:
+        build_file = f"{default_workspace_root}/build/build.ninja"
+    ninja_path = cli_args[1] if len(cli_args) > 1 else "ninja"
+    workspace_root = cli_args[2] if len(cli_args) > 2 else default_workspace_root
+
+    if not os.path.exists(build_file):
+        print(f"Error: Build file not found: {build_file}")
+        sys.exit(1)
+
+    # Probe the ninja binary before doing any real work.
+    try:
+        subprocess.run([ninja_path, "--version"], capture_output=True, check=True)
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        print(f"Error: ninja executable not found: {ninja_path}")
+        sys.exit(1)
+
+    parser = EnhancedNinjaDependencyParser(build_file, ninja_path)
+    # Read back via getattr() in _get_object_dependencies.
+    parser.workspace_root = workspace_root
+    parser.parse_dependencies()
+    parser.print_summary()
+
+    # Both result files are written next to the build file.
+    output_dir = os.path.dirname(build_file)
+    csv_file = os.path.join(output_dir, 'enhanced_file_executable_mapping.csv')
+    json_file = os.path.join(output_dir, 'enhanced_dependency_mapping.json')
+    parser.export_to_csv(csv_file)
+    parser.export_to_json(json_file)
+
+    print(f"\nResults exported to:")
+    print(f"  CSV: {csv_file}")
+    print(f"  JSON: {json_file}")
+
+if __name__ == "__main__":
+    main()
--- a/script/dependency-parser/src/selective_test_filter.py
+++ b/script/dependency-parser/src/selective_test_filter.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+"""
+Selective Test Filter Tool
+
+Given two git refs (branches or commit IDs), this tool:
+- Identifies changed files between the refs
+- Loads the enhanced dependency mapping JSON (from enhanced_ninja_parser.py)
+- Maps changed files to affected test executables (optionally filtering for "test_" prefix)
+- Exports the list of tests to run to tests_to_run.json
+
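+Usage:
+  python selective_test_filter.py <depmap_json> <ref1> <ref2> [--all | --test-prefix] [--output <output_json>]
+  python selective_test_filter.py <depmap_json> --audit
+  python selective_test_filter.py <depmap_json> --optimize-build <changed_file1> [<changed_file2> ...]
+
+Arguments:
+  <depmap_json>   Path to enhanced_dependency_mapping.json
+  <ref1>          Source git ref (branch or commit)
+  <ref2>          Target git ref (branch or commit)
+
+Options:
+  --all             Include all executables (default)
+  --test-prefix     Only include executables starting with "test_"
+  --output          Output JSON file (default: tests_to_run.json)
+  --audit           Print every file in the dependency map with its executables, then exit
+  --optimize-build  Print the executables affected by the listed changed files, then exit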
+"""
+
+import sys
+import subprocess
+import json
+import os
+
+def get_changed_files(ref1, ref2):
+    """Return the set of paths that ``git diff --name-only`` reports for two refs.
+
+    Prints the error and exits with status 1 when git returns a non-zero
+    exit code.
+    """
+    diff_cmd = ["git", "diff", "--name-only", ref1, ref2]
+    try:
+        completed = subprocess.run(
+            diff_cmd, capture_output=True, text=True, check=True
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Error running git diff: {e}")
+        sys.exit(1)
+
+    # One path per output line; blank lines are dropped.
+    stripped = (entry.strip() for entry in completed.stdout.splitlines())
+    return {path for path in stripped if path}
+
+def load_depmap(depmap_json):
+    """Read the dependency map JSON and return its file -> executables table.
+
+    When the document has a ``file_to_executables`` key, that value is
+    returned; otherwise the whole document is returned as-is.
+    """
+    with open(depmap_json, "r") as handle:
+        payload = json.load(handle)
+
+    # Support both old and new formats
+    if "file_to_executables" not in payload:
+        return payload
+    return payload["file_to_executables"]
+
+def select_tests(file_to_executables, changed_files, filter_mode):
+    """Return the sorted list of executables affected by the changed files.
+
+    Args:
+        file_to_executables: Mapping of file path -> iterable of executables.
+        changed_files: Iterable of changed file paths; paths that are not
+            in the mapping are ignored.
+        filter_mode: ``"all"`` keeps every executable; ``"test_prefix"``
+            keeps only executables whose file name starts with ``test_``.
+            Any other value selects nothing.
+
+    Returns:
+        A sorted list of the selected executables, without duplicates.
+    """
+    affected = set()
+    for f in changed_files:
+        if f not in file_to_executables:
+            continue
+        for exe in file_to_executables[f]:
+            if filter_mode == "all":
+                affected.add(exe)
+            # The dependency map stores executables with their directory
+            # (e.g. "bin/test_foo"), so the prefix is checked on the file name.
+            elif filter_mode == "test_prefix" and os.path.basename(exe).startswith("test_"):
+                affected.add(exe)
+    return sorted(affected)
+
+def main():
+    """Command-line entry point; the mode is chosen by flags in ``sys.argv``.
+
+    Modes:
+      * ``<depmap_json> --audit``: print each mapped file with its
+        executables, then the total number of files.
+      * ``<depmap_json> --optimize-build <files...>``: print the executables
+        affected by the listed files.
+      * ``<depmap_json> <ref1> <ref2> [options]``: diff the two git refs and
+        write the affected executables and the changed files to a JSON file.
+
+    Exits with status 1 on a usage error or when the dependency map file
+    does not exist, and with status 0 after the audit and optimize-build
+    modes.
+    """
+    if "--audit" in sys.argv:
+        # The flag itself takes one slot, so a usable command line has at
+        # least three entries and a first argument that is not an option.
+        if len(sys.argv) < 3 or sys.argv[1].startswith("--"):
+            print("Usage: python selective_test_filter.py <depmap_json> --audit")
+            sys.exit(1)
+        depmap_json = sys.argv[1]
+        if not os.path.exists(depmap_json):
+            print(f"Dependency map JSON not found: {depmap_json}")
+            sys.exit(1)
+        file_to_executables = load_depmap(depmap_json)
+        for f, exes in file_to_executables.items():
+            print(f"{f}: {', '.join(exes)}")
+        print(f"Total files: {len(file_to_executables)}")
+        sys.exit(0)
+
+    if "--optimize-build" in sys.argv:
+        # Same rule as --audit: the first argument must be the JSON path.
+        if len(sys.argv) < 3 or sys.argv[1].startswith("--"):
+            print("Usage: python selective_test_filter.py <depmap_json> --optimize-build <changed_file1> [<changed_file2> ...]")
+            sys.exit(1)
+        depmap_json = sys.argv[1]
+        # Everything after the flag is treated as a changed file.
+        changed_files = set(sys.argv[sys.argv.index("--optimize-build") + 1 :])
+        if not os.path.exists(depmap_json):
+            print(f"Dependency map JSON not found: {depmap_json}")
+            sys.exit(1)
+        file_to_executables = load_depmap(depmap_json)
+        affected_executables = set()
+        for f in changed_files:
+            if f in file_to_executables:
+                affected_executables.update(file_to_executables[f])
+        print("Affected executables:")
+        for exe in sorted(affected_executables):
+            print(exe)
+        print(f"Total affected executables: {len(affected_executables)}")
+        sys.exit(0)
+
+    if len(sys.argv) < 4:
+        print("Usage: python selective_test_filter.py <depmap_json> <ref1> <ref2> [--all | --test-prefix] [--output <output_json>]")
+        sys.exit(1)
+
+    depmap_json = sys.argv[1]
+    ref1 = sys.argv[2]
+    ref2 = sys.argv[3]
+    filter_mode = "all"
+    output_json = "tests_to_run.json"
+
+    # --all is checked last, so it wins when both filter flags are given.
+    if "--test-prefix" in sys.argv:
+        filter_mode = "test_prefix"
+    if "--all" in sys.argv:
+        filter_mode = "all"
+    if "--output" in sys.argv:
+        idx = sys.argv.index("--output")
+        # A trailing --output without a value keeps the default file name.
+        if idx + 1 < len(sys.argv):
+            output_json = sys.argv[idx + 1]
+
+    if not os.path.exists(depmap_json):
+        print(f"Dependency map JSON not found: {depmap_json}")
+        sys.exit(1)
+
+    changed_files = get_changed_files(ref1, ref2)
+    if not changed_files:
+        print("No changed files detected.")
+        tests = []
+    else:
+        file_to_executables = load_depmap(depmap_json)
+        tests = select_tests(file_to_executables, changed_files, filter_mode)
+
+    with open(output_json, "w") as f:
+        json.dump({"tests_to_run": tests, "changed_files": sorted(changed_files)}, f, indent=2)
+
+    print(f"Exported {len(tests)} tests to run to {output_json}")
+
+if __name__ == "__main__":
+    main()