From d546ec0a533410ab3f8855dbbc519db2f5cccc73 Mon Sep 17 00:00:00 2001 From: Randy Spaulding <167460321+randyspauldingamd@users.noreply.github.com> Date: Tue, 10 Feb 2026 18:38:21 +0000 Subject: [PATCH] [rocm-libraries] ROCm/rocm-libraries#4269 (commit 209f62f) Adapt parser to monorepo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Proposed changes Addressing issues found while trying to run the dependency parser on MIOpen: - Ninja is recording the full path, e.g.: ```json "file_to_executables": { "/home/rspauldi/repos/rocm-libraries/projects/miopen/include/miopen/miopen.h": [ ``` - Running git in the monorepo reports the full _relative_ path, e.g.: ``` "projects/miopen/include/miopen/miopen.h" ``` Of course, `git diff` also returns all files modified in every other project's commits. These are filtered out as early as possible. This solution searches for `rocm-libraries` in the `parsing` step and, if found, extracts the project name and stores it in `enhanced_dependency_mapping.json`. Leading folders are truncated from each file path, up to and including the project name. This allows `_is_project_file` to remain unchanged. The `selection` step then retrieves the project name from the JSON if it is defined, and truncates the project folder from the `git diff` output so the filenames exactly match the JSON entries. ## Checklist Please put an `x` into the boxes that apply. You can also fill these out after creating the PR. If you're not sure, please don't hesitate to ask. - [ ] I have added tests relevant to the introduced functionality, and the unit tests are passing locally - [ ] I have added the test to REGRESSION_TESTS list defined at the top of CMakeLists.txt in tests/CMakeLists.txt, **IF** the test takes more than 30 seconds to run. 
- [X] I have added inline documentation which helps the maintainers understand the motivation - [ ] I have removed the stale documentation which is no longer relevant after this pull request - [ ] (If this change is user-facing) I have added release notes which provide the end users with a brief summary of the improvement from this pull request - [ ] I have run `clang-format` on all changed files - [ ] Any dependent changes have been merged ## Discussion This successfully runs on rocm-libraries MIOpen PRs and produces a list of tests. I haven't verified the results yet. This version is not applicable to CI since it operates on a per-executable level and MIOpen CI uses a single gtest binary. I'll be working towards that in future PRs over the next few weeks. ``` /home/rspauldi/repos/rocm-libraries/projects/miopen# git checkout miopen/sgundabo_enable_ck_bwd_wrw_navi # ninja tests # root@rjs1:/home/rspauldi/repos/rocm-libraries/projects/miopen# python3 /dep/main.py parse build/build.ninja Parsing ninja dependencies from: build/build.ninja Parsing ninja build file... Found 312 executables Found 820 object-to-source mappings Found 820 object files Extracting detailed dependencies for all object files... Processed 100/820 object files... Processed 200/820 object files... Processed 300/820 object files... Processed 400/820 object files... Processed 500/820 object files... Processed 600/820 object files... Processed 700/820 object files... Processed 800/820 object files... Completed dependency extraction for 820 object files Building file-to-executable mapping... 
Found rocm-libraries project: 'miopen' Built mapping for 608 files Files used by multiple executables: 216 Sample files with multiple dependencies: build/include/miopen/config.h: 306 executables build/include/miopen/export.h: 306 executables build/include/miopen/export_internals.h: 304 executables driver/InputFlags.hpp: 2 executables driver/driver.hpp: 2 executables === Enhanced Dependency Mapping Summary === Total executables: 312 Total files mapped: 608 Total object files processed: 820 File types: .cpp files: 310 .hpp files: 292 .h files: 6 Files used by multiple executables: 216 Top files with most dependencies: build/include/miopen/config.h: 306 executables build/include/miopen/export.h: 306 executables include/miopen/miopen.h: 304 executables src/include/miopen/config.hpp: 304 executables build/include/miopen/export_internals.h: 304 executables src/include/miopen/rank.hpp: 303 executables src/include/miopen/errors.hpp: 302 executables src/include/miopen/object.hpp: 302 executables src/include/miopen/returns.hpp: 302 executables src/include/miopen/sysinfo_utils.hpp: 302 executables Exporting mapping to build/enhanced_file_executable_mapping.csv Exporting complete mapping to build/enhanced_dependency_mapping.json Results exported to: CSV: build/enhanced_file_executable_mapping.csv JSON: build/enhanced_dependency_mapping.json root@rjs1:/home/rspauldi/repos/rocm-libraries/projects/miopen# python3 /dep/main.py select build/enhanced_dependency_mapping.json 1b13d8b72d54e34bdc7ae70dd2b6e809dca8b10e 09e5965d55ebbfacfd1ed18e5092580c2ffae748 Identified 30 files modified in project 'miopen' Exported 304 tests to run to tests_to_run.json ``` I don't know if clang-format applies to scripts. If so, could someone show me how to run it in CK? 
--- .../src/enhanced_ninja_parser.py | 13 +++++++++ .../src/selective_test_filter.py | 29 +++++++++++++++---- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/script/dependency-parser/src/enhanced_ninja_parser.py b/script/dependency-parser/src/enhanced_ninja_parser.py index 72386628ab..ec1f835a6b 100644 --- a/script/dependency-parser/src/enhanced_ninja_parser.py +++ b/script/dependency-parser/src/enhanced_ninja_parser.py @@ -163,11 +163,20 @@ class EnhancedNinjaDependencyParser: """Build the final mapping from files to executables.""" print("Building file-to-executable mapping...") + # For monorepo, truncate the path before and including projects/ + self.project = None + rl_regex = rf"rocm-libraries[\\/]+projects[\\/]+([^\\/]+)[\\/]+(.*)" for exe, object_files in self.executable_to_objects.items(): for obj_file in object_files: # Add all dependencies of this object file if obj_file in self.object_to_all_deps: for dep_file in self.object_to_all_deps[obj_file]: + match = re.search(rl_regex, dep_file, re.IGNORECASE) + if match: + dep_file = match.group(2) + if not self.project: + print(f"Found rocm-libraries project: '{match.group(1)}'") + self.project = match.group(1) # Filter out system files and focus on project files if self._is_project_file(dep_file): self.file_to_executables[dep_file].add(exe) @@ -244,6 +253,10 @@ class EnhancedNinjaDependencyParser: exe_to_files[exe].add(file_path) mapping_data = { + "repo": { + "type": "monorepo" if self.project else "component", + "project": self.project + }, "file_to_executables": { file_path: list(exes) for file_path, exes in self.file_to_executables.items() diff --git a/script/dependency-parser/src/selective_test_filter.py b/script/dependency-parser/src/selective_test_filter.py index 782fac5606..465db38615 100644 --- a/script/dependency-parser/src/selective_test_filter.py +++ b/script/dependency-parser/src/selective_test_filter.py @@ -31,7 +31,7 @@ import json import os -def get_changed_files(ref1, ref2): 
+def get_changed_files(ref1, ref2, project: str = None): """Return a set of files changed between two git refs.""" try: result = subprocess.run( @@ -47,7 +47,21 @@ def get_changed_files(ref1, ref2): text=True, check=True, ) - files = set(line.strip() for line in result.stdout.splitlines() if line.strip()) + + raw_files = set(line.strip() for line in result.stdout.splitlines() if line.strip()) + + if project is None: + files = raw_files + print(f"Identified {len(files)} modified files") + else: + root = f"projects/{project}/" + root_len = len(root) + files = set() + for f in raw_files: + if f.startswith(root): + files.add(f[root_len:]) + print(f"Identified {len(files)} files modified in project '{project}'") + return files except subprocess.CalledProcessError as e: print(f"Command '{e.cmd}' returned non-zero exit status {e.returncode}.") @@ -61,9 +75,12 @@ def load_depmap(depmap_json): with open(depmap_json, "r") as f: data = json.load(f) # Support both old and new formats + json_project = None + if "repo" in data and data["repo"]["type"] == "monorepo": + json_project = data["repo"]["project"] if "file_to_executables" in data: - return data["file_to_executables"] - return data + return data["file_to_executables"], json_project + return data, json_project def select_tests(file_to_executables, changed_files, filter_mode): @@ -141,12 +158,12 @@ def main(): print(f"Dependency map JSON not found: {depmap_json}") sys.exit(1) - changed_files = get_changed_files(ref1, ref2) + file_to_executables, json_project = load_depmap(depmap_json) + changed_files = get_changed_files(ref1, ref2, json_project) if not changed_files: print("No changed files detected.") tests = [] else: - file_to_executables = load_depmap(depmap_json) tests = select_tests(file_to_executables, changed_files, filter_mode) with open(output_json, "w") as f: