Update pre-commit to fixed versions, run remod for ck_tile (#2895)

* Fix ruff linter errors

* Fix remod dos2unix command

* Clang format

* Ignore utility in remod

* Run remod

* Specify clang-format version in pre-commit

* Specify ruff version

* Include PoolKernelArgs in reference_pool

* Add calculate_total_elements to reference batched contraction

* Fix calculate_total_elements declaration

* Refactor remod pre-commit hook

* Fix Aquant tests

---------

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>

[ROCm/composable_kernel commit: d40b50b9d5]
Author: Johannes Graner
Date: 2025-10-17 00:29:17 +02:00 (committed by GitHub)
Parent: 6066662785
Commit: 580a54b400
77 changed files with 21671 additions and 9858 deletions


@@ -14,43 +14,69 @@ Features:
import argparse
import sys
import os
def run_dependency_parser(args):
from src.enhanced_ninja_parser import main as ninja_main
sys.argv = ["enhanced_ninja_parser.py"] + args
ninja_main()
def run_selective_test_filter(args):
from src.selective_test_filter import main as filter_main
sys.argv = ["selective_test_filter.py"] + args
filter_main()
def main():
parser = argparse.ArgumentParser(description="Unified Ninja Dependency & Selective Testing Tool")
parser = argparse.ArgumentParser(
description="Unified Ninja Dependency & Selective Testing Tool"
)
subparsers = parser.add_subparsers(dest="command", required=True)
# Dependency parsing
parser_parse = subparsers.add_parser("parse", help="Parse build.ninja and generate dependency mapping")
parser_parse = subparsers.add_parser(
"parse", help="Parse build.ninja and generate dependency mapping"
)
parser_parse.add_argument("build_ninja", help="Path to build.ninja")
parser_parse.add_argument("--ninja", help="Path to ninja executable", default="ninja")
parser_parse.add_argument("--workspace-root", help="Path to workspace root", default=None)
parser_parse.add_argument(
"--ninja", help="Path to ninja executable", default="ninja"
)
parser_parse.add_argument(
"--workspace-root", help="Path to workspace root", default=None
)
# Selective testing
parser_test = subparsers.add_parser("select", help="Selective test filtering between git refs")
parser_test = subparsers.add_parser(
"select", help="Selective test filtering between git refs"
)
parser_test.add_argument("depmap_json", help="Path to dependency mapping JSON")
parser_test.add_argument("ref1", help="Source git ref")
parser_test.add_argument("ref2", help="Target git ref")
parser_test.add_argument("--all", action="store_true", help="Include all executables")
parser_test.add_argument("--test-prefix", action="store_true", help="Only include executables starting with 'test_'")
parser_test.add_argument("--output", help="Output JSON file", default="tests_to_run.json")
parser_test.add_argument(
"--all", action="store_true", help="Include all executables"
)
parser_test.add_argument(
"--test-prefix",
action="store_true",
help="Only include executables starting with 'test_'",
)
parser_test.add_argument(
"--output", help="Output JSON file", default="tests_to_run.json"
)
# Code auditing
parser_audit = subparsers.add_parser("audit", help="List all files and their dependent executables")
parser_audit = subparsers.add_parser(
"audit", help="List all files and their dependent executables"
)
parser_audit.add_argument("depmap_json", help="Path to dependency mapping JSON")
# Build optimization
parser_opt = subparsers.add_parser("optimize", help="List affected executables for changed files")
parser_opt = subparsers.add_parser(
"optimize", help="List affected executables for changed files"
)
parser_opt.add_argument("depmap_json", help="Path to dependency mapping JSON")
parser_opt.add_argument("changed_files", nargs="+", help="List of changed files")
@@ -73,9 +99,12 @@ def main():
elif args.command == "audit":
run_selective_test_filter([args.depmap_json, "--audit"])
elif args.command == "optimize":
run_selective_test_filter([args.depmap_json, "--optimize-build"] + args.changed_files)
run_selective_test_filter(
[args.depmap_json, "--optimize-build"] + args.changed_files
)
else:
parser.print_help()
if __name__ == "__main__":
main()
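
For orientation, a hypothetical invocation of the unified tool above, sketched in Python via subprocess. The entry-point name ninja_tool.py is a placeholder (the diff does not show the script's filename), and the paths are illustrative; the mapping JSON name matches the exporter shown further down.

# A hypothetical usage sketch of the unified CLI above; the script name
# "ninja_tool.py" and all paths are placeholders, not taken from the diff.
import subprocess

# Parse build.ninja and emit the dependency mapping (CSV + JSON).
subprocess.run(
    ["python3", "ninja_tool.py", "parse", "build/build.ninja"],
    check=True,
)

# Select tests affected by changes between two git refs, keeping only
# executables whose names start with "test_".
subprocess.run(
    [
        "python3", "ninja_tool.py", "select",
        "build/enhanced_dependency_mapping.json",
        "origin/develop", "HEAD",
        "--test-prefix", "--output", "tests_to_run.json",
    ],
    check=True,
)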


@@ -14,96 +14,100 @@ import re
import os
import sys
import subprocess
from pathlib import Path
from collections import defaultdict
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
class EnhancedNinjaDependencyParser:
def __init__(self, build_file_path, ninja_executable="ninja"):
self.build_file_path = build_file_path
self.build_dir = os.path.dirname(build_file_path)
self.ninja_executable = ninja_executable
# Core data structures
self.executable_to_objects = {} # exe -> [object_files]
self.object_to_source = {} # object -> primary_source
self.object_to_all_deps = {} # object -> [all_dependencies]
self.object_to_source = {} # object -> primary_source
self.object_to_all_deps = {} # object -> [all_dependencies]
self.file_to_executables = defaultdict(set) # file -> {executables}
# Thread safety
self.lock = threading.Lock()
def parse_dependencies(self):
"""Main method to parse all dependencies."""
print(f"Parsing ninja dependencies from: {self.build_file_path}")
# Step 1: Parse build file for executable -> object mappings
self._parse_build_file()
# Step 2: Get all object files and their dependencies
print(f"Found {len(self.object_to_source)} object files")
print("Extracting detailed dependencies for all object files...")
self._extract_object_dependencies()
# Step 3: Build the final file -> executables mapping
self._build_file_to_executable_mapping()
def _parse_build_file(self):
"""Parse the ninja build file to extract executable -> object mappings."""
print("Parsing ninja build file...")
with open(self.build_file_path, 'r') as f:
with open(self.build_file_path, "r") as f:
content = f.read()
# Parse executable build rules
exe_pattern = r'^build (bin/[^:]+):\s+\S+\s+([^|]+)'
obj_pattern = r'^build ([^:]+\.(?:cpp|cu|hip)\.o):\s+\S+\s+([^\s|]+)'
lines = content.split('\n')
# Parse executable build rules
exe_pattern = r"^build (bin/[^:]+):\s+\S+\s+([^|]+)"
obj_pattern = r"^build ([^:]+\.(?:cpp|cu|hip)\.o):\s+\S+\s+([^\s|]+)"
lines = content.split("\n")
for line in lines:
# Match executable rules
exe_match = re.match(exe_pattern, line)
if exe_match and ('EXECUTABLE' in line or 'test_' in exe_match.group(1) or 'example_' in exe_match.group(1)):
if exe_match and (
"EXECUTABLE" in line
or "test_" in exe_match.group(1)
or "example_" in exe_match.group(1)
):
exe = exe_match.group(1)
deps_part = exe_match.group(2).strip()
object_files = []
for dep in deps_part.split():
if dep.endswith('.o') and not dep.startswith('/'):
if dep.endswith(".o") and not dep.startswith("/"):
object_files.append(dep)
self.executable_to_objects[exe] = object_files
continue
# Match object compilation rules
obj_match = re.match(obj_pattern, line)
if obj_match:
object_file = obj_match.group(1)
source_file = obj_match.group(2)
self.object_to_source[object_file] = source_file
print(f"Found {len(self.executable_to_objects)} executables")
print(f"Found {len(self.object_to_source)} object-to-source mappings")
def _extract_object_dependencies(self):
"""Extract detailed dependencies for all object files using ninja -t deps."""
object_files = list(self.object_to_source.keys())
# Process object files in parallel for better performance
# Process object files in parallel for better performance
if not object_files:
print("No object files found - skipping dependency extraction")
return
max_workers = min(16, len(object_files)) # Limit concurrent processes
with ThreadPoolExecutor(max_workers=max_workers) as executor:
# Submit all object files for processing
future_to_obj = {
executor.submit(self._get_object_dependencies, obj): obj
executor.submit(self._get_object_dependencies, obj): obj
for obj in object_files
}
# Process completed futures
# Process completed futures
completed = 0
for future in as_completed(future_to_obj):
obj_file = future_to_obj[future]
@@ -113,52 +117,52 @@ class EnhancedNinjaDependencyParser:
self.object_to_all_deps[obj_file] = dependencies
completed += 1
if completed % 100 == 0:
print(f"Processed {completed}/{len(object_files)} object files...")
print(
f"Processed {completed}/{len(object_files)} object files..."
)
except Exception as e:
print(f"Error processing {obj_file}: {e}")
print(f"Completed dependency extraction for {len(self.object_to_all_deps)} object files")
print(
f"Completed dependency extraction for {len(self.object_to_all_deps)} object files"
)
def _get_object_dependencies(self, object_file):
"""Get all dependencies for a single object file using ninja -t deps."""
try:
# Run ninja -t deps for this object file
cmd = [self.ninja_executable, "-t", "deps", object_file]
result = subprocess.run(
cmd,
cwd=self.build_dir,
capture_output=True,
text=True,
timeout=30
cmd, cwd=self.build_dir, capture_output=True, text=True, timeout=30
)
if result.returncode != 0:
return []
dependencies = []
lines = result.stdout.strip().split('\n')
lines = result.stdout.strip().split("\n")
for line in lines[1:]: # Skip first line with metadata
line = line.strip()
if line and not line.startswith('#'):
if line and not line.startswith("#"):
# Convert absolute paths to relative paths from workspace root
dep_file = line
ws_root = getattr(self, "workspace_root", "..")
ws_prefix = ws_root.rstrip("/") + "/"
if dep_file.startswith(ws_prefix):
dep_file = dep_file[len(ws_prefix):]
dep_file = dep_file[len(ws_prefix) :]
dependencies.append(dep_file)
return dependencies
except Exception as e:
print(f"Error getting dependencies for {object_file}: {e}")
return []
def _build_file_to_executable_mapping(self):
"""Build the final mapping from files to executables."""
print("Building file-to-executable mapping...")
for exe, object_files in self.executable_to_objects.items():
for obj_file in object_files:
# Add all dependencies of this object file
@@ -167,106 +171,135 @@ class EnhancedNinjaDependencyParser:
# Filter out system files and focus on project files
if self._is_project_file(dep_file):
self.file_to_executables[dep_file].add(exe)
print(f"Built mapping for {len(self.file_to_executables)} files")
# Show statistics
multi_exe_files = {f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1}
multi_exe_files = {
f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1
}
print(f"Files used by multiple executables: {len(multi_exe_files)}")
if multi_exe_files:
print("Sample files with multiple dependencies:")
for f, exes in sorted(multi_exe_files.items())[:5]:
print(f" {f}: {len(exes)} executables")
def _is_project_file(self, file_path):
"""Determine if a file is part of the project (not system files)."""
# Include files that are clearly part of the project
if any(file_path.startswith(prefix) for prefix in [
'include/', 'library/', 'test/', 'example/', 'src/', 'profiler/',
'build/include/', 'build/_deps/gtest', 'client_example', 'codegen', 'tile_engine'
]):
if any(
file_path.startswith(prefix)
for prefix in [
"include/",
"library/",
"test/",
"example/",
"src/",
"profiler/",
"build/include/",
"build/_deps/gtest",
"client_example",
"codegen",
"tile_engine",
]
):
return True
# Exclude system files
if any(file_path.startswith(prefix) for prefix in [
'/usr/', '/opt/rocm', '/lib/', '/system/', '/local/'
]):
if any(
file_path.startswith(prefix)
for prefix in ["/usr/", "/opt/rocm", "/lib/", "/system/", "/local/"]
):
return False
# Include files with common source/header extensions
if file_path.endswith(('.cpp', '.hpp', '.h', '.c', '.cc', '.cxx', '.cu', '.hip', '.inc')):
if file_path.endswith(
(".cpp", ".hpp", ".h", ".c", ".cc", ".cxx", ".cu", ".hip", ".inc")
):
return True
return False
def export_to_csv(self, output_file):
"""Export the file-to-executable mapping to CSV with proper comma separation."""
print(f"Exporting mapping to {output_file}")
with open(output_file, 'w') as f:
with open(output_file, "w") as f:
f.write("source_file,executables\n")
for file_path in sorted(self.file_to_executables.keys()):
executables = sorted(self.file_to_executables[file_path])
# Use semicolon to separate multiple executables within the field
exe_list = ';'.join(executables)
exe_list = ";".join(executables)
f.write(f'"{file_path}","{exe_list}"\n')
def export_to_json(self, output_file):
"""Export the complete mapping to JSON."""
print(f"Exporting complete mapping to {output_file}")
# Build reverse mapping (executable -> files)
exe_to_files = defaultdict(set)
for file_path, exes in self.file_to_executables.items():
for exe in exes:
exe_to_files[exe].add(file_path)
mapping_data = {
'file_to_executables': {
file_path: list(exes) for file_path, exes in self.file_to_executables.items()
"file_to_executables": {
file_path: list(exes)
for file_path, exes in self.file_to_executables.items()
},
'executable_to_files': {
"executable_to_files": {
exe: sorted(files) for exe, files in exe_to_files.items()
},
'statistics': {
'total_files': len(self.file_to_executables),
'total_executables': len(self.executable_to_objects),
'total_object_files': len(self.object_to_source),
'files_with_multiple_executables': len([f for f, exes in self.file_to_executables.items() if len(exes) > 1])
}
"statistics": {
"total_files": len(self.file_to_executables),
"total_executables": len(self.executable_to_objects),
"total_object_files": len(self.object_to_source),
"files_with_multiple_executables": len(
[f for f, exes in self.file_to_executables.items() if len(exes) > 1]
),
},
}
with open(output_file, 'w') as f:
with open(output_file, "w") as f:
json.dump(mapping_data, f, indent=2)
def print_summary(self):
"""Print a summary of the parsed dependencies."""
"""Print a summary of the parsed dependencies."""
print("\n=== Enhanced Dependency Mapping Summary ===")
print(f"Total executables: {len(self.executable_to_objects)}")
print(f"Total files mapped: {len(self.file_to_executables)}")
print(f"Total object files processed: {len(self.object_to_all_deps)}")
# Files by type
cpp_files = sum(1 for f in self.file_to_executables.keys() if f.endswith('.cpp'))
hpp_files = sum(1 for f in self.file_to_executables.keys() if f.endswith('.hpp'))
h_files = sum(1 for f in self.file_to_executables.keys() if f.endswith('.h'))
print(f"\nFile types:")
cpp_files = sum(
1 for f in self.file_to_executables.keys() if f.endswith(".cpp")
)
hpp_files = sum(
1 for f in self.file_to_executables.keys() if f.endswith(".hpp")
)
h_files = sum(1 for f in self.file_to_executables.keys() if f.endswith(".h"))
print("\nFile types:")
print(f" .cpp files: {cpp_files}")
print(f" .hpp files: {hpp_files}")
print(f" .h files: {h_files}")
# Multi-executable files
multi_exe_files = {f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1}
multi_exe_files = {
f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1
}
print(f"\nFiles used by multiple executables: {len(multi_exe_files)}")
if multi_exe_files:
print("\nTop files with most dependencies:")
sorted_multi = sorted(multi_exe_files.items(), key=lambda x: len(x[1]), reverse=True)
sorted_multi = sorted(
multi_exe_files.items(), key=lambda x: len(x[1]), reverse=True
)
for file_path, exes in sorted_multi[:10]:
print(f" {file_path}: {len(exes)} executables")
def main():
# Accept: build_file, ninja_path, workspace_root
default_workspace_root = ".."
@@ -304,15 +337,16 @@ def main():
# Export results
output_dir = os.path.dirname(build_file)
csv_file = os.path.join(output_dir, 'enhanced_file_executable_mapping.csv')
json_file = os.path.join(output_dir, 'enhanced_dependency_mapping.json')
csv_file = os.path.join(output_dir, "enhanced_file_executable_mapping.csv")
json_file = os.path.join(output_dir, "enhanced_dependency_mapping.json")
parser.export_to_csv(csv_file)
parser.export_to_json(json_file)
print(f"\nResults exported to:")
print("\nResults exported to:")
print(f" CSV: {csv_file}")
print(f" JSON: {json_file}")
if __name__ == "__main__":
main()
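
To make _get_object_dependencies concrete, a minimal sketch, under the assumption that `ninja -t deps` prints one metadata line followed by indented dependency paths (which is what the parser's skip-first-line logic implies). The sample output and paths are illustrative.

# A minimal sketch of parsing `ninja -t deps` style output, mirroring
# _get_object_dependencies above; sample output and paths are illustrative.
raw = """example/foo.cpp.o: #deps 3, deps mtime 1700000000 (VALID)
    ../example/foo.cpp
    ../include/ck_tile/core/config.hpp
    /opt/rocm/include/hip/hip_runtime.h
"""
deps = []
for line in raw.strip().split("\n")[1:]:  # skip the metadata line
    line = line.strip()
    if line and not line.startswith("#"):
        if line.startswith("../"):  # default workspace_root is ".."
            line = line[len("../") :]
        deps.append(line)
print(deps)
# _is_project_file() later drops system paths such as /opt/rocm/...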


@@ -30,12 +30,15 @@ import subprocess
import json
import os
def get_changed_files(ref1, ref2):
"""Return a set of files changed between two git refs."""
try:
result = subprocess.run(
["git", "diff", "--name-only", ref1, ref2],
capture_output=True, text=True, check=True
capture_output=True,
text=True,
check=True,
)
files = set(line.strip() for line in result.stdout.splitlines() if line.strip())
return files
@@ -43,6 +46,7 @@ def get_changed_files(ref1, ref2):
print(f"Error running git diff: {e}")
sys.exit(1)
def load_depmap(depmap_json):
"""Load the dependency mapping JSON."""
with open(depmap_json, "r") as f:
@@ -52,6 +56,7 @@ def load_depmap(depmap_json):
return data["file_to_executables"]
return data
def select_tests(file_to_executables, changed_files, filter_mode):
"""Return a set of test executables affected by changed files."""
affected = set()
@@ -64,6 +69,7 @@ def select_tests(file_to_executables, changed_files, filter_mode):
affected.add(exe)
return sorted(affected)
def main():
if "--audit" in sys.argv:
if len(sys.argv) < 2:
@@ -81,7 +87,9 @@ def main():
if "--optimize-build" in sys.argv:
if len(sys.argv) < 3:
print("Usage: python selective_test_filter.py <depmap_json> --optimize-build <changed_file1> [<changed_file2> ...]")
print(
"Usage: python selective_test_filter.py <depmap_json> --optimize-build <changed_file1> [<changed_file2> ...]"
)
sys.exit(1)
depmap_json = sys.argv[1]
changed_files = set(sys.argv[sys.argv.index("--optimize-build") + 1 :])
@@ -100,7 +108,9 @@ def main():
sys.exit(0)
if len(sys.argv) < 4:
print("Usage: python selective_test_filter.py <depmap_json> <ref1> <ref2> [--all | --test-prefix] [--output <output_json>]")
print(
"Usage: python selective_test_filter.py <depmap_json> <ref1> <ref2> [--all | --test-prefix] [--output <output_json>]"
)
sys.exit(1)
depmap_json = sys.argv[1]
@@ -131,9 +141,12 @@ def main():
tests = select_tests(file_to_executables, changed_files, filter_mode)
with open(output_json, "w") as f:
json.dump({"tests_to_run": tests, "changed_files": sorted(changed_files)}, f, indent=2)
json.dump(
{"tests_to_run": tests, "changed_files": sorted(changed_files)}, f, indent=2
)
print(f"Exported {len(tests)} tests to run to {output_json}")
if __name__ == "__main__":
main()
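
A self-contained sketch of the selection step, using the file_to_executables shape produced by export_to_json above; the entries below are illustrative.

# A minimal, self-contained sketch of selecting affected tests from the
# mapping JSON; entries are illustrative.
file_to_executables = {
    "include/ck_tile/core/config.hpp": ["bin/test_gemm", "bin/example_gemm"],
    "test/gemm/test_gemm.cpp": ["bin/test_gemm"],
}
changed_files = {"include/ck_tile/core/config.hpp"}

affected = sorted(
    {
        exe
        for path, exes in file_to_executables.items()
        if path in changed_files
        for exe in exes
    }
)
# With --test-prefix, only executables starting with "test_" are kept.
tests = [exe for exe in affected if exe.split("/")[-1].startswith("test_")]
print(tests)  # ['bin/test_gemm']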


@@ -12,38 +12,38 @@ import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Iterator
from typing import Dict, List, Optional, Iterator
class BuildTarget:
"""Represents a single build target with timing information."""
def __init__(self, start_time: int, end_time: int, output_name: str, cmd_hash: str):
self.start_time = int(start_time)
self.end_time = int(end_time)
self.cmd_hash = cmd_hash
self.duration = self.end_time - self.start_time
self.targets = [output_name] # List of target names for this command hash
@property
def category(self) -> str:
"""Categorize the build target based on file extension."""
# Use the first target for categorization
primary_target = self.targets[0] if self.targets else ""
ext = Path(primary_target).suffix.lower()
if ext in ['.o', '.obj']:
return 'compile'
elif ext in ['.a', '.lib']:
return 'archive'
elif ext in ['.so', '.dll', '.dylib']:
return 'link_shared'
elif ext in ['.exe', '.out']:
return 'link_executable'
elif 'test' in primary_target.lower():
return 'test'
if ext in [".o", ".obj"]:
return "compile"
elif ext in [".a", ".lib"]:
return "archive"
elif ext in [".so", ".dll", ".dylib"]:
return "link_shared"
elif ext in [".exe", ".out"]:
return "link_executable"
elif "test" in primary_target.lower():
return "test"
else:
return 'other'
return "other"
@property
def output_name(self) -> str:
"""Get the primary output name (for backward compatibility)."""
@@ -52,11 +52,11 @@ class BuildTarget:
class ThreadScheduler:
"""Simulates thread allocation for parallelism analysis."""
def __init__(self, legacy_mode: bool = False):
self.workers: List[int] = []
self.legacy_mode = legacy_mode
def allocate_thread(self, target: BuildTarget) -> int:
"""Allocate a thread for the given target."""
if self.legacy_mode:
@@ -73,7 +73,7 @@ class ThreadScheduler:
if worker_end_time <= target.start_time:
self.workers[i] = target.end_time
return i
# No available worker, create a new one
self.workers.append(target.end_time)
return len(self.workers) - 1
@@ -81,62 +81,67 @@ class ThreadScheduler:
class NinjaLogParser:
"""Parser for ninja build log files."""
def __init__(self, show_all_builds: bool = False):
self.show_all_builds = show_all_builds
def parse_log_file(self, log_path: str) -> List[BuildTarget]:
"""Parse the ninja log file and return build targets."""
if not os.path.exists(log_path):
raise FileNotFoundError(f"Ninja log file not found: {log_path}")
with open(log_path, 'r', encoding='utf-8') as file:
with open(log_path, "r", encoding="utf-8") as file:
lines = file.readlines()
if not lines:
raise ValueError("Empty ninja log file")
# Parse and validate header
header = lines[0].strip()
version_match = re.match(r'^# ninja log v(\d+)$', header)
version_match = re.match(r"^# ninja log v(\d+)$", header)
if not version_match:
raise ValueError(f"Invalid ninja log header: {header}")
version = int(version_match.group(1))
if version < 5:
raise ValueError(f"Unsupported ninja log version: {version}")
# Skip additional header line for version 6
start_line = 2 if version > 5 else 1
targets: Dict[str, BuildTarget] = {}
last_end_time = 0
for line_num, line in enumerate(lines[start_line:], start=start_line + 1):
line = line.strip()
# Skip empty lines and comments
if not line or line.startswith('#'):
if not line or line.startswith("#"):
continue
parts = line.split('\t')
parts = line.split("\t")
if len(parts) < 5:
print(f"Warning: Skipping malformed line {line_num}: {line}", file=sys.stderr)
print(
f"Warning: Skipping malformed line {line_num}: {line}",
file=sys.stderr,
)
continue
try:
start_time, end_time, _, output_name, cmd_hash = parts[:5]
start_time, end_time = int(start_time), int(end_time)
# Handle incremental builds
if not self.show_all_builds and end_time < last_end_time:
targets.clear()
last_end_time = end_time
# Group targets by command hash
if cmd_hash not in targets:
targets[cmd_hash] = BuildTarget(start_time, end_time, output_name, cmd_hash)
targets[cmd_hash] = BuildTarget(
start_time, end_time, output_name, cmd_hash
)
else:
# Update with the latest timing and add output
existing = targets[cmd_hash]
@@ -144,223 +149,260 @@ class NinjaLogParser:
existing.end_time = max(existing.end_time, end_time)
existing.duration = existing.end_time - existing.start_time
existing.targets.append(output_name)
except (ValueError, IndexError) as e:
print(f"Warning: Error parsing line {line_num}: {e}", file=sys.stderr)
continue
return sorted(targets.values(), key=lambda t: t.end_time, reverse=True)
class FTimeTraceReader:
"""Reads and processes Clang -ftime-trace JSON files."""
def __init__(self, granularity_us: int = 50000):
self.granularity_us = granularity_us
def read_trace_file(self, trace_path: str) -> Optional[Dict]:
"""Read and parse a Clang time trace file."""
try:
with open(trace_path, 'r', encoding='utf-8') as f:
with open(trace_path, "r", encoding="utf-8") as f:
return json.load(f)
except (FileNotFoundError, json.JSONDecodeError, IOError):
return None
def filter_events(self, trace_data: Dict) -> List[Dict]:
"""Filter trace events based on criteria."""
if 'traceEvents' not in trace_data:
if "traceEvents" not in trace_data:
return []
filtered_events = []
for event in trace_data['traceEvents']:
for event in trace_data["traceEvents"]:
# Only include complete events (ph=X) that meet duration threshold
if (event.get('ph') == 'X' and
event.get('dur', 0) >= self.granularity_us and
not event.get('name', '').startswith('Total')):
if (
event.get("ph") == "X"
and event.get("dur", 0) >= self.granularity_us
and not event.get("name", "").startswith("Total")
):
filtered_events.append(event)
return filtered_events
def adjust_event_timing(self, event: Dict, target: BuildTarget, pid: int, tid: int) -> Dict:
def adjust_event_timing(
self, event: Dict, target: BuildTarget, pid: int, tid: int
) -> Dict:
"""Adjust event timing to align with ninja build timing."""
ninja_duration_us = target.duration * 1000
# Validate event duration against ninja timing
if event.get('dur', 0) > ninja_duration_us:
print(f"Warning: Clang trace event duration ({event['dur']}μs) exceeds "
f"ninja duration ({ninja_duration_us}μs) for {target.output_name}",
file=sys.stderr)
if event.get("dur", 0) > ninja_duration_us:
print(
f"Warning: Clang trace event duration ({event['dur']}μs) exceeds "
f"ninja duration ({ninja_duration_us}μs) for {target.output_name}",
file=sys.stderr,
)
return None
# Adjust event timing
adjusted_event = event.copy()
adjusted_event['pid'] = pid
adjusted_event['tid'] = tid
adjusted_event['ts'] += target.start_time * 1000 # Offset by ninja start time
adjusted_event["pid"] = pid
adjusted_event["tid"] = tid
adjusted_event["ts"] += target.start_time * 1000 # Offset by ninja start time
return adjusted_event
class ChromeTraceGenerator:
"""Generates Chrome tracing format from build targets."""
def __init__(self, process_id: int = 1, embed_ftime_traces: bool = False,
granularity_us: int = 50000, ninja_log_dir: Optional[str] = None,
legacy_format: bool = False):
def __init__(
self,
process_id: int = 1,
embed_ftime_traces: bool = False,
granularity_us: int = 50000,
ninja_log_dir: Optional[str] = None,
legacy_format: bool = False,
):
self.process_id = process_id
self.scheduler = ThreadScheduler(legacy_mode=legacy_format)
self.embed_ftime_traces = embed_ftime_traces
self.ninja_log_dir = ninja_log_dir
self.ftime_reader = FTimeTraceReader(granularity_us) if embed_ftime_traces else None
self.ftime_reader = (
FTimeTraceReader(granularity_us) if embed_ftime_traces else None
)
self.legacy_format = legacy_format
def find_ftime_trace_files(self, target: BuildTarget) -> List[str]:
"""Find Clang -ftime-trace files for a build target."""
if not self.ninja_log_dir:
return []
trace_files = []
# Look for .json files adjacent to object files
obj_path = Path(self.ninja_log_dir) / target.output_name
json_path = obj_path.with_suffix('.json')
json_path = obj_path.with_suffix(".json")
if json_path.exists():
trace_files.append(str(json_path))
return trace_files
def generate_ftime_events(self, target: BuildTarget, tid: int) -> Iterator[Dict]:
"""Generate Clang -ftime-trace events for a target."""
if not self.embed_ftime_traces or not self.ftime_reader:
return
trace_files = self.find_ftime_trace_files(target)
for trace_file in trace_files:
trace_data = self.ftime_reader.read_trace_file(trace_file)
if not trace_data:
continue
filtered_events = self.ftime_reader.filter_events(trace_data)
for event in filtered_events:
adjusted_event = self.ftime_reader.adjust_event_timing(
event, target, self.process_id, tid
)
if adjusted_event:
yield adjusted_event
def generate_trace_events(self, targets: List[BuildTarget]) -> List[Dict]:
"""Generate Chrome trace events from build targets."""
events = []
for target in targets:
thread_id = self.scheduler.allocate_thread(target)
# Add main ninja build event
if self.legacy_format:
# Legacy format: join multiple targets with commas, use "targets" category, empty args
target_name = ', '.join(target.targets) if len(target.targets) > 1 else target.output_name
target_name = (
", ".join(target.targets)
if len(target.targets) > 1
else target.output_name
)
ninja_event = {
'name': target_name,
'cat': 'targets',
'ph': 'X', # Complete event
'ts': target.start_time * 1000, # Convert to microseconds
'dur': target.duration * 1000, # Convert to microseconds
'pid': self.process_id,
'tid': thread_id,
'args': {}
"name": target_name,
"cat": "targets",
"ph": "X", # Complete event
"ts": target.start_time * 1000, # Convert to microseconds
"dur": target.duration * 1000, # Convert to microseconds
"pid": self.process_id,
"tid": thread_id,
"args": {},
}
else:
# New format: smart categorization, detailed args
ninja_event = {
'name': target.output_name,
'cat': target.category,
'ph': 'X', # Complete event
'ts': target.start_time * 1000, # Convert to microseconds
'dur': target.duration * 1000, # Convert to microseconds
'pid': self.process_id,
'tid': thread_id,
'args': {
'output': target.output_name,
'duration_ms': target.duration,
'cmd_hash': target.cmd_hash
}
"name": target.output_name,
"cat": target.category,
"ph": "X", # Complete event
"ts": target.start_time * 1000, # Convert to microseconds
"dur": target.duration * 1000, # Convert to microseconds
"pid": self.process_id,
"tid": thread_id,
"args": {
"output": target.output_name,
"duration_ms": target.duration,
"cmd_hash": target.cmd_hash,
},
}
events.append(ninja_event)
# Add embedded Clang -ftime-trace events
if self.embed_ftime_traces:
ftime_events = list(self.generate_ftime_events(target, thread_id))
events.extend(ftime_events)
if ftime_events:
print(f"Embedded {len(ftime_events)} -ftime-trace events for {target.output_name}",
file=sys.stderr)
print(
f"Embedded {len(ftime_events)} -ftime-trace events for {target.output_name}",
file=sys.stderr,
)
return events
class BuildAnalyzer:
"""Analyzes build performance and provides statistics."""
def __init__(self, targets: List[BuildTarget]):
self.targets = targets
def get_build_summary(self) -> Dict:
"""Generate build performance summary."""
if not self.targets:
return {}
total_duration = sum(t.duration for t in self.targets)
total_targets = len(self.targets)
# Category statistics
category_stats = {}
for target in self.targets:
cat = target.category
if cat not in category_stats:
category_stats[cat] = {'count': 0, 'total_time': 0}
category_stats[cat]['count'] += 1
category_stats[cat]['total_time'] += target.duration
category_stats[cat] = {"count": 0, "total_time": 0}
category_stats[cat]["count"] += 1
category_stats[cat]["total_time"] += target.duration
# Top slowest targets
slowest_targets = sorted(self.targets, key=lambda t: t.duration, reverse=True)[:10]
slowest_targets = sorted(self.targets, key=lambda t: t.duration, reverse=True)[
:10
]
return {
'total_targets': total_targets,
'total_duration_ms': total_duration,
'total_duration_sec': total_duration / 1000,
'average_duration_ms': total_duration / total_targets if total_targets > 0 else 0,
'category_stats': category_stats,
'slowest_targets': [
{'name': t.output_name, 'duration_ms': t.duration, 'category': t.category}
"total_targets": total_targets,
"total_duration_ms": total_duration,
"total_duration_sec": total_duration / 1000,
"average_duration_ms": total_duration / total_targets
if total_targets > 0
else 0,
"category_stats": category_stats,
"slowest_targets": [
{
"name": t.output_name,
"duration_ms": t.duration,
"category": t.category,
}
for t in slowest_targets
]
],
}
def print_summary(self):
"""Print build summary to stderr."""
summary = self.get_build_summary()
if not summary:
print("No build data available", file=sys.stderr)
return
print(f"\n=== Build Summary ===", file=sys.stderr)
print("\n=== Build Summary ===", file=sys.stderr)
print(f"Total targets: {summary['total_targets']}", file=sys.stderr)
print(f"Total time: {summary['total_duration_sec']:.2f}s", file=sys.stderr)
print(f"Average time per target: {summary['average_duration_ms']:.2f}ms", file=sys.stderr)
print(f"\nBy category:", file=sys.stderr)
for category, stats in summary['category_stats'].items():
avg_time = stats['total_time'] / stats['count'] if stats['count'] > 0 else 0
print(f" {category:15} {stats['count']:6} targets "
f"{stats['total_time']/1000:8.2f}s "
f"(avg: {avg_time/1000:.3f}s)", file=sys.stderr)
print(f"\nSlowest targets:", file=sys.stderr)
for i, target in enumerate(summary['slowest_targets'][:5], 1):
print(f" {i:2}. {target['name']} ({target['duration_ms']}ms, {target['category']})", file=sys.stderr)
print(
f"Average time per target: {summary['average_duration_ms']:.2f}ms",
file=sys.stderr,
)
print("\nBy category:", file=sys.stderr)
for category, stats in summary["category_stats"].items():
avg_time = stats["total_time"] / stats["count"] if stats["count"] > 0 else 0
print(
f" {category:15} {stats['count']:6} targets "
f"{stats['total_time'] / 1000:8.2f}s "
f"(avg: {avg_time / 1000:.3f}s)",
file=sys.stderr,
)
print("\nSlowest targets:", file=sys.stderr)
for i, target in enumerate(summary["slowest_targets"][:5], 1):
print(
f" {i:2}. {target['name']} ({target['duration_ms']}ms, {target['category']})",
file=sys.stderr,
)
def create_argument_parser() -> argparse.ArgumentParser:
@@ -376,57 +418,48 @@ Examples:
%(prog)s build/.ninja_log --show-all # Include all builds
%(prog)s build/.ninja_log --embed-ftime-trace # Include Clang timing data
%(prog)s build/.ninja_log --granularity 10000 # Custom granularity threshold
"""
""",
)
parser.add_argument(
'ninja_logs',
nargs='+', # Accept one or more ninja log files
help='Path(s) to the .ninja_log file(s)'
"ninja_logs",
nargs="+", # Accept one or more ninja log files
help="Path(s) to the .ninja_log file(s)",
)
parser.add_argument("-o", "--output", help="Output file (default: stdout)")
parser.add_argument(
'-o', '--output',
help='Output file (default: stdout)'
"--show-all", action="store_true", help="Show all builds, not just the last one"
)
parser.add_argument(
'--show-all',
action='store_true',
help='Show all builds, not just the last one'
"--summary", action="store_true", help="Print build summary to stderr"
)
parser.add_argument(
'--summary',
action='store_true',
help='Print build summary to stderr'
"--pretty", action="store_true", help="Pretty-print JSON output"
)
parser.add_argument(
'--pretty',
action='store_true',
help='Pretty-print JSON output'
"--embed-ftime-trace",
action="store_true",
help="Embed Clang -ftime-trace JSON files found adjacent to targets",
)
parser.add_argument(
'--embed-ftime-trace',
action='store_true',
help='Embed Clang -ftime-trace JSON files found adjacent to targets'
)
parser.add_argument(
'--granularity',
"--granularity",
type=int,
default=50000,
help='Minimum duration for -ftime-trace events in microseconds (default: 50000)'
help="Minimum duration for -ftime-trace events in microseconds (default: 50000)",
)
parser.add_argument(
'--legacy-format',
action='store_true',
help='Output in legacy format compatible with old ninjatracer (simple JSON array, all categories as "targets", empty args)'
"--legacy-format",
action="store_true",
help='Output in legacy format compatible with old ninjatracer (simple JSON array, all categories as "targets", empty args)',
)
return parser
@@ -434,75 +467,79 @@ def main():
"""Main entry point."""
parser = create_argument_parser()
args = parser.parse_args()
try:
# Process multiple ninja log files
all_events = []
for pid, ninja_log_path in enumerate(args.ninja_logs):
# Parse ninja log
log_parser = NinjaLogParser(show_all_builds=args.show_all)
targets = log_parser.parse_log_file(ninja_log_path)
if not targets:
print(f"No build targets found in ninja log: {ninja_log_path}", file=sys.stderr)
print(
f"No build targets found in ninja log: {ninja_log_path}",
file=sys.stderr,
)
continue
# Determine ninja log directory for -ftime-trace files
ninja_log_dir = os.path.dirname(os.path.abspath(ninja_log_path)) if args.embed_ftime_trace else None
ninja_log_dir = (
os.path.dirname(os.path.abspath(ninja_log_path))
if args.embed_ftime_trace
else None
)
# Generate trace events for this log file
trace_generator = ChromeTraceGenerator(
process_id=pid, # Use different PID for each log file
embed_ftime_traces=args.embed_ftime_trace,
granularity_us=args.granularity,
ninja_log_dir=ninja_log_dir,
legacy_format=args.legacy_format
legacy_format=args.legacy_format,
)
events = trace_generator.generate_trace_events(targets)
all_events.extend(events)
# Print summary if requested (for each log file)
if args.summary:
print(f"\n=== Summary for {ninja_log_path} ===", file=sys.stderr)
analyzer = BuildAnalyzer(targets)
analyzer.print_summary()
if not all_events:
print("No build targets found in any ninja log files", file=sys.stderr)
return 1
# Output format logic
if args.legacy_format:
# Legacy format: always output simple JSON array
json_kwargs = {'indent': 2} if args.pretty else {}
json_kwargs = {"indent": 2} if args.pretty else {}
json_output = json.dumps(all_events, **json_kwargs)
elif args.output or args.pretty:
# Enhanced format with metadata (when saving to file or pretty printing)
trace_data = {
'traceEvents': all_events,
'displayTimeUnit': 'ms',
'systemTraceEvents': 'SystemTraceData',
'otherData': {
'version': '1.0',
'generator': 'ninja_json_converter.py'
}
"traceEvents": all_events,
"displayTimeUnit": "ms",
"systemTraceEvents": "SystemTraceData",
"otherData": {"version": "1.0", "generator": "ninja_json_converter.py"},
}
json_kwargs = {'indent': 2} if args.pretty else {}
json_kwargs = {"indent": 2} if args.pretty else {}
json_output = json.dumps(trace_data, **json_kwargs)
else:
# Original format (simple JSON array to stdout)
json_output = json.dumps(all_events)
if args.output:
with open(args.output, 'w') as f:
with open(args.output, "w") as f:
f.write(json_output)
print(f"Trace written to {args.output}", file=sys.stderr)
else:
print(json_output)
return 0
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
return 1
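
For reference, one Chrome-trace "complete" event as the non-legacy path above emits it; field values are illustrative.

# A sketch of a single "complete" (ph = "X") event in the new format above;
# all values are illustrative. Trace times are in microseconds.
import json

event = {
    "name": "example/foo.cpp.o",
    "cat": "compile",  # BuildTarget.category maps .o/.obj to "compile"
    "ph": "X",
    "ts": 120 * 1000,  # ninja start time (ms) converted to us
    "dur": 4500 * 1000,  # ninja duration (ms) converted to us
    "pid": 1,
    "tid": 0,
    "args": {
        "output": "example/foo.cpp.o",
        "duration_ms": 4500,
        "cmd_hash": "abc123",
    },
}
print(json.dumps([event]))  # loads directly in chrome://tracing or Perfetto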


@@ -1,13 +1,16 @@
#!/usr/bin/env python3
import os, io, argparse, datetime
#import numpy as np
import os
import io
import argparse
import datetime
# import numpy as np
import sqlalchemy
from sqlalchemy.types import NVARCHAR, Float, Integer
from sqlalchemy import text
import pymysql
import pandas as pd
from sshtunnel import SSHTunnelForwarder
def print_to_string(*args, **kwargs):
output = io.StringIO()
print(*args, file=output, **kwargs)
@@ -15,15 +18,18 @@ def print_to_string(*args, **kwargs):
output.close()
return contents
def parse_args():
parser = argparse.ArgumentParser(description='Parse results from tf benchmark runs')
parser.add_argument('filename', type=str, help='Log file to parse or directory containing log files')
parser = argparse.ArgumentParser(description="Parse results from tf benchmark runs")
parser.add_argument(
"filename", type=str, help="Log file to prase or directory containing log files"
)
args = parser.parse_args()
files = []
if os.path.isdir(args.filename):
all_files = os.listdir(args.filename)
for name in all_files:
if not 'log' in name:
if "log" not in name:
continue
files.append(os.path.join(args.filename, name))
else:
@@ -31,62 +37,76 @@ def parse_args():
args.files = files
return args
def get_log_params(logfile):
print("logfile=",logfile)
branch_name=' '
node_id=' '
gpu_arch=' '
hip_vers=' '
compute_units=0
environment=' '
rocm_vers=' '
print("logfile=", logfile)
branch_name = " "
node_id = " "
gpu_arch = " "
hip_vers = " "
compute_units = 0
environment = " "
rocm_vers = " "
for line in open(logfile):
if 'Branch name' in line:
lst=line.split()
branch_name=lst[2]
if 'On branch' in line:
lst=line.split()
branch_name=lst[2]
if 'Node name' in line:
lst=line.split()
node_id=lst[2]
if 'GPU_arch' in line:
lst=line.split()
gpu_arch=lst[2]
if 'HIP version' in line:
lst=line.split()
hip_vers=lst[2]
if 'Compute Unit' in line:
lst=line.split()
compute_units=lst[2]
if 'Environment type' in line:
lst=line.split()
environment=lst[2]
if 'InstalledDir' in line:
lst=line.split()
rocm_vers=lst[1][lst[1].find('/opt/rocm-')+len('/opt/rocm-'):lst[1].rfind('/llvm/bin')]
return branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment
if "Branch name" in line:
lst = line.split()
branch_name = lst[2]
if "On branch" in line:
lst = line.split()
branch_name = lst[2]
if "Node name" in line:
lst = line.split()
node_id = lst[2]
if "GPU_arch" in line:
lst = line.split()
gpu_arch = lst[2]
if "HIP version" in line:
lst = line.split()
hip_vers = lst[2]
if "Compute Unit" in line:
lst = line.split()
compute_units = lst[2]
if "Environment type" in line:
lst = line.split()
environment = lst[2]
if "InstalledDir" in line:
lst = line.split()
rocm_vers = lst[1][
lst[1].find("/opt/rocm-") + len("/opt/rocm-") : lst[1].rfind(
"/llvm/bin"
)
]
return (
branch_name,
node_id,
gpu_arch,
compute_units,
rocm_vers,
hip_vers,
environment,
)
def parse_logfile(logfile):
glue=''
res=[]
tests=[]
kernels=[]
tflops=[]
dtype=[]
alayout=[]
blayout=[]
M=[]
N=[]
K=[]
StrideA=[]
StrideB=[]
StrideC=[]
if 'perf_gemm' in logfile and 'gemm_bilinear' not in logfile:
glue = ""
res = []
tests = []
kernels = []
tflops = []
dtype = []
alayout = []
blayout = []
M = []
N = []
K = []
StrideA = []
StrideB = []
StrideC = []
if "perf_gemm" in logfile and "gemm_bilinear" not in logfile:
for line in open(logfile):
if 'Best Perf' in line:
lst=line.split()
if len(lst)>=37: #the line is complete
if "Best Perf" in line:
lst = line.split()
if len(lst) >= 37: # the line is complete
tests.append(glue.join(lst[5:30]))
kernels.append(glue.join(lst[37:]))
tflops.append(lst[33])
@@ -99,7 +119,7 @@ def parse_logfile(logfile):
StrideA.append(lst[23])
StrideB.append(lst[26])
StrideC.append(lst[29])
elif len(lst)<37 and len(lst)>=33: #the tflops are available
elif len(lst) < 37 and len(lst) >= 33: # the tflops are available
tests.append(glue.join(lst[5:30]))
kernels.append("N/A")
tflops.append(lst[33])
@@ -112,87 +132,141 @@ def parse_logfile(logfile):
StrideA.append(lst[23])
StrideB.append(lst[26])
StrideC.append(lst[29])
print("warning: incomplete line:",lst)
elif len(lst)<33: #even the tflops are not available
print("warning: incomplete line:", lst)
elif len(lst) < 33: # even the tflops are not available
print("Error in ckProfiler output!")
print("warning: incomplete line=",lst)
#sort results
#sorted_tests = sorted(tests)
res = [x for _,x in sorted(zip(tests,tflops))]
#sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
test_list=list(range(1,len(tests)+1))
#parse conv_fwd and conv_bwd performance tests:
elif 'conv_fwd' in logfile or 'conv_bwd' in logfile:
print("warning: incomplete line=", lst)
# sort results
# sorted_tests = sorted(tests)
res = [x for _, x in sorted(zip(tests, tflops))]
# sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
# test_list = list(range(1, len(tests) + 1))
# parse conv_fwd and conv_bwd performance tests:
elif "conv_fwd" in logfile or "conv_bwd" in logfile:
for line in open(logfile):
if 'tflops:' in line:
lst=line.split()
if "tflops:" in line:
lst = line.split()
res.append(lst[1])
#parse all other performance tests:
elif 'resnet50' in logfile or 'batched_gemm' in logfile or 'grouped_gemm' in logfile or 'gemm_bilinear' in logfile or 'reduction' in logfile:
# parse all other performance tests:
elif (
"resnet50" in logfile
or "batched_gemm" in logfile
or "grouped_gemm" in logfile
or "gemm_bilinear" in logfile
or "reduction" in logfile
):
for line in open(logfile):
if 'Best Perf' in line:
lst=line.split()
if "Best Perf" in line:
lst = line.split()
res.append(lst[4])
elif 'onnx_gemm' in logfile:
elif "onnx_gemm" in logfile:
for line in open(logfile):
if 'Best Perf' in line:
lst=line.split()
if "Best Perf" in line:
lst = line.split()
res.append(lst[33])
elif 'splitK_gemm' in logfile or 'mixed_gemm' in logfile:
elif "splitK_gemm" in logfile or "mixed_gemm" in logfile:
for line in open(logfile):
if 'Best Perf' in line:
lst=line.split()
if "Best Perf" in line:
lst = line.split()
res.append(lst[36])
elif 'perf_fmha' in logfile:
elif "perf_fmha" in logfile:
for line in open(logfile):
if 'TFlops' in line:
lst=line.split()
line_dict=dict(zip(lst[1:],lst))
res.append(line_dict['TFlops,'])
elif 'perf_tile_gemm_basic' in logfile or 'perf_tile_gemm_mem_pipeline' in logfile:
if "TFlops" in line:
lst = line.split()
line_dict = dict(zip(lst[1:], lst))
res.append(line_dict["TFlops,"])
elif "perf_tile_gemm_basic" in logfile or "perf_tile_gemm_mem_pipeline" in logfile:
for line in open(logfile):
if 'TFlops' in line:
lst=line.split()
line_dict=dict(zip(lst[1:],lst))
res.append(line_dict['TFlops,'])
if "TFlops" in line:
lst = line.split()
line_dict = dict(zip(lst[1:], lst))
res.append(line_dict["TFlops,"])
return res
def get_baseline(table, connection):
query = text('''SELECT * from '''+table+''' WHERE Datetime = (SELECT MAX(Datetime) FROM '''+table+''' where Branch_ID='develop' );''')
query = text(
"""SELECT * from """
+ table
+ """ WHERE Datetime = (SELECT MAX(Datetime) FROM """
+ table
+ """ where Branch_ID='develop' );"""
)
return pd.read_sql(query, connection)
def store_new_test_result(table_name, test_results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, connection):
params=[str(branch_name),str(node_id),str(gpu_arch),compute_units,str(rocm_vers),str(hip_vers),str(environment),str(datetime.datetime.now())]
df=pd.DataFrame(data=[params],columns=['Branch_ID','Node_ID','GPU_arch','Compute Units','ROCM_version','HIP_version','Environment','Datetime'])
df_add=pd.DataFrame(data=[test_results],columns=testlist)
df=pd.concat([df,df_add],axis=1)
#print("new test results dataframe:",df)
df.to_sql(table_name,connection,if_exists='append',index=False)
def store_new_test_result(
table_name,
test_results,
testlist,
branch_name,
node_id,
gpu_arch,
compute_units,
rocm_vers,
hip_vers,
environment,
connection,
):
params = [
str(branch_name),
str(node_id),
str(gpu_arch),
compute_units,
str(rocm_vers),
str(hip_vers),
str(environment),
str(datetime.datetime.now()),
]
df = pd.DataFrame(
data=[params],
columns=[
"Branch_ID",
"Node_ID",
"GPU_arch",
"Compute Units",
"ROCM_version",
"HIP_version",
"Environment",
"Datetime",
],
)
df_add = pd.DataFrame(data=[test_results], columns=testlist)
df = pd.concat([df, df_add], axis=1)
# print("new test results dataframe:",df)
df.to_sql(table_name, connection, if_exists="append", index=False)
return 0
def compare_test_to_baseline(baseline,test,testlist):
regression=0
def compare_test_to_baseline(baseline, test, testlist):
regression = 0
if not baseline.empty:
base=baseline[testlist].to_numpy(dtype='float')
base_list=base[0]
ave_perf=0
base = baseline[testlist].to_numpy(dtype="float")
base_list = base[0]
ave_perf = 0
for i in range(len(base_list)):
# success criterion:
if base_list[i]>1.01*float(test[i]):
print("test # ",i,"shows regression by {:.3f}%".format(
(float(test[i])-base_list[i])/base_list[i]*100))
regression=1
if base_list[i]>0: ave_perf=ave_perf+float(test[i])/base_list[i]
if regression==0:
if base_list[i] > 1.01 * float(test[i]):
print(
"test # ",
i,
"shows regression by {:.3f}%".format(
(float(test[i]) - base_list[i]) / base_list[i] * 100
),
)
regression = 1
if base_list[i] > 0:
ave_perf = ave_perf + float(test[i]) / base_list[i]
if regression == 0:
print("no regressions found")
ave_perf=ave_perf/len(base_list)
print("average performance relative to baseline:",ave_perf)
ave_perf = ave_perf / len(base_list)
print("average performance relative to baseline:", ave_perf)
else:
print("could not find a baseline")
return regression
'''
"""
def post_test_params(tlist,connection):
sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))]
sorted_alayout = [x for _,x in sorted(zip(tests,alayout))]
@@ -223,29 +297,38 @@ def post_test_params(tlist,connection):
'StrideC': Integer()
}
df.to_sql("ck_gemm_test_params",connection,if_exists='replace',index=False, dtype=dtypes)
'''
"""
def main():
args = parse_args()
results=[]
tflops_base=[]
testlist=[]
#parse the test parameters from the logfile
results = []
tflops_base = []
testlist = []
# parse the test parameters from the logfile
for filename in args.files:
branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment = get_log_params(filename)
(
branch_name,
node_id,
gpu_arch,
compute_units,
rocm_vers,
hip_vers,
environment,
) = get_log_params(filename)
print("Branch name:",branch_name)
print("Node name:",node_id)
print("GPU_arch:",gpu_arch)
print("Compute units:",compute_units)
print("ROCM_version:",rocm_vers)
print("HIP_version:",hip_vers)
print("Environment:",environment)
#parse results, get the Tflops value for "Best Perf" kernels
results=parse_logfile(filename)
print("Branch name:", branch_name)
print("Node name:", node_id)
print("GPU_arch:", gpu_arch)
print("Compute units:", compute_units)
print("ROCM_version:", rocm_vers)
print("HIP_version:", hip_vers)
print("Environment:", environment)
# parse results, get the Tflops value for "Best Perf" kernels
results = parse_logfile(filename)
print("Number of tests:",len(results))
sql_hostname = '127.0.0.1'
print("Number of tests:", len(results))
sql_hostname = "127.0.0.1"
sql_username = os.environ["dbuser"]
sql_password = os.environ["dbpassword"]
sql_main_database = os.environ["ck_perf_db"]
@@ -256,127 +339,147 @@ def main():
ssh_pass = os.environ["dbsshpassword"]
with SSHTunnelForwarder(
(ssh_host, ssh_port),
ssh_username=ssh_user,
ssh_password=ssh_pass,
remote_bind_address=(sql_hostname, sql_port)) as tunnel:
sqlEngine = sqlalchemy.create_engine('mysql+pymysql://{0}:{1}@{2}:{3}/{4}'.
format(sql_username, sql_password, sql_hostname, tunnel.local_bind_port, sql_main_database))
(ssh_host, ssh_port),
ssh_username=ssh_user,
ssh_password=ssh_pass,
remote_bind_address=(sql_hostname, sql_port),
) as tunnel:
sqlEngine = sqlalchemy.create_engine(
"mysql+pymysql://{0}:{1}@{2}:{3}/{4}".format(
sql_username,
sql_password,
sql_hostname,
tunnel.local_bind_port,
sql_main_database,
)
)
conn = sqlEngine.connect()
#save gemm performance tests:
if 'perf_gemm' in filename and 'gemm_bilinear' not in filename:
#writing the ck_gemm_test_params table is only needed when the test set changes
#post_test_params(test_list,conn)
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_gemm_tflops"
if 'batched_gemm' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_batched_gemm_tflops"
if 'grouped_gemm' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_grouped_gemm_tflops"
if 'perf_conv_fwd' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_conv_fwd_tflops"
if 'perf_conv_bwd_data' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_conv_bwd_data_tflops"
if 'grouped_conv_fwd' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_grouped_conv_fwd_tflops"
if 'grouped_conv_bwd_data' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_grouped_conv_bwd_data_tflops"
if 'grouped_conv_bwd_weight' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_grouped_conv_bwd_weight_tflops"
if 'gemm_bilinear' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_gemm_bilinear_tflops"
if 'reduction' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_reduction_GBps"
if 'resnet50_N4' in filename:
for i in range(1,50):
testlist.append("Layer%i"%i)
table_name="ck_resnet50_N4_tflops"
if 'resnet50_N256' in filename:
for i in range(1,50):
testlist.append("Layer%i"%i)
table_name="ck_resnet50_N256_tflops"
if 'onnx_gemm' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_onnx_gemm_tflops"
if 'splitK_gemm' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_splitK_gemm_tflops"
if 'mixed_gemm' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_mixed_gemm_tflops"
if 'fmha_fwd' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_fmha_fwd_tflops"
if 'fmha_bwd' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_fmha_bwd_tflops"
if 'gemm_basic_fp16' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_basic_fp16_tflops"
if 'gemm_mem_pipeline_fp16' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_mem_pipeline_fp16_tflops"
if 'gemm_basic_bf16' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_basic_bf16_tflops"
if 'gemm_mem_pipeline_bf16' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_mem_pipeline_bf16_tflops"
if 'gemm_basic_fp8' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_basic_fp8_tflops"
if 'gemm_mem_pipeline_fp8' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_mem_pipeline_fp8_tflops"
if 'gemm_basic_bf8' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_basic_bf8_tflops"
if 'gemm_mem_pipeline_bf8' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_mem_pipeline_bf8_tflops"
# save gemm performance tests:
if "perf_gemm" in filename and "gemm_bilinear" not in filename:
# writing the ck_gemm_test_params table is only needed when the test set changes
# post_test_params(test_list,conn)
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_gemm_tflops"
if "batched_gemm" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_batched_gemm_tflops"
if "grouped_gemm" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_grouped_gemm_tflops"
if "perf_conv_fwd" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_conv_fwd_tflops"
if "perf_conv_bwd_data" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_conv_bwd_data_tflops"
if "grouped_conv_fwd" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_grouped_conv_fwd_tflops"
if "grouped_conv_bwd_data" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_grouped_conv_bwd_data_tflops"
if "grouped_conv_bwd_weight" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_grouped_conv_bwd_weight_tflops"
if "gemm_bilinear" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_gemm_bilinear_tflops"
if "reduction" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_reduction_GBps"
if "resnet50_N4" in filename:
for i in range(1, 50):
testlist.append("Layer%i" % i)
table_name = "ck_resnet50_N4_tflops"
if "resnet50_N256" in filename:
for i in range(1, 50):
testlist.append("Layer%i" % i)
table_name = "ck_resnet50_N256_tflops"
if "onnx_gemm" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_onnx_gemm_tflops"
if "splitK_gemm" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_splitK_gemm_tflops"
if "mixed_gemm" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_mixed_gemm_tflops"
if "fmha_fwd" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_fmha_fwd_tflops"
if "fmha_bwd" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_fmha_bwd_tflops"
if "gemm_basic_fp16" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_tile_gemm_basic_fp16_tflops"
if "gemm_mem_pipeline_fp16" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_tile_gemm_mem_pipeline_fp16_tflops"
if "gemm_basic_bf16" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_tile_gemm_basic_bf16_tflops"
if "gemm_mem_pipeline_bf16" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_tile_gemm_mem_pipeline_bf16_tflops"
if "gemm_basic_fp8" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_tile_gemm_basic_fp8_tflops"
if "gemm_mem_pipeline_fp8" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_tile_gemm_mem_pipeline_fp8_tflops"
if "gemm_basic_bf8" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_tile_gemm_basic_bf8_tflops"
if "gemm_mem_pipeline_bf8" in filename:
for i in range(1, len(results) + 1):
testlist.append("Test%i" % i)
table_name = "ck_tile_gemm_mem_pipeline_bf8_tflops"
tflops_base = get_baseline(table_name,conn)
store_new_test_result(table_name, results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, sqlEngine)
tflops_base = get_baseline(table_name, conn)
store_new_test_result(
table_name,
results,
testlist,
branch_name,
node_id,
gpu_arch,
compute_units,
rocm_vers,
hip_vers,
environment,
sqlEngine,
)
conn.close()
#compare the results to the baseline if baseline exists
regression=0
regression=compare_test_to_baseline(tflops_base,results,testlist)
# compare the results to the baseline if baseline exists
regression = 0
regression = compare_test_to_baseline(tflops_base, results, testlist)
return regression
if __name__ == '__main__':
if __name__ == "__main__":
main()
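
A worked sketch of the regression criterion in compare_test_to_baseline above: a test is flagged when the baseline exceeds 1.01 times the measured value, i.e. the new result is more than about 1% slower. Numbers are illustrative.

# A worked sketch of the regression criterion above; TFLOPS values are
# illustrative. A test regresses when baseline > 1.01 * measured.
baseline = [10.0, 20.0]  # latest run on the develop branch
measured = [9.5, 20.5]   # current run

for i, (b, t) in enumerate(zip(baseline, measured)):
    if b > 1.01 * t:
        # Prints "test #0 shows regression by -5.000%".
        print(f"test #{i} shows regression by {(t - b) / b * 100:.3f}%")

# Average relative performance (the original prints this only when no
# regression was found).
ave = sum(t / b for b, t in zip(baseline, measured) if b > 0) / len(baseline)
print("average performance relative to baseline:", ave)  # 0.9875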


@@ -2,18 +2,6 @@
# Copyright © Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
# Get list of staged files
STAGED_FILES=$(git diff --cached --name-only)
# Check if any staged file is under include/ck_tile/ or example/ck_tile/
if echo "$STAGED_FILES" | grep -qE '^(include/ck_tile/|example/ck_tile/)'; then
echo "Detected changes in ck_tile-related files. Running remod.py..."
# Run remod.py in both required locations
(cd include/ck_tile/ && python3 remod.py)
(cd example/ck_tile/ && python3 remod.py)
echo "remod.py completed."
else
echo "No changes in ck_tile-related files. Skipping remod.py."
fi
# Run remod.py in both required locations
(cd include/ck_tile/ && python3 remod.py)
(cd example/ck_tile/ && python3 remod.py)


@@ -71,7 +71,7 @@ def tuples(filename):
try:
m, n, k = map(int, line)
lines.append((m, n, k))
except:
except Exception:
pass
return lines
@@ -163,19 +163,19 @@ def run_shape(shape, profiler_bin, op_name, dtype, layout):
m, n, k = shape
try:
op = OPs[op_name]
except:
except KeyError:
raise AssertionError(f"Invalid operator {op_name}")
name_arg = op.name
op_wrapper = op.value()
try:
dtype_arg = str(op_wrapper.dtype[dtype].value)
except:
except KeyError:
raise AssertionError(f"Invalid dtype for {op_name}: {dtype}")
try:
layout_wrapper = op_wrapper.layout[layout]
except:
except KeyError:
raise AssertionError(f"Invalid layout for {op_name}: {layout}")
layout_arg = str(layout_wrapper.value)
# verification: no, initialization: decimal, print tensor: no, time kernel: yes
@@ -286,7 +286,9 @@ def main():
try:
from tqdm import tqdm as iterate
except ImportError:
iterate = lambda x: x
def iterate(x):
return x
for s in iterate(shapes):
run_shape_stdout_lines = run_shape(