feat(precommit-hooks): add check for correct copyright header (#3302)

* chore(copyright): update copyright header for left files * feat(copyright): add copyright check to precommit hooks * chore(copyright): update copyright header for include/ck_tile directory * chore(copyright): update copyright header for example directory * chore(copyright): update copyright header for .github directory * refactor: copyright_check script with better if else handling * chore(copyright): update copyright header for remaining files * feat: add script to automate copyright addition
2026-04-19 22:39:03 +00:00 · 2025-12-11 10:50:43 +04:00
parent fbbdd36ea8
commit 6d25525adc
40 changed files with 408 additions and 78 deletions
--- a/script/check_copyright_year.sh
+++ b/script/check_copyright_year.sh
@@ -2,18 +2,70 @@
 # Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
 # SPDX-License-Identifier: MIT

+# This script checks if files have the correct copyright header template.
+# It supports .hpp, .cpp, .inc, .py, .sh, and .cmake files.
+#
+# Usage: ./check_copyright_year.sh <file1> <file2> ...

-current_year=$(date +%Y)
 exit_code=0

-for file in $@; do
-    if grep -q "Copyright (c)" $file
-    then
-        if ! grep -q "Copyright (c).*$current_year" $file
-        then
-            echo "ERROR: File $file has a copyright notice without the current year ($current_year)."
-            exit_code=1
-        fi
+# Expected copyright header lines (without comment characters)
+COPYRIGHT_LINE="Copyright (c) Advanced Micro Devices, Inc., or its affiliates."
+SPDX_LINE="SPDX-License-Identifier: MIT"
+
+# Verify that one file carries both expected header lines.
+#
+# Arguments:
+#   $1 - path of the file to check
+# Globals read:
+#   COPYRIGHT_LINE, SPDX_LINE
+# Outputs:
+#   An error message on stdout naming the first line found missing.
+# Returns:
+#   0 if both lines are present or the file type is not checked, 1 otherwise.
+check_file() {
+    local file=$1
+    local basename="${file##*/}"
+    local ext="${file##*.}"
+    local comment_char
+    # Declared local so the computed patterns do not leak into the script's
+    # global scope on every call.
+    local expected_copyright expected_spdx
+
+    # Determine comment character based on filename or extension
+    if [[ "$basename" == "CMakeLists.txt" ]]; then
+        comment_char="#"
+    else
+        case "$ext" in
+            cpp|hpp|inc)
+                comment_char="//"
+                ;;
+            py|sh|cmake)
+                comment_char="#"
+                ;;
+            *)
+                # Skip files with unsupported extensions
+                return 0
+                ;;
+        esac
+    fi
+
+    # Build expected header patterns
+    expected_copyright="$comment_char $COPYRIGHT_LINE"
+    expected_spdx="$comment_char $SPDX_LINE"
+
+    # Check if file contains both required lines. -F matches the text
+    # literally; "--" ends option parsing so a path that begins with "-" is
+    # still treated as a file name.
+    if ! grep -qF -- "$expected_copyright" "$file"; then
+        echo "ERROR: File $file is missing the correct copyright header line."
+        echo "  Expected: $expected_copyright"
+        return 1
+    fi
+
+    if ! grep -qF -- "$expected_spdx" "$file"; then
+        echo "ERROR: File $file is missing the correct SPDX license identifier line."
+        echo "  Expected: $expected_spdx"
+        return 1
+    fi
+
+    return 0
+}
+
+# Process each file provided as argument
+for file in "$@"; do
+    # Skip anything that is not a regular file (e.g. a missing path or a
+    # directory)
+    if [[ ! -f "$file" ]]; then
+        continue
+    fi
+
+    # Record the failure but keep looping so that every offending file is
+    # reported in a single run.
+    if ! check_file "$file"; then
+        exit_code=1
    fi
 done

--- a/script/update_amd_copyright_headers.py
+++ b/script/update_amd_copyright_headers.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+# SPDX-License-Identifier: MIT
+
+"""
+Purpose:
+  Normalize and enforce AMD two-line copyright + SPDX headers across files.
+
+Target files:
+  - C/C++-style: .cpp, .hpp, .inc  -> uses "//" comment style
+  - Hash-style:  .py, .cmake, .sh, and CMakeLists.txt -> uses "#" style
+
+Header formats inserted (top of file, followed by exactly one blank line):
+  C/C++  :
+    // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+    // SPDX-License-Identifier: MIT
+    <blank>
+  Hash   :
+    the same two lines with "#" in place of "//"
+    <blank>
+
+Shebang special case (hash-style only):
+  - If line 1 starts with "#!", keep shebang, then a blank line, then the
+    two hash-style header lines, then a blank line.
+
+Removal rules:
+  - Remove any comment lines (anywhere in file) containing the keywords
+    "copyright" or "spdx" (case-insensitive). Blank lines are preserved.
+  - Remove long-form MIT license block comment when:
+      a) The file starts with the block (absolute top), OR
+      b) The block appears immediately after the AMD header position
+         (i.e., when remainder at insertion point begins with "/*" and
+         the first content line is "* The MIT License (MIT)").
+
+Blank-line normalization:
+  - Enforce exactly ONE blank line immediately after the AMD header.
+    (Drop only the leading blank lines at the insertion point before
+     re-inserting the header.)
+  - Do not change blank lines between other non-copyright comments.
+
+Preservation:
+  - Preserve original newline style: CRLF (\r\n) vs LF (\n).
+  - Preserve UTF-8 BOM if present.
+  - Do not modify non-comment code lines.
+
+Idempotency:
+  - Running this script multiple times does not further modify files.
+"""
+
+from __future__ import annotations
+import re
+import sys
+from pathlib import Path
+from typing import List, Tuple
+
+# Canonical two-line header, one variant per comment style. The entries carry
+# no line ending; build_header_lines() appends the newline sequence.
+AMD_CPP_HEADER_TEXT = [
+    "// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.",
+    "// SPDX-License-Identifier: MIT",
+]
+AMD_HASH_HEADER_TEXT = [
+    "# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.",
+    "# SPDX-License-Identifier: MIT",
+]
+
+# File suffixes handled per comment style. process_file() lower-cases the
+# suffix before the lookup and additionally matches CMakeLists.txt by name.
+CPP_EXTS = {".cpp", ".hpp", ".inc"}
+HASH_EXTS = {".py", ".cmake", ".sh"}
+
+# --- Encoding helpers -------------------------------------------------------
+
+
+def has_bom(raw: bytes) -> bool:
+    """Return True when *raw* begins with the three-byte UTF-8 byte-order mark."""
+    return raw[:3] == b"\xef\xbb\xbf"
+
+
+def decode_text(raw: bytes) -> str:
+    """Decode *raw* as UTF-8, dropping a leading byte-order mark if present.
+
+    Decoding is strict: bytes that are not valid UTF-8 raise
+    ``UnicodeDecodeError`` rather than being replaced with U+FFFD. The
+    decoded text is re-encoded and written back over the original file by
+    process_file(), so a lossy decode would silently corrupt the file.
+    """
+    return raw.decode("utf-8-sig")
+
+
+def encode_text(text: str, bom: bool) -> bytes:
+    """Encode *text* as UTF-8, prefixed with the byte-order mark when *bom* is true."""
+    prefix = b"\xef\xbb\xbf" if bom else b""
+    return prefix + text.encode("utf-8")
+
+
+# --- Newline detection ------------------------------------------------------
+
+
+def detect_newline_sequence(raw: bytes) -> str:
+    """Choose the newline sequence for lines inserted into the file.
+
+    Returns CRLF as soon as *raw* contains one CRLF pair; otherwise LF, which
+    is also the fallback for input that has no line break at all.
+    """
+    return "\r\n" if b"\r\n" in raw else "\n"
+
+
+# --- Utilities --------------------------------------------------------------
+
+
+def is_comment_line(line: str, style: str) -> bool:
+    """Tell whether *line* looks like a comment for the given *style*.
+
+    The test is purely prefix-based on the line with its leading whitespace
+    removed; it does not track whether the line sits inside a string literal
+    or a block comment.
+
+    Recognized prefixes:
+      "cpp"  -> "//", "/*" or "*" (the last one also covers "*/")
+      "hash" -> "#"
+    Any other style never counts as a comment.
+    """
+    prefixes = {"cpp": ("//", "/*", "*"), "hash": ("#",)}.get(style)
+    if prefixes is None:
+        return False
+    return line.lstrip().startswith(prefixes)
+
+
+def has_keywords(line: str) -> bool:
+    """Return True if *line* mentions "copyright" or "spdx", ignoring case."""
+    lowered = line.lower()
+    return any(word in lowered for word in ("copyright", "spdx"))
+
+
+# --- MIT License banner detection ------------------------------
+MIT_C_FIRST_LINE_RE = re.compile(r"^\s*\*\s*The MIT License \(MIT\)")
+MIT_HASH_FIRST_LINE_RE = re.compile(r"^\s*#\s*The MIT License \(MIT\)")
+
+
+def remove_top_mit_block(lines: List[str]) -> Tuple[List[str], bool]:
+    """Strip a long-form MIT license banner from the top of *lines*.
+
+    Two banner shapes are recognized, chosen by how the first line starts
+    (leading whitespace ignored):
+
+    - C-style: the first line starts with "/*". The banner runs up to and
+      including the first later line that starts or ends with "*/". It is
+      removed only if a line in it matches MIT_C_FIRST_LINE_RE. A comment
+      that opens and closes on the first line is never treated as a banner.
+    - Hash-style: the first line starts with "#". The banner is the
+      contiguous run of "#" lines from the top. It is removed only if a
+      line in it matches MIT_HASH_FIRST_LINE_RE.
+
+    Args:
+        lines: File content split into lines with line endings kept.
+
+    Returns:
+        ``(new_lines, removed)``. When nothing is removed, ``new_lines`` is
+        the input list itself and ``removed`` is False. Line endings of the
+        surviving lines are untouched.
+    """
+    if not lines:
+        return lines, False
+
+    first = lines[0].lstrip()
+
+    # C-style block
+    if first.startswith("/*"):
+        # A comment closed on its opening line is not the multi-line banner.
+        # Without this guard the scan below would run on to the "*/" of a
+        # later comment and could delete the code in between. The search
+        # starts after the opener so that "/*/" is not mistaken for a close.
+        if "*/" in first[2:]:
+            return lines, False
+
+        saw_mit = False
+        for i, line in enumerate(lines[1:], 1):
+            if not saw_mit and MIT_C_FIRST_LINE_RE.match(line):
+                saw_mit = True
+            stripped = line.strip()
+            if stripped.startswith("*/") or stripped.endswith("*/"):
+                # First closing line ends the block, banner or not.
+                if saw_mit:
+                    return lines[i + 1:], True
+                return lines, False
+        # Unterminated comment: leave the input alone.
+        return lines, False
+
+    # Hash-style contiguous banner
+    if first.startswith("#"):
+        end_idx, saw_mit = 0, False
+        for i, line in enumerate(lines):
+            if not line.lstrip().startswith("#"):
+                break
+            if not saw_mit and MIT_HASH_FIRST_LINE_RE.match(line):
+                saw_mit = True
+            end_idx = i + 1
+        if saw_mit:
+            return lines[end_idx:], True
+        return lines, False
+
+    return lines, False
+
+
+# --- Removal + normalization helpers ---------------------------------------
+
+
+def remove_keyword_comment_lines_globally(lines: List[str], style: str) -> List[str]:
+    """Return *lines* without the comment lines that mention a header keyword.
+
+    A line is dropped only when is_comment_line() and has_keywords() both
+    hold for it. Blank lines and every other line are kept unchanged and in
+    their original order.
+    """
+    return [
+        line
+        for line in lines
+        if not (is_comment_line(line, style) and has_keywords(line))
+    ]
+
+
+def drop_leading_blank_lines(lines: List[str]) -> List[str]:
+    """Return a new list that starts at the first non-blank entry of *lines*.
+
+    An entry is blank when it is empty or whitespace only. Blank entries
+    after the first non-blank one are kept. The result is empty when every
+    entry is blank.
+    """
+    for start, line in enumerate(lines):
+        if line.strip():
+            return lines[start:]
+    return []
+
+
+# --- Header builder ---------------------------------------------------------
+
+
+def build_header_lines(style: str, nl: str) -> List[str]:
+    """Return the two header lines for *style*, each ending in *nl*, plus one blank line.
+
+    "cpp" selects the "//" variant; any other value selects the "#" variant.
+    """
+    if style == "cpp":
+        header_text = AMD_CPP_HEADER_TEXT
+    else:
+        header_text = AMD_HASH_HEADER_TEXT
+    return [text + nl for text in header_text[:2]] + [nl]
+
+
+# --- Main transforms --------------------------------------------------------
+
+
+def process_cpp(text: str, nl: str) -> str:
+    """Return *text* with the "//" header normalized at the very top.
+
+    Steps, in order:
+      1. Remove a long-form MIT block comment at the absolute top.
+      2. Remove every comment line that mentions a header keyword, anywhere
+         in the file (blank lines are kept).
+      3. Drop leading blank lines, then remove an MIT block comment that is
+         now at the top (it originally followed the old header), then drop
+         the blank lines that block leaves behind.
+      4. Prepend the header followed by exactly one blank line.
+
+    Args:
+        text: Decoded file content.
+        nl: Newline sequence used for the inserted header lines.
+
+    Returns:
+        The rewritten content. Passing the result through again returns it
+        unchanged.
+    """
+    lines = text.splitlines(True)
+
+    # Remove MIT block if it is at the *absolute* top
+    lines, _ = remove_top_mit_block(lines)
+
+    # Remove keyworded comment lines globally (blank lines preserved)
+    lines = remove_keyword_comment_lines_globally(lines, style="cpp")
+
+    # Normalize insertion point and remove MIT block if it appears *after header*
+    lines = drop_leading_blank_lines(lines)
+    lines, removed = remove_top_mit_block(lines)
+    if removed:
+        # The blank lines that followed the removed block are now leading.
+        # Left in place they would add to the single blank line supplied by
+        # the header, and a second run would strip them again.
+        lines = drop_leading_blank_lines(lines)
+
+    # Prepend AMD header (guarantee exactly one blank after)
+    return "".join(build_header_lines("cpp", nl) + lines)
+
+
+def process_hash(text: str, nl: str) -> str:
+    """Return *text* with the "#" header normalized near the top.
+
+    Layout produced:
+      - with a shebang on line 1: shebang, blank line, header, blank line,
+        rest of the file;
+      - otherwise: header, blank line, rest of the file.
+
+    Before the header is inserted, every comment line that mentions a header
+    keyword is removed from the rest of the file, leading blank lines are
+    dropped, and a long-form MIT banner at that position is removed together
+    with the blank lines that follow it.
+
+    Args:
+        text: Decoded file content.
+        nl: Newline sequence used for the inserted lines.
+
+    Returns:
+        The rewritten content. Passing the result through again returns it
+        unchanged.
+    """
+    lines = text.splitlines(True)
+    if not lines:
+        return "".join(build_header_lines("hash", nl))
+
+    top: List[str] = []
+    if lines[0].startswith("#!"):
+        shebang_line = lines[0]
+        # A one-line file may end without a newline. Terminate the shebang
+        # explicitly so the following `nl` is a real blank line rather than
+        # the shebang's own line ending.
+        if not shebang_line.endswith(("\n", "\r")):
+            shebang_line += nl
+        top = [shebang_line, nl]
+        lines = lines[1:]
+
+    remainder = remove_keyword_comment_lines_globally(lines, style="hash")
+    remainder = drop_leading_blank_lines(remainder)
+    remainder, removed = remove_top_mit_block(remainder)  # remove MIT block after header
+    if removed:
+        # Blank lines that followed the removed banner are now leading.
+        # Drop them so exactly one blank line follows the header.
+        remainder = drop_leading_blank_lines(remainder)
+    return "".join(top + build_header_lines("hash", nl) + remainder)
+
+
+# --- File processing & CLI --------------------------------------------------
+
+
+def process_file(path: Path) -> bool:
+    """Normalize the header of one file in place.
+
+    The comment style is picked from the lower-cased suffix, or from the
+    file name for CMakeLists.txt. The file is rewritten only when the
+    transformed text differs from the decoded original; a leading UTF-8
+    byte-order mark is written back if the file had one.
+
+    Returns:
+        True if the file was rewritten. False if its type is unsupported or
+        its content already matches.
+    """
+    ext = path.suffix.lower()
+    if ext in CPP_EXTS:
+        transform = process_cpp
+    elif ext in HASH_EXTS or path.name == "CMakeLists.txt":
+        transform = process_hash
+    else:
+        return False
+
+    raw = path.read_bytes()
+    original = decode_text(raw)
+    rewritten = transform(original, detect_newline_sequence(raw))
+    if rewritten == original:
+        return False
+
+    path.write_bytes(encode_text(rewritten, has_bom(raw)))
+    return True
+
+
+def main(argv: List[str]) -> int:
+    """Command-line entry point: process every path given after argv[0].
+
+    With no paths, prints the module docstring and returns 2. Otherwise each
+    path is handled independently: missing paths, directories and any
+    exception raised while processing are collected as errors, and the loop
+    moves on to the next path. A summary and the error list are printed at
+    the end.
+
+    Returns:
+        2 when no path was given, 1 when at least one error was collected,
+        0 otherwise.
+    """
+    targets = argv[1:]
+    if not targets:
+        print(__doc__)
+        return 2
+
+    changed = skipped = 0
+    errors: List[str] = []
+    for arg in targets:
+        p = Path(arg)
+        try:
+            if not p.exists():
+                errors.append(f"Not found: {p}")
+            elif p.is_dir():
+                errors.append(f"Is a directory (pass specific files): {p}")
+            elif process_file(p):
+                changed += 1
+                print(f"Updated: {p}")
+            else:
+                skipped += 1
+                print(f"Skipped (no change needed or unsupported type): {p}")
+        except Exception as e:
+            # One bad file must not stop the rest of the batch.
+            errors.append(f"Error processing {p}: {e}")
+
+    print(f"\nSummary: {changed} updated, {skipped} skipped, {len(errors)} errors")
+    for msg in errors:
+        print(f" - {msg}")
+    return 1 if errors else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv))