mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 08:50:17 +00:00
303 lines
12 KiB
Python
Executable File
303 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# filepath: /home/AMD/vpietila/git/rocm-libraries/projects/composablekernel/experimental/grouped_convolution_tile_instances/check_instances.py
|
|
|
|
"""
|
|
Script to check which backward weight convolution instances compile successfully.
|
|
Compiles each .cpp file independently and reports failures grouped by layout/datatype.
|
|
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
import argparse
|
|
import tempfile
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
import re
|
|
|
|
# Configuration
# hipcc: ROCm's clang-based HIP compiler driver; used for every compile check.
CXX_COMPILER = "/opt/rocm/bin/hipcc"
# C++ language standard, interpolated into the -std=c++{CXX_STANDARD} flag.
CXX_STANDARD = "20"
|
|
|
|
def get_include_dirs(project_root: Path) -> list[str]:
    """Return the include directories needed for compilation, as strings."""
    relative_parts = [
        ("build", "include"),
        ("include",),
        ("library", "include"),
        ("experimental", "builder", "include"),
        ("experimental", "builder", "test", "utils"),
        ("experimental", "grouped_convolution_tile_instances", "instances"),
    ]
    return [str(project_root.joinpath(*parts)) for parts in relative_parts]
|
|
|
|
def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, verbose: bool) -> tuple[bool, str]:
    """
    Attempt to compile a single .cpp file.

    Args:
        cpp_file: The translation unit to compile.
        project_root: Repository root used to derive the include paths.
        gpu_target: AMD GPU architecture passed as --offload-arch.
        verbose: When True, echo the full compiler command line before running.

    Returns (success, error_message). error_message is the raw compiler
    stderr on failure, or a TIMEOUT/EXCEPTION marker string.
    """
    include_dirs = get_include_dirs(project_root)
    include_flags = [f"-I{d}" for d in include_dirs]

    # Temporary directory for the object file, so nothing is left behind.
    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = Path(tmpdir) / "output.o"

        cmd = [
            CXX_COMPILER,
            "-c",  # Compile only, don't link
            f"-std=c++{CXX_STANDARD}",
            f"--offload-arch={gpu_target}",
            "-D__HIP_PLATFORM_AMD__",
            # BUG FIX: this was a single argv element "-D CK_...=ON", so the
            # joined -D value started with a space and the intended macro name
            # was never defined. Pass it in joined form without the space.
            "-DCK_EXPERIMENTAL_BUILDER=ON",
            "-O3",
            *include_flags,
            str(cpp_file),
            "-o", str(output_file),
        ]

        if verbose:
            # --verbose promises to show compile commands; honour that here.
            print("  Compile command:", " ".join(cmd), flush=True)

        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout per file
            )

            if result.returncode == 0:
                return True, ""
            # Hand back the raw stderr; extract_key_error() trims it later.
            return False, result.stderr

        except subprocess.TimeoutExpired:
            return False, "TIMEOUT: Compilation took too long"
        except Exception as e:
            # Typically FileNotFoundError when hipcc is not installed.
            return False, f"EXCEPTION: {str(e)}"
|
|
|
|
def extract_key_error(error_output: str) -> str:
    """Pick the most informative single line out of raw compiler output."""
    all_lines = error_output.split('\n')

    # Prefer an explicit compiler diagnostic line.
    diagnostic = next((ln.strip() for ln in all_lines if 'error:' in ln), None)
    if diagnostic is not None:
        return diagnostic

    # Otherwise fall back to the first non-blank line, capped in length.
    fallback = next((ln.strip() for ln in all_lines if ln.strip()), None)
    if fallback is not None:
        return fallback[:200]

    return "Unknown error"
|
|
|
|
def find_instance_files(instances_dir: Path, direction: str = "backward_weight") -> dict[str, list[Path]]:
    """
    Collect all instance .cpp files under instances_dir/<direction>,
    grouped by layout/datatype subdirectory.

    Returns dict: subdirectory_name -> sorted list of cpp files.
    Exits the process when the direction directory is missing.
    """
    target_dir = instances_dir / direction
    if not target_dir.exists():
        print(f"Error: Directory {target_dir} does not exist")
        sys.exit(1)

    grouped: dict[str, list[Path]] = defaultdict(list)
    for entry in sorted(target_dir.iterdir()):
        if not entry.is_dir():
            continue
        sources = sorted(entry.glob("*.cpp"))
        # Subdirectories without any .cpp files are simply skipped.
        if sources:
            grouped[entry.name] = sources

    return grouped
|
|
|
|
def parse_args():
    """Build and evaluate the command-line interface for this script."""
    parser = argparse.ArgumentParser(description="Check which convolution instances compile")

    # What to check.
    parser.add_argument(
        "--direction",
        default="backward_weight",
        choices=["forward", "backward_weight", "backward_data"],
        help="Convolution direction to check",
    )
    parser.add_argument(
        "--subdir",
        default=None,
        help="Only check specific subdirectory (e.g., 'nhwgc_bf16')",
    )
    parser.add_argument(
        "--max-files",
        type=int,
        default=None,
        help="Maximum number of files to check per subdirectory",
    )
    parser.add_argument(
        "--instance",
        type=int,
        default=None,
        help="Only check a single instance by its index in the config file.",
    )

    # How to run and report.
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Show verbose output including compile commands",
    )
    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Output file for the blacklist",
    )
    parser.add_argument(
        "--project-root",
        default=None,
        help="Project root directory (auto-detected if not specified)",
    )
    parser.add_argument(
        "--parallel-jobs",
        "-j",
        type=int,
        default=1,
        help="Number of parallel compilation jobs (default: 1)",
    )
    parser.add_argument(
        "--gpu-target",
        type=str,
        default="gfx950",
        help="GPU target architecture (default: gfx950)",
    )

    return parser.parse_args()
|
|
|
|
def main():
    """Compile every selected instance file and report results.

    Returns 0 when everything compiled successfully, 1 otherwise; the
    value is used as the process exit code by the __main__ guard.
    """

    def select_instance(grouped: dict[str, list[Path]], index: int, requested_subdir) -> dict[str, list[Path]]:
        """Keep only the file whose name ends with _{index}.cpp in each subdir."""
        selected: dict[str, list[Path]] = {}
        if index < 0:
            # Negative indices can never match a filename suffix.
            return selected
        target_suffix = f"_{index}.cpp"
        for subdir, files in grouped.items():
            matched_files = [f for f in files if f.name.endswith(target_suffix)]
            if matched_files:
                assert len(matched_files) == 1, f"Expected exactly one file ending with {target_suffix} in {subdir}, found {len(matched_files)}"
                selected[subdir] = matched_files
            elif requested_subdir is None:
                print(f"Warning: Subdirectory '{subdir}' does not have instance index {index}")
        return selected

    def instance_sort_key(failure: tuple[str, str]) -> int:
        """Sort key: the _{index}.cpp suffix embedded in the filename (0 if absent)."""
        match = re.search(r'_(\d+)\.cpp$', failure[0])
        return int(match.group(1)) if match else 0

    args = parse_args()

    # Find project root
    if args.project_root:
        project_root = Path(args.project_root)
    else:
        # Assume script is in experimental/grouped_convolution_tile_instances/
        script_dir = Path(__file__).resolve().parent
        project_root = script_dir.parent.parent  # Go up to composablekernel/

    instances_dir = project_root / "experimental" / "grouped_convolution_tile_instances" / "instances"

    print(f"Project root: {project_root}")
    print(f"Instances directory: {instances_dir}")
    print(f"Compiler: {CXX_COMPILER}")
    print(f"GPU Target: {args.gpu_target}")
    print(f"Direction: {args.direction}")
    if args.instance is not None:
        print(f"Checking only instance index: {args.instance}")
    print()

    # Find all instance files
    files_by_subdir = find_instance_files(instances_dir, args.direction)

    # If a sub directory is requested, check only that sub directory.
    # (This validation used to be duplicated further down; once is enough.)
    if args.subdir:
        if args.subdir not in files_by_subdir:
            print(f"Error: Subdirectory '{args.subdir}' not found")
            print(f"Available: {list(files_by_subdir.keys())}")
            sys.exit(1)
        files_by_subdir = {args.subdir: files_by_subdir[args.subdir]}

    # If an instance index is requested, narrow each subdir to that one file.
    if args.instance is not None:
        files_by_subdir = select_instance(files_by_subdir, args.instance, args.subdir)
        if args.subdir and args.subdir not in files_by_subdir:
            print(f"Instance index {args.instance} was not found in subdirectory '{args.subdir}'")
            sys.exit(1)

    # Track results
    all_failures = defaultdict(list)  # subdir -> list of (filename, error)
    all_successes = defaultdict(list)  # subdir -> list of filenames
    error_types = defaultdict(set)  # error_key -> set of "subdir/filename"
    # NOTE(review): args.output is accepted by the CLI but no blacklist file is
    # written anywhere, and error_types is collected but never reported —
    # confirm whether those features are still planned.

    total_files = sum(len(files) for files in files_by_subdir.values())
    if args.max_files:
        total_files = min(total_files, args.max_files * len(files_by_subdir))

    print(f"Found {total_files} instance files to check")
    print("=" * 60)

    checked = 0
    for subdir_name, cpp_files in sorted(files_by_subdir.items()):
        print(f"\nChecking {subdir_name}...", flush=True)

        files_to_check = cpp_files
        if args.max_files:
            files_to_check = cpp_files[:args.max_files]

        def record_result(filename: str, success: bool, error: str) -> None:
            """Print one progress line and book-keep the outcome for this subdir."""
            # BUG FIX: progress lines used to print the literal placeholder
            # "(unknown)" instead of the file actually being compiled.
            status = "OK" if success else "FAILED"
            print(f"  [{checked}/{total_files}] {filename}... {status}", flush=True)
            if success:
                all_successes[subdir_name].append(filename)
                return
            key_error = extract_key_error(error)
            if args.verbose:
                print(f"    Error: {key_error}")
            all_failures[subdir_name].append((filename, key_error))
            error_types[key_error].add(f"{subdir_name}/{filename}")

        if args.parallel_jobs > 1:
            # Parallel compilation
            with ThreadPoolExecutor(max_workers=args.parallel_jobs) as executor:
                # Submit all compilation jobs up front.
                futures = {
                    executor.submit(compile_single_file, cpp_file, project_root, args.gpu_target, args.verbose): cpp_file
                    for cpp_file in files_to_check
                }
                # Process results as they complete (completion order, not submit order).
                for future in as_completed(futures):
                    cpp_file = futures[future]
                    checked += 1
                    success, error = future.result()
                    record_result(cpp_file.name, success, error)
        else:
            # Sequential compilation
            print(f"Compiling {len(files_to_check)} files sequentially...")
            for cpp_file in files_to_check:
                checked += 1
                success, error = compile_single_file(cpp_file, project_root, args.gpu_target, args.verbose)
                record_result(cpp_file.name, success, error)

    # Print summary
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)

    for subdir_name in sorted(files_by_subdir.keys()):
        successes = len(all_successes.get(subdir_name, []))
        failures = len(all_failures.get(subdir_name, []))
        total = successes + failures
        print(f"\n{subdir_name}: {successes}/{total} passed, {failures} failed")

        if failures > 0:
            print("  Failed files:")
            # Order the failures by instance index for a stable, readable report.
            for filename, _error in sorted(all_failures[subdir_name], key=instance_sort_key):
                # BUG FIX: this used to print the placeholder "(unknown)".
                print(f"    - {filename}")

    # Return exit code based on failures
    total_failures = sum(len(f) for f in all_failures.values())
    if total_failures > 0:
        print(f"\n{total_failures} total failures found")
        return 1
    print("\nAll instances compiled successfully!")
    return 0
|
|
|
|
if __name__ == "__main__":
    # Propagate main()'s return value (0 on success, 1 when any instance
    # failed to compile) as the process exit code.
    sys.exit(main())