Files
composable_kernel/experimental/grouped_convolution_tile_instances/test-instances.py
2026-03-11 23:03:20 -04:00

303 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
# filepath: /home/AMD/vpietila/git/rocm-libraries/projects/composablekernel/experimental/grouped_convolution_tile_instances/check_instances.py
"""
Script to check which backward weight convolution instances compile successfully.
Compiles each .cpp file independently and reports failures grouped by layout/datatype.
"""
import subprocess
import sys
from pathlib import Path
from collections import defaultdict
import argparse
import tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
import re
# Configuration
CXX_COMPILER = "/opt/rocm/bin/hipcc"
CXX_STANDARD = "20"
def get_include_dirs(project_root: Path) -> list[str]:
    """Return the include search paths (as strings) needed to compile an instance file."""
    relative_parts = [
        ("build", "include"),
        ("include",),
        ("library", "include"),
        ("experimental", "builder", "include"),
        ("experimental", "builder", "test", "utils"),
        ("experimental", "grouped_convolution_tile_instances", "instances"),
    ]
    return [str(project_root.joinpath(*parts)) for parts in relative_parts]
def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, verbose: bool) -> tuple[bool, str]:
    """
    Attempt to compile a single .cpp file with hipcc.

    Args:
        cpp_file: The translation unit to compile.
        project_root: Repository root used to derive include paths.
        gpu_target: Value passed to --offload-arch (e.g. "gfx950").
        verbose: When True, echo the full compile command before running it.

    Returns:
        (success, error_message). error_message is "" on success, the raw
        compiler stderr on failure, or a TIMEOUT/EXCEPTION marker string.
    """
    include_flags = [f"-I{d}" for d in get_include_dirs(project_root)]
    # Compile into a throwaway directory so no object files are left behind.
    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = Path(tmpdir) / "output.o"
        cmd = [
            CXX_COMPILER,
            "-c",  # Compile only, don't link
            f"-std=c++{CXX_STANDARD}",
            f"--offload-arch={gpu_target}",
            "-D__HIP_PLATFORM_AMD__",
            # Pass -D and the macro as SEPARATE argv entries. The previous
            # single string "-D CK_EXPERIMENTAL_BUILDER=ON" embedded a space
            # inside one argument, which makes the macro name begin with a
            # space instead of defining CK_EXPERIMENTAL_BUILDER.
            "-D", "CK_EXPERIMENTAL_BUILDER=ON",
            "-O3",
            *include_flags,
            str(cpp_file),
            "-o", str(output_file),
        ]
        if verbose:
            # --verbose help promises to show compile commands; do so here.
            print(" ".join(cmd), flush=True)
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout per file
            )
        except subprocess.TimeoutExpired:
            return False, "TIMEOUT: Compilation took too long"
        except Exception as e:  # e.g. compiler binary not found
            return False, f"EXCEPTION: {str(e)}"
        if result.returncode == 0:
            return True, ""
        # Return the full stderr; callers condense it via extract_key_error().
        return False, result.stderr
def extract_key_error(error_output: str) -> str:
    """Return the most relevant line of compiler output.

    Preference order: the first line containing 'error:', then the first
    non-empty line (truncated to 200 characters), then a generic fallback.
    """
    all_lines = error_output.split('\n')
    explicit = next((ln.strip() for ln in all_lines if 'error:' in ln), None)
    if explicit is not None:
        return explicit
    fallback = next((ln.strip()[:200] for ln in all_lines if ln.strip()), None)
    return fallback if fallback is not None else "Unknown error"
def find_instance_files(instances_dir: Path, direction: str = "backward_weight") -> dict[str, list[Path]]:
    """
    Find all instance .cpp files grouped by subdirectory (layout/datatype).

    Exits the process when the direction directory does not exist.
    Returns dict: subdirectory_name -> sorted list of cpp files.
    """
    target_dir = instances_dir / direction
    if not target_dir.exists():
        print(f"Error: Directory {target_dir} does not exist")
        sys.exit(1)
    # Pair each subdirectory name with its sorted sources, keeping only
    # directories that actually contain .cpp files.
    candidates = (
        (entry.name, sorted(entry.glob("*.cpp")))
        for entry in sorted(target_dir.iterdir())
        if entry.is_dir()
    )
    return defaultdict(list, {name: sources for name, sources in candidates if sources})
def parse_args():
    """Build the command-line interface and return the parsed options."""
    cli = argparse.ArgumentParser(description="Check which convolution instances compile")
    cli.add_argument(
        "--direction",
        default="backward_weight",
        choices=["forward", "backward_weight", "backward_data"],
        help="Convolution direction to check",
    )
    cli.add_argument(
        "--subdir",
        default=None,
        help="Only check specific subdirectory (e.g., 'nhwgc_bf16')",
    )
    cli.add_argument(
        "--max-files",
        type=int,
        default=None,
        help="Maximum number of files to check per subdirectory",
    )
    cli.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Show verbose output including compile commands",
    )
    cli.add_argument(
        "--output",
        "-o",
        default=None,
        help="Output file for the blacklist",
    )
    cli.add_argument(
        "--project-root",
        default=None,
        help="Project root directory (auto-detected if not specified)",
    )
    cli.add_argument(
        "--instance",
        type=int,
        default=None,
        help="Only check a single instance by its index in the config file.",
    )
    cli.add_argument(
        "--parallel-jobs",
        "-j",
        type=int,
        default=1,
        help="Number of parallel compilation jobs (default: 1)",
    )
    cli.add_argument(
        "--gpu-target",
        type=str,
        default="gfx950",
        help="GPU target architecture (default: gfx950)",
    )
    return cli.parse_args()
def main():
    """Compile every selected instance file and report failures by subdirectory.

    Returns 0 when every instance compiled successfully, 1 otherwise
    (the return value is used as the process exit code).
    """
    args = parse_args()

    # Find project root
    if args.project_root:
        project_root = Path(args.project_root)
    else:
        # Assume script is in experimental/grouped_convolution_tile_instances/
        script_dir = Path(__file__).resolve().parent
        project_root = script_dir.parent.parent  # Go up to composablekernel/
    instances_dir = project_root / "experimental" / "grouped_convolution_tile_instances" / "instances"

    print(f"Project root: {project_root}")
    print(f"Instances directory: {instances_dir}")
    print(f"Compiler: {CXX_COMPILER}")
    print(f"GPU Target: {args.gpu_target}")
    print(f"Direction: {args.direction}")
    if args.instance is not None:
        print(f"Checking only instance index: {args.instance}")
    print()

    # Find all instance files
    files_by_subdir = find_instance_files(instances_dir, args.direction)

    # If a subdirectory is requested, restrict the check to it.
    if args.subdir:
        if args.subdir not in files_by_subdir:
            print(f"Error: Subdirectory '{args.subdir}' not found")
            print(f"Available: {list(files_by_subdir.keys())}")
            sys.exit(1)
        files_by_subdir = {args.subdir: files_by_subdir[args.subdir]}

    if args.instance is not None:
        # Keep, per subdirectory, only the single file whose name ends with
        # the requested index (instance files are named ..._<index>.cpp).
        instance_files = {}
        for subdir, files in files_by_subdir.items():
            if args.instance >= 0:
                target_suffix = f"_{args.instance}.cpp"
                matched_files = [f for f in files if f.name.endswith(target_suffix)]
                if matched_files:
                    assert len(matched_files) == 1, f"Expected exactly one file ending with {target_suffix} in {subdir}, found {len(matched_files)}"
                    instance_files[subdir] = matched_files
                elif args.subdir is None:
                    print(f"Warning: Subdirectory '{subdir}' does not have instance index {args.instance}")
        files_by_subdir = instance_files
        # An explicitly requested subdirectory may have been dropped above
        # because it lacked the requested instance index.
        if args.subdir and args.subdir not in files_by_subdir:
            print(f"Instance index {args.instance} was not found in subdirectory '{args.subdir}'")
            sys.exit(1)

    # Track results
    all_failures = defaultdict(list)   # subdir -> list of (filename, key error)
    all_successes = defaultdict(list)  # subdir -> list of filenames
    error_types = defaultdict(set)     # key error -> set of "subdir/filename"

    total_files = sum(len(files) for files in files_by_subdir.values())
    if args.max_files:
        total_files = min(total_files, args.max_files * len(files_by_subdir))
    print(f"Found {total_files} instance files to check")
    print("=" * 60)

    def record_result(subdir_name: str, filename: str, success: bool, error: str) -> None:
        # Book-keeping shared by the parallel and sequential paths.
        if success:
            all_successes[subdir_name].append(filename)
        else:
            key_error = extract_key_error(error)
            if args.verbose:
                print(f"    Error: {key_error}")
            all_failures[subdir_name].append((filename, key_error))
            error_types[key_error].add(f"{subdir_name}/{filename}")

    checked = 0
    for subdir_name, cpp_files in sorted(files_by_subdir.items()):
        print(f"\nChecking {subdir_name}...", flush=True)
        files_to_check = cpp_files
        if args.max_files:
            files_to_check = cpp_files[:args.max_files]

        if args.parallel_jobs > 1:
            # Parallel compilation
            with ThreadPoolExecutor(max_workers=args.parallel_jobs) as executor:
                futures = {
                    executor.submit(compile_single_file, cpp_file, project_root, args.gpu_target, args.verbose): cpp_file
                    for cpp_file in files_to_check
                }
                # Process results as they complete
                for future in as_completed(futures):
                    filename = futures[future].name
                    checked += 1
                    success, error = future.result()
                    status = "OK" if success else "FAILED"
                    print(f"  [{checked}/{total_files}] {filename}... {status}", flush=True)
                    record_result(subdir_name, filename, success, error)
        else:
            # Sequential compilation
            print(f"Compiling {len(files_to_check)} files sequentially...")
            for cpp_file in files_to_check:
                checked += 1
                filename = cpp_file.name
                print(f"  [{checked}/{total_files}] {filename}...", end=" ", flush=True)
                success, error = compile_single_file(cpp_file, project_root, args.gpu_target, args.verbose)
                print("OK" if success else "FAILED")
                record_result(subdir_name, filename, success, error)

    # Print summary
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)

    def instance_index(entry: tuple[str, str]) -> int:
        # Filenames end with _{instance_index}.cpp; sort failures by that
        # index for a stable, human-friendly listing.
        match = re.search(r'_(\d+)\.cpp$', entry[0])
        return int(match.group(1)) if match else 0

    for subdir_name in sorted(files_by_subdir.keys()):
        successes = len(all_successes.get(subdir_name, []))
        failures = len(all_failures.get(subdir_name, []))
        total = successes + failures
        print(f"\n{subdir_name}: {successes}/{total} passed, {failures} failed")
        if failures > 0:
            print("  Failed files:")
            for filename, _error in sorted(all_failures[subdir_name], key=instance_index):
                print(f"    - {filename}")

    # TODO: args.output (blacklist path) is parsed but never written; wire it up.
    # Return exit code based on failures
    total_failures = sum(len(f) for f in all_failures.values())
    if total_failures > 0:
        print(f"\n{total_failures} total failures found")
        return 1
    print("\nAll instances compiled successfully!")
    return 0
if __name__ == "__main__":
sys.exit(main())