From 4d55b9edcc38c0ea5d44a885e7c4bbc2f3f9db0e Mon Sep 17 00:00:00 2001 From: "assistant-librarian[bot]" Date: Tue, 26 Aug 2025 21:11:12 +0000 Subject: [PATCH] Merge commit '19d5327c45932d03fe9b1fe37817af9b5ced810d' into develop --- Jenkinsfile | 11 +- .../test_grouped_convnd_fwd_dataset_xdl.cpp | 1 - test_data/generate_model_configs.py | 103 ++++++++++---- test_data/generate_test_dataset.sh | 126 +++++++++++++----- test_data/run_model_with_miopen.py | 10 ++ 5 files changed, 186 insertions(+), 65 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index d590c01ba7..8842ce6814 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1159,11 +1159,16 @@ pipeline { agent{ label rocmnode("gfx90a")} environment{ setup_args = "NO_CK_BUILD" - execute_args = """ cd test_data && \ - ./generate_test_dataset.sh && \ - cd ../script && \ + execute_args = """ cd ../build && \ ../script/cmake-ck-dev.sh ../ gfx90a && \ make -j64 test_grouped_convnd_fwd_dataset_xdl && \ + cd ../test_data && \ + # Dataset generation modes: + # - small: ~60 test cases (minimal, quick testing - 3 models, 2 batch sizes, 2 image sizes) + # - half: ~300 test cases (moderate coverage - 16 models, 3 batch sizes, 5 image sizes), ~ 17 hours testing time + # - full: ~600 test cases (comprehensive - 16 models, 5 batch sizes, 9 image sizes), ~ 40 hours testing time + ./generate_test_dataset.sh half && \ + cd ../build && \ ./bin/test_grouped_convnd_fwd_dataset_xdl""" } steps{ diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd_dataset_xdl.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_dataset_xdl.cpp index ded68d9a44..a1ffdaa441 100644 --- a/test/grouped_convnd_fwd/test_grouped_convnd_fwd_dataset_xdl.cpp +++ b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_dataset_xdl.cpp @@ -32,7 +32,6 @@ std::vector load_csv_test_cases(const std::string& f while(std::getline(file, line)) { line_number++; - std::cout << "Line " << line_number << ": " << line << std::endl; // Skip comment lines (starting with #) and empty lines if(line.empty() || line[0] == '#') { diff --git a/test_data/generate_model_configs.py b/test_data/generate_model_configs.py index d799c0fb94..125655cef4 100644 --- a/test_data/generate_model_configs.py +++ b/test_data/generate_model_configs.py @@ -10,8 +10,12 @@ import csv import itertools import argparse -def generate_2d_configs(): - """Generate all 2D model configuration combinations""" +def generate_2d_configs(mode='full'): + """Generate all 2D model configuration combinations + + Args: + mode: 'small' for minimal set (~50 configs), 'half' for reduced set (~250 configs), 'full' for comprehensive set (~500 configs) + """ # Define parameter ranges models_2d = [ @@ -24,15 +28,37 @@ def generate_2d_configs(): 'shufflenet_v2_x1_0' ] - batch_sizes = [1, 4, 8, 16, 32] - - # Input dimensions: (height, width) - input_dims = [ - (64, 64), (128, 128), (224, 224), (256, 256), (512, 512), # Square - (224, 320), (224, 448), (320, 224), (448, 224), # Rectangular - (227, 227), # AlexNet preferred - (299, 299) # Inception preferred - ] + if mode == 'small': + # Minimal set for quick testing + batch_sizes = [1, 8] # Just two batch sizes + # Very limited input dimensions - only 2 key sizes + input_dims = [ + (224, 224), # Standard (most common) + (256, 256), # Medium + ] + # Use only first 3 models for minimal testing + models_2d = models_2d[:3] # Only resnet18, resnet34, resnet50 + elif mode == 'half': + # Reduced set for faster testing + batch_sizes = [1, 8, 32] # Small, medium, large + # Reduced input dimensions - 5 key sizes + input_dims = [ + (64, 64), # Small + (224, 224), # Standard (most common) + (512, 512), # Large + (224, 320), # Rectangular + (227, 227), # AlexNet preferred + ] + else: # full mode + # More comprehensive but still limited + batch_sizes = [1, 4, 8, 16, 32] + # More dimensions but skip some redundant ones + input_dims = [ + (64, 64), (128, 128), (224, 224), (256, 256), (512, 512), # Square + (224, 320), (320, 224), # Rectangular (reduced from 4) + (227, 227), # AlexNet preferred + (299, 299) # Inception preferred + ] precisions = ['fp32'] #, 'fp16', 'bf16'] channels = [3] # Most models expect RGB @@ -68,19 +94,44 @@ def generate_2d_configs(): return configs -def generate_3d_configs(): - """Generate all 3D model configuration combinations""" +def generate_3d_configs(mode='full'): + """Generate all 3D model configuration combinations + + Args: + mode: 'small' for minimal set (~10 configs), 'half' for reduced set (~50 configs), 'full' for comprehensive set (~100 configs) + """ models_3d = ['r3d_18', 'mc3_18', 'r2plus1d_18'] - batch_sizes = [1, 2, 4, 8] # 3D models are more memory intensive - temporal_sizes = [8, 16, 32] - - # 3D input dimensions: (height, width) - input_dims = [ - (112, 112), (224, 224), (256, 256), # Standard sizes - (224, 320), (320, 224) # Rectangular - ] + if mode == 'small': + # Minimal set for quick testing + batch_sizes = [1, 4] # Just two batch sizes + temporal_sizes = [8] # Only smallest temporal size + # Very limited spatial dimensions + input_dims = [ + (112, 112), # Standard for 3D + ] + # Use only first model for minimal testing + models_3d = models_3d[:1] # Only r3d_18 + elif mode == 'half': + # Reduced set for faster testing + batch_sizes = [1, 4, 8] # Skip batch_size=2 + temporal_sizes = [8, 16] # Skip 32 (most expensive) + # Reduced spatial dimensions + input_dims = [ + (112, 112), # Small (common for video) + (224, 224), # Standard + (224, 320) # Rectangular + ] + else: # full mode + # More comprehensive but still reasonable + batch_sizes = [1, 2, 4, 8] # 3D models are more memory intensive + temporal_sizes = [8, 16, 32] + # More dimensions + input_dims = [ + (112, 112), (224, 224), (256, 256), # Standard sizes + (224, 320), (320, 224) # Rectangular + ] precisions = ['fp32'] #, 'fp16'] # Skip bf16 for 3D to reduce combinations channels = [3] @@ -142,19 +193,23 @@ def main(): help='Output file for 2D configurations') parser.add_argument('--output-3d', type=str, default='model_configs_3d.csv', help='Output file for 3D configurations') + parser.add_argument('--mode', choices=['small', 'half', 'full'], default='full', + help='Configuration mode: small (~60 total), half (~300 total) or full (~600 total) (default: half)') parser.add_argument('--limit', type=int, help='Limit number of configurations per type (for testing)') args = parser.parse_args() + print(f"Generating {args.mode} model configurations...") + print("Generating 2D model configurations...") - configs_2d = generate_2d_configs() + configs_2d = generate_2d_configs(mode=args.mode) if args.limit: configs_2d = configs_2d[:args.limit] save_configs_to_csv(configs_2d, args.output_2d, "2D") print("Generating 3D model configurations...") - configs_3d = generate_3d_configs() + configs_3d = generate_3d_configs(mode=args.mode) if args.limit: configs_3d = configs_3d[:args.limit] save_configs_to_csv(configs_3d, args.output_3d, "3D") @@ -164,4 +219,4 @@ def main(): print(" Update generate_test_dataset.sh to read from these CSV files") if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test_data/generate_test_dataset.sh b/test_data/generate_test_dataset.sh index 621ea4f144..3fb8fa027b 100755 --- a/test_data/generate_test_dataset.sh +++ b/test_data/generate_test_dataset.sh @@ -3,26 +3,71 @@ # This script captures MIOpen commands from PyTorch models and generates test cases set -e # Exit on error - -# Check if target files already exist -# if [ -f "conv_test_set_2d_dataset.csv" ] && [ -f "conv_test_set_3d_dataset.csv" ]; then -# echo "Target files already exist:" -# [ -f "conv_test_set_2d_dataset.csv" ] && echo " - conv_test_set_2d_dataset.csv ($(wc -l < conv_test_set_2d_dataset.csv) lines)" -# [ -f "conv_test_set_3d_dataset.csv" ] && echo " - conv_test_set_3d_dataset.csv ($(wc -l < conv_test_set_3d_dataset.csv) lines)" -# echo "" -# echo "To regenerate, please remove these files first:" -# echo " rm conv_test_set_2d_dataset.csv conv_test_set_3d_dataset.csv" -# exit 0 -# fi +set +x # Disable command echo (even if called with bash -x) echo "==========================================" echo "CK Convolution Test Dataset Generator" echo "==========================================" +# Check if PyTorch is installed, if not create a virtual environment +echo "Checking for PyTorch installation..." +if ! python3 -c "import torch" 2>/dev/null; then + echo "PyTorch not found. Creating virtual environment..." + + # Create a virtual environment in the current directory + VENV_DIR="./pytorch_venv" + if [ ! -d "$VENV_DIR" ]; then + python3 -m venv $VENV_DIR || { + echo "ERROR: Failed to create virtual environment." + echo "Creating empty CSV files as fallback..." + echo "# 2D Convolution Test Cases" > conv_test_set_2d_dataset.csv + echo "# Combined from multiple models" >> conv_test_set_2d_dataset.csv + echo "# 3D Convolution Test Cases" > conv_test_set_3d_dataset.csv + echo "# Combined from multiple models" >> conv_test_set_3d_dataset.csv + exit 1 + } + fi + + # Activate virtual environment + source $VENV_DIR/bin/activate + + # Install PyTorch in virtual environment with ROCm support + echo "Installing PyTorch and torchvision with ROCm support in virtual environment..." + # Since we're in a ROCm 6.4.1 environment, we need compatible PyTorch + # PyTorch doesn't have 6.4 wheels yet, so we use 6.2 which should be compatible + echo "Installing PyTorch with ROCm 6.2 support (compatible with ROCm 6.4)..." + pip install torch==2.5.1 torchvision==0.20.1 --index-url https://download.pytorch.org/whl/rocm6.2 || { + echo "ERROR: Failed to install PyTorch with ROCm support." + echo "Creating empty CSV files as fallback..." + echo "# 2D Convolution Test Cases" > conv_test_set_2d_dataset.csv + echo "# Combined from multiple models" >> conv_test_set_2d_dataset.csv + echo "# 3D Convolution Test Cases" > conv_test_set_3d_dataset.csv + echo "# Combined from multiple models" >> conv_test_set_3d_dataset.csv + exit 1 + } + echo "PyTorch installed successfully in virtual environment!" + + # Use the virtual environment's Python for the rest of the script + export PYTHON_CMD="$VENV_DIR/bin/python3" +else + echo "PyTorch is already installed." + export PYTHON_CMD="python3" +fi + +# Verify PyTorch installation and GPU support +$PYTHON_CMD -c "import torch; print(f'PyTorch version: {torch.__version__}')" +$PYTHON_CMD -c "import torch; print(f'CUDA/ROCm available: {torch.cuda.is_available()}')" +if ! $PYTHON_CMD -c "import torch; import sys; sys.exit(0 if torch.cuda.is_available() else 1)"; then + echo "WARNING: PyTorch installed but GPU support not available!" + echo "MIOpen commands will not be generated without GPU support." + echo "Continuing anyway to generate placeholder data..." +fi + # Configuration OUTPUT_DIR="generated_datasets" TIMESTAMP=$(date +"%Y%m%d_%H%M%S") -MAX_ITERATIONS=0 # Maximum number of iterations per model type (set to 0 for unlimited) +# Get configuration mode from command line argument (default: full) +CONFIG_MODE="${1:-full}" # Configuration mode: 'small', 'half' or 'full' # Colors RED='\033[0;31m' @@ -42,8 +87,9 @@ echo "Step 1: Generating model configurations" echo "-----------------------------------------" # Generate model configuration files (with limit for testing) -echo "Generating model configuration files..." -python3 generate_model_configs.py \ +echo "Generating model configuration files (mode: $CONFIG_MODE)..." +$PYTHON_CMD generate_model_configs.py \ + --mode $CONFIG_MODE \ --output-2d $OUTPUT_DIR/model_configs_2d.csv \ --output-3d $OUTPUT_DIR/model_configs_3d.csv @@ -55,10 +101,26 @@ fi # Check if running on GPU if ! command -v rocm-smi &> /dev/null; then - echo "WARNING: ROCm not detected. Models will run on CPU (no MIOpen commands)." - echo "For actual MIOpen commands, run this on a system with AMD GPU." + echo "ERROR: ROCm not detected. Cannot generate MIOpen commands without GPU." + echo "This script requires an AMD GPU with ROCm installed." + echo "Creating empty CSV files as placeholder..." + echo "# 2D Convolution Test Cases (No GPU available)" > conv_test_set_2d_dataset.csv + echo "# 3D Convolution Test Cases (No GPU available)" > conv_test_set_3d_dataset.csv + exit 1 fi +# Check if GPU is actually accessible +if ! rocm-smi &> /dev/null; then + echo "ERROR: rocm-smi failed. GPU may not be accessible." + echo "Creating empty CSV files as placeholder..." + echo "# 2D Convolution Test Cases (GPU not accessible)" > conv_test_set_2d_dataset.csv + echo "# 3D Convolution Test Cases (GPU not accessible)" > conv_test_set_3d_dataset.csv + exit 1 +fi + +echo "GPU detected. ROCm version:" +rocm-smi --showdriverversion || true + echo "" echo "Step 2: Running 2D/3D models and capturing MIOpen commands" @@ -85,22 +147,17 @@ while IFS=',' read -r config_name model batch_size channels height width precisi # Increment counter CURRENT_CONFIG=$((CURRENT_CONFIG + 1)) - # Stop after MAX_ITERATIONS if set - if [ $MAX_ITERATIONS -gt 0 ] && [ $CURRENT_CONFIG -gt $MAX_ITERATIONS ]; then - echo -e "${RED}Stopping after $MAX_ITERATIONS iterations (testing mode)${NC}" - break - fi # Build configuration command CONFIG="--model $model --batch-size $batch_size --channels $channels --height $height --width $width --precision $precision" CONFIG_NAME="$config_name" - echo -e "${GREEN}[${CURRENT_CONFIG}/${TOTAL_CONFIGS}]${NC} ${PURPLE}Running MIOpenDriver${NC} ${CYAN}2D${NC} ${YELLOW}$CONFIG_NAME${NC}: ${BLUE}$CONFIG${NC}" + echo -e "${GREEN}[${CURRENT_CONFIG}/${TOTAL_CONFIGS}]${NC} ${CYAN}2D${NC} ${YELLOW}$CONFIG_NAME${NC}" - # Actual run with logging - MIOPEN_ENABLE_LOGGING_CMD=1 python3 run_model_with_miopen.py \ + # Actual run with logging (suppress stdout, only capture stderr with MIOpen commands) + MIOPEN_ENABLE_LOGGING_CMD=1 $PYTHON_CMD run_model_with_miopen.py \ --model $model --batch-size $batch_size --channels $channels --height $height --width $width --precision $precision \ - 2>> $OUTPUT_DIR/${model}_miopen_log_2d.txt || true + > /dev/null 2>> $OUTPUT_DIR/${model}_miopen_log_2d.txt || true done < $OUTPUT_DIR/model_configs_2d.csv @@ -125,23 +182,18 @@ while IFS=',' read -r config_name model batch_size channels temporal_size height # Increment counter CURRENT_3D_CONFIG=$((CURRENT_3D_CONFIG + 1)) - # Stop after MAX_ITERATIONS if set - if [ $MAX_ITERATIONS -gt 0 ] && [ $CURRENT_3D_CONFIG -gt $MAX_ITERATIONS ]; then - echo -e "${RED}Stopping after $MAX_ITERATIONS iterations (testing mode)${NC}" - break - fi # Build configuration command for 3D models CONFIG="--model $model --batch-size $batch_size --channels $channels --temporal-size $temporal_size --height $height --width $width --precision $precision" CONFIG_NAME="$config_name" - echo -e "${GREEN}[${CURRENT_3D_CONFIG}/${TOTAL_3D_CONFIGS}]${NC} ${PURPLE}Running MIOpenDriver${NC} ${CYAN}3D${NC} ${YELLOW}$CONFIG_NAME${NC}: ${BLUE}$CONFIG${NC}" + echo -e "${GREEN}[${CURRENT_3D_CONFIG}/${TOTAL_3D_CONFIGS}]${NC} ${CYAN}3D${NC} ${YELLOW}$CONFIG_NAME${NC}" - # Actual run with logging - MIOPEN_ENABLE_LOGGING_CMD=1 python3 run_model_with_miopen.py \ + # Actual run with logging (suppress stdout, only capture stderr with MIOpen commands) + MIOPEN_ENABLE_LOGGING_CMD=1 $PYTHON_CMD run_model_with_miopen.py \ --model $model --batch-size $batch_size --channels $channels --temporal-size $temporal_size --height $height --width $width --precision $precision \ - 2>> $OUTPUT_DIR/${model}_miopen_log_3d.txt || true + > /dev/null 2>> $OUTPUT_DIR/${model}_miopen_log_3d.txt || true done < $OUTPUT_DIR/model_configs_3d.csv @@ -159,7 +211,7 @@ for log_file in $OUTPUT_DIR/*_miopen_log_2d.txt; do output_csv="$OUTPUT_DIR/${base_name}_cases_2d.csv" echo " Converting $log_file -> $output_csv" - python3 miopen_to_csv.py \ + $PYTHON_CMD miopen_to_csv.py \ --input "$log_file" \ --output-2d "$output_csv" \ --model-name "$base_name" \ @@ -176,7 +228,7 @@ for log_file in $OUTPUT_DIR/*_miopen_log_3d.txt; do output_csv="$OUTPUT_DIR/${base_name}_cases_3d.csv" echo " Converting $log_file -> $output_csv" - python3 miopen_to_csv.py \ + $PYTHON_CMD miopen_to_csv.py \ --input "$log_file" \ --output-3d "$output_csv" \ --model-name "$base_name" \ @@ -259,4 +311,4 @@ echo "" echo "To use these datasets:" echo " 1. Build the test: cd ../script && make -j64 test_grouped_convnd_fwd_dataset_xdl" echo " 2. Run the test: ./bin/test_grouped_convnd_fwd_dataset_xdl" -echo "" \ No newline at end of file +echo "" diff --git a/test_data/run_model_with_miopen.py b/test_data/run_model_with_miopen.py index 83d08c82b7..3d96e19f2f 100644 --- a/test_data/run_model_with_miopen.py +++ b/test_data/run_model_with_miopen.py @@ -87,6 +87,16 @@ def main(): else: device = torch.device(args.device) + # Check if actually running on GPU + if device.type == 'cpu': + import sys + print(f"WARNING: Running on CPU, MIOpen commands will not be generated!", file=sys.stderr) + print(f"CUDA/ROCm available: {torch.cuda.is_available()}", file=sys.stderr) + if torch.cuda.is_available(): + print(f"GPU device count: {torch.cuda.device_count()}", file=sys.stderr) + print(f"GPU name: {torch.cuda.get_device_name(0) if torch.cuda.device_count() > 0 else 'N/A'}", file=sys.stderr) + # Continue anyway for testing purposes + if not args.quiet: print(f"Using device: {device}")