Files
composable_kernel/script/gemm_profile.sh
Aviral Goel ab68c9d384 chore(copyright): update copyright header for script directory (#3184)
* chore(copyright): update copyright header for tile_engine directory

* chore(copyright): update copyright header for script directory

---------

Co-authored-by: Vidyasagar Ananthan <vanantha@amd.com>
2025-11-11 11:26:01 -08:00

115 lines
3.2 KiB
Bash
Executable File

#!/bin/bash
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
# Benchmark executable and the precision / verbosity options passed to it on
# every run.
BIN=./bin/tile_example_gemm_weight_preshuffle
PREC=fp8
VERBOSITY=2

# Problem sizes to profile. Each entry is one "m n k" triplet, separated by
# single spaces so the main loop can split it with `read`.
ARGS_LIST=()

# m = 1..15: every m is profiled with the same two (n, k) shapes.
for small_m in {1..15}; do
  ARGS_LIST+=("$small_m 2048 5120" "$small_m 5120 1024")
done
unset small_m

# m = 16 gets two extra small shapes, followed by the larger problem sizes.
ARGS_LIST+=(
  "16 64 128"
  "16 64 256"
  "16 2048 5120"
  "16 5120 1024"
  "512 768 640"
  "1024 1792 896"
  "1536 2816 1152"
  "2048 5120 1024"
  "2048 5120 8192"
  "2048 7168 8192"
  "2048 8192 3584"
  "16384 7168 8192"
  "16384 8192 3584"
)
# CSV that collects one row per profiled problem size. It is created (or
# truncated, if it already exists) here with just the column header row.
OUTPUT_FILE="gemm_profile_results.csv"
printf '%s\n' "m,n,k,Pipeline,Time_ms,TFlops,GBps,Verification" > "$OUTPUT_FILE"
#######################################
# Print the first line of a text that contains a fixed substring.
# Arguments: $1 - text to scan (may contain several lines)
#            $2 - literal substring to look for (not a pattern)
# Outputs:   the first matching line on stdout
# Returns:   0 if a line matched, 1 otherwise
#######################################
first_line_containing() {
  local text=$1 needle=$2 line
  while IFS= read -r line; do
    if [[ $line == *"$needle"* ]]; then
      printf '%s\n' "$line"
      return 0
    fi
  done <<< "$text"
  return 1
}

#######################################
# Print whatever follows a marker on the first line that contains it.
# If the marker occurs more than once on that line, the text after the last
# occurrence is printed.
# Arguments: $1 - text to scan (may contain several lines)
#            $2 - literal marker string
# Outputs:   the remainder of the line on stdout
# Returns:   0 if the marker was found, 1 otherwise
#######################################
text_after_marker() {
  local line
  line=$(first_line_containing "$1" "$2") || return 1
  printf '%s\n' "${line##*"$2"}"
}

#######################################
# Print the first number that is directly followed by " <unit>".
# Besides "digits.digits", plain integers ("2 ms") and exponent notation
# ("1.5e-05 TFlops") are accepted, so such values are not silently dropped.
# Arguments: $1 - text to scan
#            $2 - unit label such as "ms", "TFlops" or "GB/s"; it is pasted
#                 into a regex, so it must not contain regex metacharacters
# Outputs:   the number (without the unit) on stdout
# Returns:   0 if a number was found, 1 otherwise
#######################################
extract_metric() {
  local text=$1 unit=$2
  local number_re='[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?'
  local metric_re="(${number_re}) ${unit}"
  [[ $text =~ $metric_re ]] || return 1
  printf '%s\n' "${BASH_REMATCH[1]}"
}

#######################################
# Run the benchmark once per "m n k" triplet and append one CSV row for each.
# Globals:   BIN, PREC, VERBOSITY, ARGS_LIST, OUTPUT_FILE (all read)
# Outputs:   progress and parsed values on stdout; rows appended to
#            OUTPUT_FILE
#######################################
run_gemm_profile() {
  local args m n k output status
  local pipeline perf_line verification time_ms tflops gbps

  # Loop over each argument set
  for args in "${ARGS_LIST[@]}"; do
    read -r m n k <<< "$args"
    echo "Testing: m=$m, n=$n, k=$k"

    # stderr is still discarded, but the exit status is kept so that a missing
    # or crashing binary is reported instead of looking like a parse problem.
    output=$("$BIN" "-m=$m" "-n=$n" "-k=$k" "-prec=$PREC" "-v=$VERBOSITY" 2>/dev/null)
    status=$?
    if (( status != 0 )); then
      echo " WARNING: $BIN exited with status $status"
    fi

    # Only the first matching line is used for each field, so repeated lines
    # in the output cannot put a newline inside a CSV cell.

    # Extract pipeline information
    # Format: "Launching kernel with args: gemm_fp8_pipeline_AGmemBGmemCRegV2_128x256x256x256_16x16x128_16x16_0x0x0"
    pipeline=$(text_after_marker "$output" "Launching kernel with args: ")

    # Extract TFlops and GB/s from the output
    # Format: "Run Gemm kernel with M=3840 N=4096 K=2048 ... : 0.042338 ms, 1521.67 TFlops, 1126.89 GB/s,"
    perf_line=$(first_line_containing "$output" "TFlops")

    # Extract verification result
    # Format: "The GPU verification result is:correct" (note: no space after colon)
    verification=$(text_after_marker "$output" "The GPU verification result is:")

    if [[ -n "$perf_line" ]]; then
      time_ms=$(extract_metric "$perf_line" "ms")
      tflops=$(extract_metric "$perf_line" "TFlops")
      gbps=$(extract_metric "$perf_line" "GB/s")

      # Use extracted pipeline or default if not found
      if [[ -z "$pipeline" ]]; then
        pipeline="gemm_basic"
      fi

      # Print to terminal
      echo " Pipeline: $pipeline"
      echo " Time: ${time_ms} ms"
      echo " TFlops: ${tflops}"
      echo " GB/s: ${gbps}"
      echo " Verification: ${verification:-N/A}"

      # Save to CSV file
      echo "$m,$n,$k,$pipeline,$time_ms,$tflops,$gbps,$verification" >> "$OUTPUT_FILE"
    else
      echo " ERROR: Could not parse performance data"
      echo ""
      # Keep a row for the failed case so the CSV still lists every size.
      echo "$m,$n,$k,$pipeline,,,,$verification" >> "$OUTPUT_FILE"
    fi
  done
}

run_gemm_profile
# Closing banner: where the results were written and how many problem sizes
# were configured (this counts ARGS_LIST entries, not successful runs).
banner_rule="=========================================="
printf '%s\n' \
  "$banner_rule" \
  "Profile completed!" \
  "Results saved to: $OUTPUT_FILE" \
  "Total tests run: ${#ARGS_LIST[@]}" \
  "$banner_rule"