feat: add failure type categorization to Playwright PR comments

Add categorization of test failures by type (screenshot assertions,
expectation failures, timeouts, and other) to help developers quickly
understand what types of issues are occurring.

Changes:
- Add categorizeFailureType() function to detect failure types from error messages
- Track failure type counts in TestCounts interface
- Display "Failure Breakdown" section in PR comments when tests fail
- Show counts for: screenshot, expectation, timeout, and other failures

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
snomiao
2025-12-22 19:49:27 +00:00
parent 8c0c819471
commit 3af88a8b93
2 changed files with 109 additions and 3 deletions

View File

@@ -52,6 +52,14 @@ interface FailingTest {
line: number line: number
error: string error: string
tracePath?: string tracePath?: string
failureType?: 'screenshot' | 'expectation' | 'timeout' | 'other'
}
/**
 * Tally of failing tests per failure category.
 * Each field is incremented once per failing test whose `failureType`
 * matches it; tests with no `failureType` are counted under `other`.
 */
interface FailureTypeCounts {
// Failing tests categorized as 'screenshot'
screenshot: number
// Failing tests categorized as 'expectation'
expectation: number
// Failing tests categorized as 'timeout'
timeout: number
// Failing tests categorized as 'other', plus those without a category
other: number
} }
interface TestCounts { interface TestCounts {
@@ -61,6 +69,48 @@ interface TestCounts {
skipped: number skipped: number
total: number total: number
failingTests?: FailingTest[] failingTests?: FailingTest[]
failureTypes?: FailureTypeCounts
}
/**
 * Categorize a test failure as a screenshot, expectation, timeout, or other
 * failure, based on the result status and the error message.
 *
 * A `'timedOut'` status always yields `'timeout'`. Otherwise the message is
 * scanned for keywords; screenshot keywords are checked before expectation
 * keywords, so a message containing both is classified as `'screenshot'`.
 *
 * @param error - Error message reported for the failing test
 * @param status - Result status of the test; `'timedOut'` maps to `'timeout'`
 * @returns The failure category; `'other'` when nothing matches
 */
function categorizeFailureType(
  error: string,
  status: string
): 'screenshot' | 'expectation' | 'timeout' | 'other' {
  if (status === 'timedOut') {
    return 'timeout'
  }

  const errorLower = error.toLowerCase()

  // Free-text keywords, matched case-insensitively via the lowercased message.
  // ('toHaveScreenshot' needs no entry of its own: it contains 'screenshot'.)
  const screenshotKeywords = [
    'screenshot',
    'snapshot',
    'image comparison',
    'pixel',
    'visual'
  ]
  if (screenshotKeywords.some((keyword) => errorLower.includes(keyword))) {
    return 'screenshot'
  }

  const expectationKeywords = ['expect', 'assertion']
  // Matcher names are camelCase identifiers, so they are matched against the
  // original message: a lowercased message can never contain 'toBe', and
  // lowercasing the needle instead would match unrelated words ("October").
  const matcherNames = ['toEqual', 'toBe', 'toContain', 'toHave', 'toMatch']
  if (
    expectationKeywords.some((keyword) => errorLower.includes(keyword)) ||
    matcherNames.some((matcher) => error.includes(matcher))
  ) {
    return 'expectation'
  }

  return 'other'
}
/** /**
@@ -94,12 +144,15 @@ function extractFailingTests(
} }
} }
const failureType = categorizeFailureType(error, result.status)
failingTests.push({ failingTests.push({
name: test.title, name: test.title,
filePath: test.location?.file || 'unknown', filePath: test.location?.file || 'unknown',
line: test.location?.line || 0, line: test.location?.line || 0,
error: error.split('\n')[0], // First line of error error: error.split('\n')[0], // First line of error
tracePath tracePath,
failureType
}) })
} }
} }
@@ -126,7 +179,13 @@ function extractTestCounts(reportDir: string): TestCounts {
flaky: 0, flaky: 0,
skipped: 0, skipped: 0,
total: 0, total: 0,
failingTests: [] failingTests: [],
failureTypes: {
screenshot: 0,
expectation: 0,
timeout: 0,
other: 0
}
} }
try { try {
@@ -155,6 +214,14 @@ function extractTestCounts(reportDir: string): TestCounts {
} }
} }
// Count failure types
if (counts.failingTests) {
for (const test of counts.failingTests) {
const type = test.failureType || 'other'
counts.failureTypes![type]++
}
}
return counts return counts
} }
} }
@@ -195,6 +262,14 @@ function extractTestCounts(reportDir: string): TestCounts {
} }
} }
// Count failure types
if (counts.failingTests) {
for (const test of counts.failingTests) {
const type = test.failureType || 'other'
counts.failureTypes![type]++
}
}
return counts return counts
} }
} catch (e) { } catch (e) {
@@ -230,6 +305,14 @@ function extractTestCounts(reportDir: string): TestCounts {
} }
} }
// Count failure types
if (counts.failingTests) {
for (const test of counts.failingTests) {
const type = test.failureType || 'other'
counts.failureTypes![type]++
}
}
return counts return counts
} }
} catch (e) { } catch (e) {

View File

@@ -252,6 +252,10 @@ else
total_flaky=0 total_flaky=0
total_skipped=0 total_skipped=0
total_tests=0 total_tests=0
total_screenshot_failures=0
total_expectation_failures=0
total_timeout_failures=0
total_other_failures=0
# Parse counts and calculate totals # Parse counts and calculate totals
IFS='|' read -r -a counts_array <<< "$all_counts" IFS='|' read -r -a counts_array <<< "$all_counts"
@@ -265,6 +269,10 @@ else
flaky=$(echo "$counts_json" | jq -r '.flaky // 0') flaky=$(echo "$counts_json" | jq -r '.flaky // 0')
skipped=$(echo "$counts_json" | jq -r '.skipped // 0') skipped=$(echo "$counts_json" | jq -r '.skipped // 0')
total=$(echo "$counts_json" | jq -r '.total // 0') total=$(echo "$counts_json" | jq -r '.total // 0')
screenshot=$(echo "$counts_json" | jq -r '.failureTypes.screenshot // 0')
expectation=$(echo "$counts_json" | jq -r '.failureTypes.expectation // 0')
timeout=$(echo "$counts_json" | jq -r '.failureTypes.timeout // 0')
other=$(echo "$counts_json" | jq -r '.failureTypes.other // 0')
else else
# Fallback parsing without jq # Fallback parsing without jq
passed=$(echo "$counts_json" | sed -n 's/.*"passed":\([0-9]*\).*/\1/p') passed=$(echo "$counts_json" | sed -n 's/.*"passed":\([0-9]*\).*/\1/p')
@@ -272,13 +280,21 @@ else
flaky=$(echo "$counts_json" | sed -n 's/.*"flaky":\([0-9]*\).*/\1/p') flaky=$(echo "$counts_json" | sed -n 's/.*"flaky":\([0-9]*\).*/\1/p')
skipped=$(echo "$counts_json" | sed -n 's/.*"skipped":\([0-9]*\).*/\1/p') skipped=$(echo "$counts_json" | sed -n 's/.*"skipped":\([0-9]*\).*/\1/p')
total=$(echo "$counts_json" | sed -n 's/.*"total":\([0-9]*\).*/\1/p') total=$(echo "$counts_json" | sed -n 's/.*"total":\([0-9]*\).*/\1/p')
screenshot=0
expectation=0
timeout=0
other=0
fi fi
total_passed=$((total_passed + ${passed:-0})) total_passed=$((total_passed + ${passed:-0}))
total_failed=$((total_failed + ${failed:-0})) total_failed=$((total_failed + ${failed:-0}))
total_flaky=$((total_flaky + ${flaky:-0})) total_flaky=$((total_flaky + ${flaky:-0}))
total_skipped=$((total_skipped + ${skipped:-0})) total_skipped=$((total_skipped + ${skipped:-0}))
total_tests=$((total_tests + ${total:-0})) total_tests=$((total_tests + ${total:-0}))
total_screenshot_failures=$((total_screenshot_failures + ${screenshot:-0}))
total_expectation_failures=$((total_expectation_failures + ${expectation:-0}))
total_timeout_failures=$((total_timeout_failures + ${timeout:-0}))
total_other_failures=$((total_other_failures + ${other:-0}))
fi fi
done done
unset IFS unset IFS
@@ -309,6 +325,13 @@ $status_icon **$status_text** • ⏰ $(date -u '+%m/%d/%Y, %I:%M:%S %p') UTC"
comment="$comment comment="$comment
**$total_passed** ✅ • **$total_failed** $([ $total_failed -gt 0 ] && echo '❌' || echo '✅') • **$total_flaky** $([ $total_flaky -gt 0 ] && echo '⚠️' || echo '✅') • **$total_skipped** ⏭️ • **$total_tests** total" **$total_passed** ✅ • **$total_failed** $([ $total_failed -gt 0 ] && echo '❌' || echo '✅') • **$total_flaky** $([ $total_flaky -gt 0 ] && echo '⚠️' || echo '✅') • **$total_skipped** ⏭️ • **$total_tests** total"
# Add failure breakdown if there are failures
if [ $total_failed -gt 0 ]; then
comment="$comment
**Failure Breakdown:** 📸 $total_screenshot_failures screenshot • ✓ $total_expectation_failures expectation • ⏱️ $total_timeout_failures timeout • ❓ $total_other_failures other"
fi
fi fi
# Collect all failing tests across browsers # Collect all failing tests across browsers