From 2d5d18c020201f3b65b6720b1493c90e179e1b81 Mon Sep 17 00:00:00 2001
From: Csongor Czezar <44126075+csongorczezar@users.noreply.github.com>
Date: Sat, 10 Jan 2026 22:09:18 -0800
Subject: [PATCH] feat: improved playwright comment format (#7882)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Description
Improve Playwright PR comment format

### Problem
The current Playwright PR comment format is verbose and doesn't provide
easy access to failing test details.
Developers need to navigate multiple levels deep to:
Find which tests failed
Access test source code
View trace files for debugging
This makes debugging test failures tedious and time-consuming.

### Solution
Improved the Playwright PR comment format to be concise and actionable
by:
Modified extract-playwright-counts.ts to extract detailed failure
information from Playwright JSON reports including test names, file
paths, and trace URLs
Updated pr-playwright-deploy-and-comment.sh to generate concise comments
with failed tests listed upfront
Modified ci-tests-e2e.yaml to pass GITHUB_SHA for source code links
Modified ci-tests-e2e-forks.yaml to pass GITHUB_SHA for forked PR
workflow

**Before:**
Large multi-section layout with emoji-heavy headers
Summary section listing all counts vertically
Browser results displayed prominently with detailed counts
Failed test details only accessible through report links
No direct links to test source code or traces

**After:**
Concise single-line header with status
Single-line summary: "X passed, Y failed, Z flaky, W skipped (Total: N)"
Failed tests section (only shown when tests fail) with:
Direct links to test source code on GitHub
Direct links to trace viewer for each failure
Browser details collapsed in details section
Overall roughly half size reduction in visible text

### Testing
Verified TypeScript extraction logic for parsing Playwright JSON reports
Validated shell script syntax
Confirmed GitHub workflow changes are properly formatted
Will be fully tested on next PR with actual test failures

┆Issue is synchronized with this [Notion
page](https://www.notion.so/PR-7882-feat-improved-playwright-comment-format-2e16d73d365081609078e34773063511)
by [Unito](https://www.unito.io)
---
 .github/workflows/ci-tests-e2e-forks.yaml     |   5 +-
 .github/workflows/ci-tests-e2e.yaml           |   3 +-
 scripts/cicd/extract-playwright-counts.ts     | 146 +++++++++++++++++-
 .../cicd/pr-playwright-deploy-and-comment.sh  | 100 +++++++-----
 4 files changed, 208 insertions(+), 46 deletions(-)

diff --git a/.github/workflows/ci-tests-e2e-forks.yaml b/.github/workflows/ci-tests-e2e-forks.yaml
index c1828b7fb..8f039f1c4 100644
--- a/.github/workflows/ci-tests-e2e-forks.yaml
+++ b/.github/workflows/ci-tests-e2e-forks.yaml
@@ -1,9 +1,9 @@
 # Description: Deploys test results from forked PRs (forks can't access deployment secrets)
-name: "CI: Tests E2E (Deploy for Forks)"
+name: 'CI: Tests E2E (Deploy for Forks)'
 
 on:
   workflow_run:
-    workflows: ["CI: Tests E2E"]
+    workflows: ['CI: Tests E2E']
     types: [requested, completed]
 
 env:
@@ -81,6 +81,7 @@ jobs:
           CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
           CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
           GITHUB_TOKEN: ${{ github.token }}
+          GITHUB_SHA: ${{ github.event.workflow_run.head_sha }}
         run: |
           # Rename merged report if exists
           [ -d "reports/playwright-report-chromium-merged" ] && \
diff --git a/.github/workflows/ci-tests-e2e.yaml b/.github/workflows/ci-tests-e2e.yaml
index 91ae9a481..90bf4112c 100644
--- a/.github/workflows/ci-tests-e2e.yaml
+++ b/.github/workflows/ci-tests-e2e.yaml
@@ -1,5 +1,5 @@
 # Description: End-to-end testing with Playwright across multiple browsers, deploys test reports to Cloudflare Pages
-name: "CI: Tests E2E"
+name: 'CI: Tests E2E'
 
 on:
   push:
@@ -222,6 +222,7 @@ jobs:
           CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
           CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
           GITHUB_TOKEN: ${{ github.token }}
+          GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
         run: |
           bash ./scripts/cicd/pr-playwright-deploy-and-comment.sh \
             "${{ github.event.pull_request.number }}" \
diff --git a/scripts/cicd/extract-playwright-counts.ts b/scripts/cicd/extract-playwright-counts.ts
index ff6f44db3..1a7922816 100755
--- a/scripts/cicd/extract-playwright-counts.ts
+++ b/scripts/cicd/extract-playwright-counts.ts
@@ -10,37 +10,158 @@ interface TestStats {
   finished?: number
 }
 
+interface TestResult {
+  status: string
+  duration?: number
+  error?: {
+    message?: string
+    stack?: string
+  }
+  attachments?: Array<{
+    name: string
+    path?: string
+    contentType: string
+  }>
+}
+
+interface TestCase {
+  title: string
+  ok: boolean
+  outcome: string
+  results: TestResult[]
+}
+
+interface Suite {
+  title: string
+  file: string
+  suites?: Suite[]
+  tests?: TestCase[]
+}
+
+interface FullReportData {
+  stats?: TestStats
+  suites?: Suite[]
+}
+
 interface ReportData {
   stats?: TestStats
 }
 
+interface FailedTest {
+  name: string
+  file: string
+  traceUrl?: string
+  error?: string
+}
+
 interface TestCounts {
   passed: number
   failed: number
   flaky: number
   skipped: number
   total: number
+  failures?: FailedTest[]
+}
+
+/**
+ * Extract failed test details from Playwright report
+ */
+function extractFailedTests(
+  reportData: FullReportData,
+  baseUrl?: string
+): FailedTest[] {
+  const failures: FailedTest[] = []
+
+  function processTest(test: TestCase, file: string, suitePath: string[]) {
+    // Check if test failed or is flaky
+    const hasFailed = test.results.some(
+      (r) => r.status === 'failed' || r.status === 'timedOut'
+    )
+    const isFlaky = test.outcome === 'flaky'
+
+    if (hasFailed || isFlaky) {
+      const fullTestName = [...suitePath, test.title]
+        .filter(Boolean)
+        .join(' › ')
+      const failedResult = test.results.find(
+        (r) => r.status === 'failed' || r.status === 'timedOut'
+      )
+
+      // Find trace attachment
+      let traceUrl: string | undefined
+      if (failedResult?.attachments) {
+        const traceAttachment = failedResult.attachments.find(
+          (a) => a.name === 'trace' && a.contentType === 'application/zip'
+        )
+        if (traceAttachment?.path) {
+          // Convert local path to URL path
+          const tracePath = traceAttachment.path.replace(/\\/g, '/')
+          const traceFile = path.basename(tracePath)
+          if (baseUrl) {
+            // Construct trace viewer URL
+            const traceDataUrl = `${baseUrl}/data/${traceFile}`
+            traceUrl = `${baseUrl}/trace/?trace=${encodeURIComponent(traceDataUrl)}`
+          }
+        }
+      }
+
+      failures.push({
+        name: fullTestName,
+        file: file,
+        traceUrl,
+        error: failedResult?.error?.message
+      })
+    }
+  }
+
+  function processSuite(suite: Suite, parentPath: string[] = []) {
+    const suitePath = suite.title ? [...parentPath, suite.title] : parentPath
+
+    // Process tests in this suite
+    if (suite.tests) {
+      for (const test of suite.tests) {
+        processTest(test, suite.file, suitePath)
+      }
+    }
+
+    // Recursively process nested suites
+    if (suite.suites) {
+      for (const childSuite of suite.suites) {
+        processSuite(childSuite, suitePath)
+      }
+    }
+  }
+
+  if (reportData.suites) {
+    for (const suite of reportData.suites) {
+      processSuite(suite)
+    }
+  }
+
+  return failures
 }
 
 /**
  * Extract test counts from Playwright HTML report
  * @param reportDir - Path to the playwright-report directory
- * @returns Test counts { passed, failed, flaky, skipped, total }
+ * @param baseUrl - Base URL of the deployed report (for trace links)
+ * @returns Test counts { passed, failed, flaky, skipped, total, failures }
  */
-function extractTestCounts(reportDir: string): TestCounts {
+function extractTestCounts(reportDir: string, baseUrl?: string): TestCounts {
   const counts: TestCounts = {
     passed: 0,
     failed: 0,
     flaky: 0,
     skipped: 0,
-    total: 0
+    total: 0,
+    failures: []
   }
 
   try {
     // First, try to find report.json which Playwright generates with JSON reporter
     const jsonReportFile = path.join(reportDir, 'report.json')
     if (fs.existsSync(jsonReportFile)) {
-      const reportJson: ReportData = JSON.parse(
+      const reportJson: FullReportData = JSON.parse(
         fs.readFileSync(jsonReportFile, 'utf-8')
       )
       if (reportJson.stats) {
@@ -54,6 +175,12 @@ function extractTestCounts(reportDir: string): TestCounts {
         counts.failed = stats.unexpected || 0
         counts.flaky = stats.flaky || 0
         counts.skipped = stats.skipped || 0
+
+        // Extract detailed failure information
+        if (counts.failed > 0 || counts.flaky > 0) {
+          counts.failures = extractFailedTests(reportJson, baseUrl)
+        }
+
         return counts
       }
     }
@@ -169,15 +296,18 @@ function extractTestCounts(reportDir: string): TestCounts {
 
 // Main execution
 const reportDir = process.argv[2]
+const baseUrl = process.argv[3] // Optional: base URL for trace links
 
 if (!reportDir) {
-  console.error('Usage: extract-playwright-counts.ts <report-directory>')
+  console.error(
+    'Usage: extract-playwright-counts.ts <report-directory> [base-url]'
+  )
   process.exit(1)
 }
 
-const counts = extractTestCounts(reportDir)
+const counts = extractTestCounts(reportDir, baseUrl)
 
 // Output as JSON for easy parsing in shell script
-console.log(JSON.stringify(counts))
+process.stdout.write(JSON.stringify(counts) + '\n')
 
-export { extractTestCounts }
+export { extractTestCounts, extractFailedTests }
diff --git a/scripts/cicd/pr-playwright-deploy-and-comment.sh b/scripts/cicd/pr-playwright-deploy-and-comment.sh
index 840203f44..9332e60b7 100755
--- a/scripts/cicd/pr-playwright-deploy-and-comment.sh
+++ b/scripts/cicd/pr-playwright-deploy-and-comment.sh
@@ -134,23 +134,22 @@ post_comment() {
 
 # Main execution
 if [ "$STATUS" = "starting" ]; then
-    # Post starting comment
+    # Post concise starting comment
     comment=$(cat <<EOF
 $COMMENT_MARKER
-## 🎭 Playwright Test Results
+## 🎭 Playwright Tests: ⏳ Running...
 
-<img alt='loading' src='https://github.com/user-attachments/assets/755c86ee-e445-4ea8-bc2c-cca85df48686' width='14px' height='14px'/> **Tests are starting...**
+Tests started at $START_TIME UTC
 
-⏰ Started at: $START_TIME UTC
+<details>
+<summary>📊 Browser Tests</summary>
 
-### 🚀 Running Tests
-- 🧪 **chromium**: Running tests...
-- 🧪 **chromium-0.5x**: Running tests...
-- 🧪 **chromium-2x**: Running tests...
-- 🧪 **mobile-chrome**: Running tests...
+- **chromium**: Running...
+- **chromium-0.5x**: Running...
+- **chromium-2x**: Running...
+- **mobile-chrome**: Running...
 
----
-⏱️ Please wait while tests are running...
+</details>
 EOF
 )
     post_comment "$comment"
@@ -189,7 +188,8 @@ else
                 
                 if command -v tsx > /dev/null 2>&1 && [ -f "$EXTRACT_SCRIPT" ]; then
                     echo "Extracting counts from $REPORT_DIR using $EXTRACT_SCRIPT" >&2
-                    counts=$(tsx "$EXTRACT_SCRIPT" "$REPORT_DIR" 2>&1 || echo '{}')
+                    # Pass the base URL so we can generate trace links
+                    counts=$(tsx "$EXTRACT_SCRIPT" "$REPORT_DIR" "$url" 2>&1 || echo '{}')
                     echo "Extracted counts for $browser: $counts" >&2
                     echo "$counts" > "$temp_dir/$i.counts"
                 else
@@ -286,43 +286,74 @@ else
     # Determine overall status
     if [ $total_failed -gt 0 ]; then
         status_icon="❌"
-        status_text="Some tests failed"
+        status_text="Failed"
     elif [ $total_flaky -gt 0 ]; then
         status_icon="⚠️"
-        status_text="Tests passed with flaky tests"
+        status_text="Passed with flaky tests"
     elif [ $total_tests -gt 0 ]; then
         status_icon="✅"
-        status_text="All tests passed!"
+        status_text="Passed"
     else
         status_icon="🕵🏻"
-        status_text="No test results found"
+        status_text="No test results"
     fi
     
-    # Generate completion comment
+    # Generate concise completion comment
     comment="$COMMENT_MARKER
-## 🎭 Playwright Test Results
-
-$status_icon **$status_text**
-
-⏰ Completed at: $(date -u '+%m/%d/%Y, %I:%M:%S %p') UTC"
+## 🎭 Playwright Tests: $status_icon **$status_text**"
 
     # Add summary counts if we have test data
     if [ $total_tests -gt 0 ]; then
         comment="$comment
 
-### 📈 Summary
-- **Total Tests:** $total_tests
-- **Passed:** $total_passed ✅
-- **Failed:** $total_failed $([ $total_failed -gt 0 ] && echo '❌' || echo '')
-- **Flaky:** $total_flaky $([ $total_flaky -gt 0 ] && echo '⚠️' || echo '')
-- **Skipped:** $total_skipped $([ $total_skipped -gt 0 ] && echo '⏭️' || echo '')"
+**Results:** $total_passed passed, $total_failed failed, $total_flaky flaky, $total_skipped skipped (Total: $total_tests)"
+    fi
+
+    # Extract and display failed tests from all browsers
+    if [ $total_failed -gt 0 ] || [ $total_flaky -gt 0 ]; then
+        comment="$comment
+
+### ❌ Failed Tests"
+        
+        # Process each browser's failures
+        for counts_json in "${counts_array[@]}"; do
+            [ -z "$counts_json" ] || [ "$counts_json" = "{}" ] && continue
+            
+            if command -v jq > /dev/null 2>&1; then
+                # Extract failures array from JSON
+                failures=$(echo "$counts_json" | jq -r '.failures // [] | .[]? | "\(.name)|\(.file)|\(.traceUrl // "")"')
+                
+                if [ -n "$failures" ]; then
+                    while IFS='|' read -r test_name test_file trace_url; do
+                        [ -z "$test_name" ] && continue
+                        
+                        # Convert file path to GitHub URL (relative to repo root)
+                        github_file_url="https://github.com/$GITHUB_REPOSITORY/blob/$GITHUB_SHA/$test_file"
+                        
+                        # Build the failed test line
+                        test_line="- [$test_name]($github_file_url)"
+                        
+                        if [ -n "$trace_url" ] && [ "$trace_url" != "null" ]; then
+                            test_line="$test_line: [View trace]($trace_url)"
+                        fi
+                        
+                        comment="$comment
+$test_line"
+                    done <<< "$failures"
+                fi
+            fi
+        done
     fi
     
+    # Add browser reports in collapsible section
     comment="$comment
 
-### 📊 Test Reports by Browser"
+<details>
+<summary>📊 Browser Reports</summary>
+
+"
     
-    # Add browser results with individual counts
+    # Add browser results
     i=0
     IFS=' ' read -r -a browser_array <<< "$BROWSERS"
     IFS=' ' read -r -a url_array <<< "$urls"
@@ -349,7 +380,7 @@ $status_icon **$status_text**
                 fi
                 
                 if [ -n "$b_total" ] && [ "$b_total" != "0" ]; then
-                    counts_str=" • ✅ $b_passed / ❌ $b_failed / ⚠️ $b_flaky / ⏭️ $b_skipped"
+                    counts_str=" (✅ $b_passed / ❌ $b_failed / ⚠️ $b_flaky / ⏭️ $b_skipped)"
                 else
                     counts_str=""
                 fi
@@ -358,10 +389,10 @@ $status_icon **$status_text**
             fi
             
             comment="$comment
-- ✅ **${browser}**: [View Report](${url})${counts_str}"
+- **${browser}**: [View Report](${url})${counts_str}"
         else
             comment="$comment
-- ❌ **${browser}**: Deployment failed"
+- **${browser}**: ❌ Deployment failed"
         fi
         i=$((i + 1))
     done
@@ -369,8 +400,7 @@ $status_icon **$status_text**
     
     comment="$comment
 
----
-🎉 Click on the links above to view detailed test results for each browser configuration."
+</details>"
     
     post_comment "$comment"
 fi