From 66310cc5bf75cc782f076b336886b60727f647e2 Mon Sep 17 00:00:00 2001 From: andrew clark Date: Mon, 27 Oct 2025 08:24:36 -0600 Subject: [PATCH] Jenkins Alerts Notifications (#3086) * Testing minimal pipeline * Update Jenkinsfile * Testing webhook * Testing webhook * Testing webhook * Testing build log output * Testing log retrieval * Testing * Testing pattern matching * Fixing regex * Testing error detection * Testing log formatting Including additional context around log failure. * Testing notification message format * Update Jenkinsfile * Notification formatting * Testing secure interpolation * Testing string interpolation * Notification format * Fixing markdown * Testing markdown * Testing markdown * Revert "Testing markdown" This reverts commit adeb6d2d55d2e2a4b1817c3e82b1f0095bd25aeb. * Testing different markdown format * Revert "Testing different markdown format" This reverts commit bf5406a1cd25f55208a08211472a758a018a9d2a. * Testing markdown * Testing markdown * Testing markdown * Testing markdown * Testing markdown * Testing notification * Testing notification * Testing notification * Testing failure mode * Testing failure mode * Adding new patterns and tests * Commenting * Stage name fix * Moving to notification on failure only * Fixing notification format * Testing env vars * Testing build url redirect * Testing no log errors * Testing no errors case * Integrating into primary jenkinsfile * Updating notification message Removed emoji from message [ROCm/composable_kernel commit: a1ce64374f91a0a8982b61a3bf2594fec3c75d60] --- Jenkinsfile | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index b89d6fb657..9acbbeeca2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -12,6 +12,14 @@ def show_node_info() { """ } +// Error patterns to scan build logs for specific failure types and send detailed notifications. 
+def failurePatterns = [
+    [pattern: /login attempt to .* failed with status: 401 Unauthorized/, description: "Docker registry authentication failed"],
+    [pattern: /docker login failed/, description: "Docker login failed"],
+    [pattern: /HTTP request sent .* 404 Not Found/, description: "HTTP request failed with 404"],
+    [pattern: /cat: .* No such file or directory/, description: "GPU not found"],
+]
+
 class Version {
     int major, minor, patch
     @Override
@@ -1849,4 +1857,36 @@ pipeline {
             }
         }
     }
+    post {
+        failure {
+            node(rocmnode("nogpu")) {
+                script {
+                    // Download this build's console log as plain text.
+                    def buildLog = sh(script: 'wget -q --no-check-certificate -O - ' + BUILD_URL + 'consoleText', returnStdout: true)
+                    // Record every entry of failurePatterns that checkForPattern reports as found in the log.
+                    def foundPatterns = []
+                    for (patternMap in failurePatterns) {
+                        def result = checkForPattern(patternMap.pattern, buildLog)
+                        if (result.found) {
+                            foundPatterns.add([description: patternMap.description, matchedLine: result.matchedLine, context: result.context])
+                        }
+                    }
+                    // Send one webhook notification per matched failure pattern.
+                    // The message is serialized with JsonOutput and handed to curl through an environment
+                    // variable, so quotes, backslashes and newlines in the log excerpt can neither corrupt
+                    // the JSON body nor end the shell quoting (sh does not honour a \' escape inside '...').
+                    // NOTE(review): assumes result.context is raw text, not already JSON-escaped - confirm in checkForPattern.
+                    for (patternMap in foundPatterns) {
+                        def message = '\n\n**Build Failed**\n\n**Issues detected:** ' + patternMap.description + '\n\n**Log context:**\n```\n' + patternMap.context + '\n```\n\n**Job:** ' + env.JOB_NAME + '\n\n**Build:** #' + env.BUILD_NUMBER + '\n\n**URL:** ' + env.RUN_DISPLAY_URL
+                        def payload = groovy.json.JsonOutput.toJson([text: message])
+                        withCredentials([string(credentialsId: 'ck_ci_errors_webhook_url', variable: 'WEBHOOK_URL')]) {
+                            withEnv(['WEBHOOK_PAYLOAD=' + payload]) {
+                                sh 'curl -X POST "$WEBHOOK_URL" -H "Content-Type: application/json" -d "$WEBHOOK_PAYLOAD"'
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
 }