# sglang/.github/workflows/pr-gate.yml

# Reusable workflow: it is only started via `workflow_call`, and callers can tune
# both gates through the inputs below.
on:
  workflow_call:
    inputs:
      # Toggles the step that fails pull requests lacking the `run-ci` label.
      require-run-ci:
        type: boolean
        required: false
        default: true
        description: "Whether the PR must have the run-ci label"
      # Default cooldown window for the rate-limit step; that step is skipped
      # entirely unless this value is greater than 0.
      cool-down-minutes:
        type: number
        required: false
        default: 120
        description: "Cooldown period in minutes for low-permission users; 0 disables rate limiting"

jobs:
  pr-gate:
    # Every step below is conditioned on `pull_request` events; other triggers pass through ungated:
    # 1. Commits on main: no gating needed.
    # 2. workflow_dispatch: can only be triggered by users with write access.
    runs-on: ubuntu-latest
    steps:
      # Query the pull request through the REST API and publish its label names
      # (as a JSON array string), draft flag and author login as step outputs
      # for the gating steps that follow.
      - name: Fetch latest PR info
        id: pr
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const { owner, repo } = context.repo;
            const { data: pullRequest } = await github.rest.pulls.get({
              owner,
              repo,
              pull_number: context.issue.number,
            });

            // Only the label names are needed downstream, serialized as JSON.
            const labelNames = pullRequest.labels.map((label) => label.name);
            core.setOutput("labels", JSON.stringify(labelNames));
            core.setOutput("draft", pullRequest.draft);
            core.setOutput("user", pullRequest.user.login);

      # Print the gate's inputs and the fetched PR metadata for debugging.
      # Values are handed to the shell through environment variables instead of
      # being expanded into the script text: the labels output is JSON (it
      # contains double quotes) and label names are free-form, so inline
      # expansion would garble the log line and allow shell injection.
      - name: Log PR info
        if: github.event_name == 'pull_request'
        env:
          PR_EVENT: ${{ github.event_name }}
          PR_LABELS: ${{ steps.pr.outputs.labels }}
          PR_DRAFT: ${{ steps.pr.outputs.draft }}
          PR_USER: ${{ steps.pr.outputs.user }}
          REQUIRE_RUN_CI: ${{ inputs.require-run-ci }}
          COOL_DOWN_MINUTES: ${{ inputs.cool-down-minutes }}
        run: |
          echo "===== PR Info ====="
          echo "PR Event: ${PR_EVENT}"
          echo "PR Labels: ${PR_LABELS}"
          echo "PR Draft: ${PR_DRAFT}"
          echo "PR User: ${PR_USER}"
          echo "Require run-ci: ${REQUIRE_RUN_CI}"
          echo "Cool down minutes: ${COOL_DOWN_MINUTES}"
          echo "==================="

      # Fail the job while the pull request is still a draft. The `draft` output
      # is a string, so fromJson() turns it back into a real boolean.
      - name: Block draft PR
        if: ${{ github.event_name == 'pull_request' && fromJson(steps.pr.outputs.draft) }}
        run: |
          echo "PR is draft. Blocking CI."
          exit 1

      # Fail the job when the caller requires the `run-ci` label and the PR does
      # not carry it. The membership test is evaluated by the Actions expression
      # engine and only its true/false result reaches the shell (via env), so
      # free-form label names are never spliced into the script.
      - name: Require run-ci label (optional)
        if: github.event_name == 'pull_request' && inputs.require-run-ci == true
        env:
          HAS_RUN_CI_LABEL: ${{ contains(fromJson(steps.pr.outputs.labels), 'run-ci') }}
        run: |
          if [[ "${HAS_RUN_CI_LABEL}" == "false" ]]; then
            echo "Missing required label 'run-ci'. See https://docs.sglang.io/developer_guide/contribution_guide.html#how-to-trigger-ci-tests for more details."
            exit 1
          fi

      # Rate-limit CI for actors without write/maintain/admin permission: fail
      # this step when the same actor already has a pr-test.yml run inside the
      # cooldown window that got past the gate jobs. The window is the smaller of
      # the `cool-down-minutes` input and an optional per-user override read from
      # .github/CI_PERMISSIONS.json on main.
      - name: Enforce rate limit for low-permission actors (optional)
        if: github.event_name == 'pull_request' && inputs.cool-down-minutes > 0
        uses: actions/github-script@v7
        env:
          # Handed over through the environment rather than interpolated into
          # the JavaScript source below.
          PR_AUTHOR: ${{ steps.pr.outputs.user }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const DEFAULT_MINUTES = Number("${{ inputs.cool-down-minutes }}");
            const owner = context.repo.owner;
            const repo = context.repo.repo;
            const eventName = context.eventName;

            // Identify who triggered this run; fall back to context.actor when the
            // run payload carries no triggering_actor.
            const curRun = await github.rest.actions.getWorkflowRun({
              owner, repo, run_id: context.runId
            });
            let triggeringActor = curRun.data.triggering_actor?.login || context.actor;
            if (triggeringActor === "github-actions[bot]") {
              // Bot-triggered runs are attributed to the PR author instead.
              triggeringActor = process.env.PR_AUTHOR;
              core.info(
                `triggering_actor is github-actions[bot]; substituting PR author '${triggeringActor}'.`
              );
            }

            // Returns true when `username` has write, maintain or admin permission on
            // the repository. A 404/403 from the API is treated as "no permission";
            // any other error is rethrown and fails the step.
            async function hasHighPermission(username) {
              try {
                const { data } = await github.rest.repos.getCollaboratorPermissionLevel({ owner, repo, username });
                const perm = data.permission || 'none';
                return perm === 'write' || perm === 'maintain' || perm === 'admin';
              } catch (e) {
                if (e.status === 404 || e.status === 403) return false;
                throw e;
              }
            }

            if (await hasHighPermission(triggeringActor)) {
              core.info(`Triggering user '${triggeringActor}' has high permission. No rate limit applied.`);
              return;
            }

            let effectiveCooldownMinutes = DEFAULT_MINUTES;
            let perUserCooldownMinutes = null;

            // Best-effort lookup of a per-user cooldown override. Any failure here
            // (missing file, invalid JSON, API error) only falls back to the default.
            try {
              const contentResp = await github.rest.repos.getContent({
                owner,
                repo,
                path: ".github/CI_PERMISSIONS.json",
                ref: "main",
              });

              if (!Array.isArray(contentResp.data) && contentResp.data && "content" in contentResp.data) {
                const raw = Buffer.from(
                  contentResp.data.content,
                  contentResp.data.encoding || "base64"
                ).toString();
                const ciPermissions = JSON.parse(raw);

                const userPerm = ciPermissions[triggeringActor];
                if (userPerm && typeof userPerm.cooldown_interval_minutes === "number") {
                  perUserCooldownMinutes = userPerm.cooldown_interval_minutes;
                  core.info(
                    `Per-user cooldown for '${triggeringActor}' from CI_PERMISSIONS.json: ${perUserCooldownMinutes} minutes.`
                  );
                } else {
                  core.info(`No per-user cooldown found for '${triggeringActor}' in CI_PERMISSIONS.json.`);
                }
              } else {
                core.info("CI_PERMISSIONS.json content response is not a file; skipping per-user cooldown.");
              }
            } catch (e) {
              core.info(`CI_PERMISSIONS.json not found or unreadable: ${e.message}. Using default rate limit only.`);
            }

            // A per-user value can only shorten the cooldown, never lengthen it.
            if (perUserCooldownMinutes !== null) {
              effectiveCooldownMinutes = Math.min(effectiveCooldownMinutes, perUserCooldownMinutes);
            }

            if (effectiveCooldownMinutes <= 0) {
              core.info(
                `Effective cooldown for '${triggeringActor}' is 0 minutes; no rate limit enforced for this user.`
              );
              return;
            }

            const cutoff = new Date(Date.now() - effectiveCooldownMinutes * 60 * 1000);
            core.info(
              `Checking for workflow runs since ${cutoff.toISOString()} (last ${effectiveCooldownMinutes} minutes) for event '${eventName}'.`
            );

            // Ask the API only for runs created inside the cooldown window and follow
            // pagination, so that a burst of runs from other users cannot push this
            // actor's earlier run out of a single 100-item page.
            // The `created` filter uses GitHub's date-range syntax; milliseconds are
            // dropped to match the documented YYYY-MM-DDTHH:MM:SSZ form. Dropping them
            // moves the bound slightly earlier; the exact cutoff is re-applied
            // in the loop below.
            const createdSince = cutoff.toISOString().replace(/\.\d+Z$/, "Z");
            const runs = await github.paginate(github.rest.actions.listWorkflowRuns, {
              owner,
              repo,
              workflow_id: 'pr-test.yml',
              event: eventName,
              created: `>=${createdSince}`,
              per_page: 100,
            });

            // Rate Limiting Logic:
            // We only count workflow runs that actually consumed CI resources (i.e., passed the gate).
            // A run "passes the gate" if any jobs beyond the gate jobs (check-changes, pr-gate, call-gate,
            // pr-test-finish) actually executed (not skipped/cancelled). This prevents scenarios where:
            // - User has PR A with missing 'run-ci' label (fails at gate)
            // - User opens PR B with 'run-ci' label
            // - PR B should be able to run even though PR A triggered a run recently

            // Returns true when `run` has at least one non-gate job whose conclusion shows
            // it really executed. If the job list cannot be fetched, the run is counted
            // (returns true) unless the run itself concluded as cancelled or skipped.
            async function didRunPassGate(run) {
              try {
                // Note: Fetching up to 100 jobs (API maximum). If a workflow has >100 jobs,
                // we may miss some, but this is unlikely in practice.
                const { data: jobsData } = await github.rest.actions.listJobsForWorkflowRun({
                  owner, repo, run_id: run.id, per_page: 100
                });
                const jobs = jobsData.jobs || [];

                // If no jobs exist yet, the run hasn't started consuming resources
                if (jobs.length === 0) {
                  core.info(`Run ${run.id} has no jobs yet; not counting against rate limit.`);
                  return false;
                }

                // Gate jobs that don't consume significant CI resources. A job matches when
                // its name equals a gate name or starts with the gate name plus a space.
                const gateJobs = ['check-changes', 'pr-gate', 'call-gate', 'pr-test-finish'];
                const jobsBeyondGate = jobs.filter(j => !gateJobs.some(g => j.name === g || j.name.startsWith(g + ' ')));

                // A job "ran" if it reached a terminal conclusion state that indicates actual execution
                const ranStates = ['success', 'failure', 'timed_out', 'action_required'];
                const hasJobsThatRan = jobsBeyondGate.some(j => j.conclusion && ranStates.includes(j.conclusion));
                return hasJobsThatRan;
              } catch (e) {
                core.warning(`Could not check jobs for run ${run.id}: ${e.message}`);

                // If it's a rate limit error, count it conservatively to prevent abuse
                if (e.status === 429) {
                  core.warning(`Hit rate limit checking run ${run.id}; counting it to be safe.`);
                  return true;
                }

                // For cancelled/skipped runs, they likely didn't consume resources
                if (run.conclusion === 'cancelled' || run.conclusion === 'skipped') {
                  return false;
                }

                // Default to counting it to prevent abuse
                return true;
              }
            }

            // Limit the number of runs we'll check in detail to avoid API rate limits
            const MAX_RUNS_TO_CHECK = 5;
            let runsChecked = 0;
            let runsSkippedAtGate = 0;
            let recentFound = null;

            for (const run of runs) {
              // Skip the current run, runs older than the exact cutoff, and other users' runs.
              if (String(run.id) === String(context.runId)) continue;
              if (new Date(run.created_at) < cutoff) continue;
              const isUserRun = (run.actor?.login === triggeringActor) || (run.triggering_actor?.login === triggeringActor);
              if (!isUserRun) continue;

              // Safety limit: once the detailed-check budget is spent, assume the next
              // candidate passed the gate. This is tested before counting/logging so the
              // summary below reports only runs that were actually inspected.
              if (runsChecked >= MAX_RUNS_TO_CHECK) {
                core.warning(`Checked ${MAX_RUNS_TO_CHECK} runs; assuming this one passed gate to avoid API limits.`);
                recentFound = run;
                break;
              }

              runsChecked++;
              core.info(`Checking run ${run.id} (created: ${run.created_at}, conclusion: ${run.conclusion})`);

              // Only count runs that actually passed the gate and consumed CI resources
              if (await didRunPassGate(run)) {
                recentFound = run;
                core.info(`Found recent run ${run.id} that passed gate.`);
                break;
              } else {
                runsSkippedAtGate++;
                core.info(`Run ${run.id} failed at gate; not counting against rate limit.`);
              }
            }

            core.info(`Rate limit check summary: checked ${runsChecked} runs, ${runsSkippedAtGate} failed at gate.`);

            if (recentFound) {
              core.setFailed(
                `User '${triggeringActor}' already triggered '${context.workflow}' via '${eventName}' at ${recentFound.created_at}. ` +
                `Please wait ${effectiveCooldownMinutes} minutes before triggering again.`
              );
            } else {
              core.info(
                `No recent runs detected for '${triggeringActor}' within the last ${effectiveCooldownMinutes} minutes; proceeding.`
              );
            }