Files
gitea-migration/runners-conversion/periodVault/monitor-pr-checks.sh

108 lines
3.8 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Poll the checks of a pull request with `gh pr checks` until they reach a
# final outcome, printing one status line per poll.
#
# Strict mode: abort on unhandled command failures, treat unset variables as
# errors, and make a pipeline fail when any of its stages fails.
set -o errexit -o nounset -o pipefail
#######################################
# Print the command synopsis and the supported environment overrides.
# Arguments: none
# Outputs:   writes the help text to stdout (callers redirect it to stderr
#            when reporting a usage error)
#######################################
usage() {
  printf '%s\n' \
    'Usage: scripts/monitor-pr-checks.sh <pr-number>' \
    'Environment overrides:' \
    'CHECK_FAST_INTERVAL_SECONDS default: 60' \
    'CHECK_SLOW_INTERVAL_SECONDS default: 180' \
    'CHECK_MIN_FAST_WINDOW_SECONDS default: 900' \
    'CHECK_STABLE_CYCLES_FOR_SLOW default: 5'
}
# ---------------------------------------------------------------------------
# Argument parsing, dependency checks and polling-policy configuration.
#
# Exit status used in this section:
#   0    help was requested
#   2    missing <pr-number> or an invalid environment override
#   127  a required external command is not installed
# ---------------------------------------------------------------------------

# Help is the only recognised flag; any other first argument is the PR.
case "${1:-}" in
  -h | --help)
    usage
    exit 0
    ;;
esac

PR_NUMBER="${1:-}"
if [[ -z "$PR_NUMBER" ]]; then
  usage >&2
  exit 2
fi
readonly PR_NUMBER

# Fail fast when a required tool is missing. Without this check a missing
# `gh` looks like a failed check query, which the poll loop retries forever.
for required_cmd in gh jq; do
  if ! command -v "$required_cmd" >/dev/null 2>&1; then
    echo "error: required command not found: ${required_cmd}" >&2
    exit 127
  fi
done

FAST_INTERVAL_SECONDS="${CHECK_FAST_INTERVAL_SECONDS:-60}"
SLOW_INTERVAL_SECONDS="${CHECK_SLOW_INTERVAL_SECONDS:-180}"
MIN_FAST_WINDOW_SECONDS="${CHECK_MIN_FAST_WINDOW_SECONDS:-900}"
STABLE_CYCLES_FOR_SLOW="${CHECK_STABLE_CYCLES_FOR_SLOW:-5}"

#######################################
# Report an invalid environment override and terminate the script.
# Arguments: $1 - name of the environment variable
#            $2 - description of the accepted values
#            $3 - the offending value
# Outputs:   writes the error to stderr
# Returns:   never returns; exits with status 2
#######################################
reject_setting() {
  printf 'error: %s must be %s (got %q)\n' "$1" "$2" "$3" >&2
  exit 2
}

# The intervals are handed to `sleep`, so whole or decimal seconds are fine.
seconds_re='^[0-9]+([.][0-9]+)?$'
# The window and the cycle threshold are evaluated inside (( )), where any
# non-numeric text would be interpreted as an arithmetic expression; accept
# plain digits only.
uint_re='^[0-9]+$'

[[ "$FAST_INTERVAL_SECONDS" =~ $seconds_re ]] \
  || reject_setting CHECK_FAST_INTERVAL_SECONDS \
    'a non-negative number of seconds' "$FAST_INTERVAL_SECONDS"
[[ "$SLOW_INTERVAL_SECONDS" =~ $seconds_re ]] \
  || reject_setting CHECK_SLOW_INTERVAL_SECONDS \
    'a non-negative number of seconds' "$SLOW_INTERVAL_SECONDS"
[[ "$MIN_FAST_WINDOW_SECONDS" =~ $uint_re ]] \
  || reject_setting CHECK_MIN_FAST_WINDOW_SECONDS \
    'a non-negative integer' "$MIN_FAST_WINDOW_SECONDS"
[[ "$STABLE_CYCLES_FOR_SLOW" =~ $uint_re ]] \
  || reject_setting CHECK_STABLE_CYCLES_FOR_SLOW \
    'a non-negative integer' "$STABLE_CYCLES_FOR_SLOW"

# Force base 10 so a zero-padded value such as 0900 is not rejected as an
# invalid octal literal when it is later used in arithmetic.
MIN_FAST_WINDOW_SECONDS="$((10#${MIN_FAST_WINDOW_SECONDS}))"
STABLE_CYCLES_FOR_SLOW="$((10#${STABLE_CYCLES_FOR_SLOW}))"

readonly FAST_INTERVAL_SECONDS SLOW_INTERVAL_SECONDS
readonly MIN_FAST_WINDOW_SECONDS STABLE_CYCLES_FOR_SLOW

# Mutable polling state.
start_ts="$(date +%s)" # epoch seconds at which monitoring began
stable_cycles=0        # consecutive polls with an unchanged fingerprint
last_fingerprint=""    # name=state fingerprint seen on the previous poll

# Scratch file that captures stderr of the check query; removed on any exit.
err_file="$(mktemp)"
trap 'rm -f "$err_file"' EXIT

echo "Monitoring PR #${PR_NUMBER} checks"
echo "Policy: fast=${FAST_INTERVAL_SECONDS}s, slow=${SLOW_INTERVAL_SECONDS}s, min-fast-window=${MIN_FAST_WINDOW_SECONDS}s, stable-cycles-for-slow=${STABLE_CYCLES_FOR_SLOW}"
# ---------------------------------------------------------------------------
# Poll loop: query the PR's checks until they reach a final outcome.
#
# Exit status:
#   0  every check is SUCCESS, SKIPPED or NEUTRAL
#   1  a check failed, checks ended with cancellations, or checks finished
#      in any other non-success state
#
# Cadence: the fast interval is used for the first MIN_FAST_WINDOW_SECONDS
# and whenever the query fails or returns no checks. After that window the
# slow interval is used once the name=state fingerprint has been unchanged
# for STABLE_CYCLES_FOR_SLOW consecutive polls.
# ---------------------------------------------------------------------------

# jq predicates shared by the counting and reporting queries, so every state
# bucket is spelled out exactly once.
# NOTE(review): states outside these buckets (for example ACTION_REQUIRED,
# STALE or EXPECTED, should gh report them) only contribute to the total;
# once nothing is active they lead to the generic "non-success states" exit
# below. Confirm that this is the intended handling.
jq_state_defs='
def is_success:   .state == "SUCCESS";
def is_failure:   .state == "FAILURE" or .state == "ERROR"
                  or .state == "STARTUP_FAILURE" or .state == "TIMED_OUT";
def is_cancelled: .state == "CANCELLED";
def is_skipped:   .state == "SKIPPED" or .state == "NEUTRAL";
def is_active:    .state == "PENDING" or .state == "QUEUED"
                  or .state == "IN_PROGRESS" or .state == "WAITING"
                  or .state == "REQUESTED";
def tally(f): map(select(f)) | length;
'

#######################################
# Print one status line prefixed with a UTC timestamp and the elapsed time.
# Arguments: $1 - seconds elapsed since monitoring started
#            $2 - message text
# Outputs:   writes the line to stdout
#######################################
log_cycle() {
  local elapsed_s=$1
  local message=$2
  printf '[%s] elapsed %dm%ds | %s\n' \
    "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" \
    "$((elapsed_s / 60))" "$((elapsed_s % 60))" "$message"
}

#######################################
# Collapse a file onto a single line: newlines become spaces, whitespace
# runs shrink to one space, and one leading/trailing space is trimmed.
# Arguments: $1 - path of the file to read
# Outputs:   writes the flattened text to stdout
#######################################
flatten_file() {
  # `x x*` instead of `x\+`: the latter is a GNU extension to basic regular
  # expressions and is not portable across sed implementations.
  tr '\n' ' ' <"$1" | sed 's/[[:space:]][[:space:]]*/ /g; s/^ //; s/ $//'
}

while true; do
  now_ts="$(date +%s)"
  elapsed="$((now_ts - start_ts))"

  # A failed query is reported and retried rather than treated as fatal.
  if ! checks_json="$(gh pr checks "$PR_NUMBER" --json name,state,link 2>"$err_file")"; then
    err_msg="$(flatten_file "$err_file")"
    log_cycle "$elapsed" "check query failed: ${err_msg:-unknown error}"
    sleep "$FAST_INTERVAL_SECONDS"
    continue
  fi

  # One jq pass yields every counter as a space-separated list.
  counts="$(jq -r "${jq_state_defs}"'
    [length, tally(is_success), tally(is_skipped), tally(is_active),
     tally(is_failure), tally(is_cancelled)]
    | map(tostring) | join(" ")
  ' <<<"$checks_json")"
  read -r total_count success_count skipped_count active_count \
    failure_count cancelled_count <<<"$counts"

  # Covers both an empty array and empty output from the query.
  if ((${total_count:-0} == 0)); then
    log_cycle "$elapsed" "no checks yet"
    sleep "$FAST_INTERVAL_SECONDS"
    continue
  fi

  # The fingerprint changes whenever any check appears or changes state.
  fingerprint="$(jq -r 'sort_by(.name) | map("\(.name)=\(.state)") | join(";")' <<<"$checks_json")"
  if [[ "$fingerprint" == "$last_fingerprint" ]]; then
    stable_cycles="$((stable_cycles + 1))"
  else
    stable_cycles=0
    last_fingerprint="$fingerprint"
  fi

  log_cycle "$elapsed" "total=${total_count} success=${success_count} skipped=${skipped_count} active=${active_count} failed=${failure_count} cancelled=${cancelled_count}"

  # Fail fast: a single failed check ends monitoring even if others still run.
  if ((failure_count > 0)); then
    echo "Failing checks:"
    jq -r "${jq_state_defs}"'.[] | select(is_failure) | " - \(.name): \(.state) \(.link)"' <<<"$checks_json"
    exit 1
  fi

  # Nothing is running any more: decide the final outcome.
  if ((active_count == 0)); then
    if ((cancelled_count > 0)); then
      echo "Checks ended with cancellations."
      jq -r "${jq_state_defs}"'.[] | select(is_cancelled) | " - \(.name): \(.link)"' <<<"$checks_json"
      exit 1
    fi
    if ((success_count + skipped_count == total_count)); then
      echo "All checks passed."
      exit 0
    fi
    echo "Checks finished with non-success states."
    jq -r '.[] | " - \(.name): \(.state) \(.link)"' <<<"$checks_json"
    exit 1
  fi

  # Back off to the slow interval only after the fast window has elapsed and
  # the check states have been stable for long enough.
  if ((elapsed >= MIN_FAST_WINDOW_SECONDS && stable_cycles >= STABLE_CYCLES_FOR_SLOW)); then
    sleep "$SLOW_INTERVAL_SECONDS"
  else
    sleep "$FAST_INTERVAL_SECONDS"
  fi
done