feat: add runner conversion scripts and strengthen cutover automation
This commit is contained in:
107
runners-conversion/periodVault/monitor-pr-checks.sh
Executable file
107
runners-conversion/periodVault/monitor-pr-checks.sh
Executable file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env bash
#
# Poll the status checks of a GitHub pull request until they settle.
#
# Usage: monitor-pr-checks.sh <pr-number>    (-h / --help prints usage)
#
# Requires `gh` and `jq` on PATH. The polling cadence can be tuned through
# the CHECK_* environment variables listed by --help.
#
# Exit status:
#   0  all checks passed
#   1  checks failed, were cancelled, or ended in a non-success state
#   2  usage error

# Strict mode: abort on unhandled errors, unset variables and failures in
# any stage of a pipeline.
set -euo pipefail
|
||||
|
||||
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   writes the usage message to stdout (callers redirect to
#            stderr when reporting a usage error)
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
Usage: scripts/monitor-pr-checks.sh <pr-number>

Environment overrides:
  CHECK_FAST_INTERVAL_SECONDS      default: 60
  CHECK_SLOW_INTERVAL_SECONDS      default: 180
  CHECK_MIN_FAST_WINDOW_SECONDS    default: 900
  CHECK_STABLE_CYCLES_FOR_SLOW     default: 5
EOF
}
|
||||
|
||||
# Argument handling: the script takes a single positional argument, the
# pull request to monitor.
case "${1:-}" in
  -h | --help)
    usage
    exit 0
    ;;
  '')
    # No PR given: print usage on stderr and exit with the usage-error status.
    usage >&2
    exit 2
    ;;
  -*)
    # Any other option-looking value would be handed to `gh pr checks` as a
    # flag and make every poll fail, so reject it up front.
    printf 'error: unknown option: %s\n' "$1" >&2
    usage >&2
    exit 2
    ;;
esac

PR_NUMBER="$1"
|
||||
|
||||
# Polling policy; each value can be overridden through the matching CHECK_*
# environment variable.
FAST_INTERVAL_SECONDS="${CHECK_FAST_INTERVAL_SECONDS:-60}"       # delay between polls on the fast cadence
SLOW_INTERVAL_SECONDS="${CHECK_SLOW_INTERVAL_SECONDS:-180}"      # delay between polls once backed off
MIN_FAST_WINDOW_SECONDS="${CHECK_MIN_FAST_WINDOW_SECONDS:-900}"  # initial period that always polls fast
STABLE_CYCLES_FOR_SLOW="${CHECK_STABLE_CYCLES_FOR_SLOW:-5}"      # unchanged polls required before backing off

# Validate the overrides before they reach `sleep` or an arithmetic
# expression: a bad value should fail here with a clear message rather than
# part-way through a monitoring run.
for policy_var in FAST_INTERVAL_SECONDS SLOW_INTERVAL_SECONDS \
  MIN_FAST_WINDOW_SECONDS STABLE_CYCLES_FOR_SLOW; do
  policy_value="${!policy_var}"
  if [[ ! "$policy_value" =~ ^[0-9]+$ ]]; then
    printf 'error: CHECK_%s must be a non-negative integer, got: %s\n' \
      "$policy_var" "$policy_value" >&2
    exit 2
  fi
  # Force base 10 so a leading zero (e.g. "060") is not read as octal when
  # the value is later used inside (( )).
  printf -v "$policy_var" '%d' "$((10#$policy_value))"
done
unset policy_var policy_value
|
||||
|
||||
# State carried across polling iterations.
start_ts="$(date +%s)"  # epoch seconds at start; basis for the elapsed time
stable_cycles=0         # consecutive polls whose check states did not change
last_fingerprint=""     # name=state summary recorded from the previous poll

# Scratch file that receives stderr from `gh`; the EXIT trap removes it when
# the script terminates.
err_file="$(mktemp)"
trap 'rm -f -- "$err_file"' EXIT

echo "Monitoring PR #${PR_NUMBER} checks"
echo "Policy: fast=${FAST_INTERVAL_SECONDS}s, slow=${SLOW_INTERVAL_SECONDS}s, min-fast-window=${MIN_FAST_WINDOW_SECONDS}s, stable-cycles-for-slow=${STABLE_CYCLES_FOR_SLOW}"
|
||||
|
||||
# Main polling loop: query `gh pr checks` until every check has reached a
# terminal state.
#
# Exits 0 when all checks are SUCCESS/SKIPPED/NEUTRAL, and 1 when a check
# failed, was cancelled, or finished in any other non-success state.
# A failed query (gh error, no checks reported yet) is logged and retried on
# the fast interval.
#
# Reads PR_NUMBER, the polling-policy values, start_ts and err_file, and
# updates stable_cycles / last_fingerprint, all set earlier in the script.

# jq predicates shared by the counters and the reports, defined once so the
# two can never classify a state differently.
# EXPECTED (a commit status that has not been reported yet) is counted as
# active so that it is waited for rather than reported as a finished
# non-success state.
jq_defs='
  def is_failed:    .state=="FAILURE" or .state=="ERROR" or .state=="STARTUP_FAILURE" or .state=="TIMED_OUT";
  def is_cancelled: .state=="CANCELLED";
  def is_skipped:   .state=="SKIPPED" or .state=="NEUTRAL";
  def is_active:    .state=="PENDING" or .state=="QUEUED" or .state=="IN_PROGRESS" or .state=="WAITING" or .state=="REQUESTED" or .state=="EXPECTED";
  def tally(f):     map(select(f)) | length;
'

# Print a status line prefixed with a UTC timestamp and the elapsed time.
# Globals:   elapsed (read) - seconds since start_ts for the current poll
# Arguments: $1 - message text
log_status() {
  printf '[%s] elapsed %sm%ss | %s\n' \
    "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" \
    "$((elapsed / 60))" "$((elapsed % 60))" "$1"
}

while true; do
  elapsed="$(($(date +%s) - start_ts))"

  if ! checks_json="$(gh pr checks "$PR_NUMBER" --json name,state,link 2>"$err_file")"; then
    # Collapse gh's (possibly multi-line) stderr into a single trimmed line.
    # `tr -s` is used because the `\+` repetition in a basic regular
    # expression is a GNU sed extension that BSD/macOS sed does not honour.
    err_msg="$(tr -s '[:space:]' ' ' <"$err_file")"
    err_msg="${err_msg# }"
    err_msg="${err_msg% }"
    log_status "check query failed: ${err_msg:-unknown error}"
    sleep "$FAST_INTERVAL_SECONDS"
    continue
  fi

  # A single jq pass produces every counter as one space-separated line.
  counts="$(jq -r "${jq_defs}"'
    [ length,
      tally(.state=="SUCCESS"),
      tally(is_failed),
      tally(is_cancelled),
      tally(is_skipped),
      tally(is_active)
    ] | map(tostring) | join(" ")' <<<"$checks_json")"
  read -r total_count success_count failure_count cancelled_count \
    skipped_count active_count <<<"$counts"

  # Decide "no checks" from the count rather than from the literal text
  # "[]", so empty or differently formatted output can never fall through to
  # the "All checks passed." branch below.
  if (( total_count == 0 )); then
    log_status "no checks yet"
    sleep "$FAST_INTERVAL_SECONDS"
    continue
  fi

  # The fingerprint is an order-independent summary of every check's state;
  # it is compared with the previous poll to detect a lack of progress.
  fingerprint="$(jq -r 'sort_by(.name) | map("\(.name)=\(.state)") | join(";")' <<<"$checks_json")"
  if [[ "$fingerprint" == "$last_fingerprint" ]]; then
    stable_cycles="$((stable_cycles + 1))"
  else
    stable_cycles=0
    last_fingerprint="$fingerprint"
  fi

  log_status "total=${total_count} success=${success_count} skipped=${skipped_count} active=${active_count} failed=${failure_count} cancelled=${cancelled_count}"

  # Fail fast: one failed check is enough, even while others are running.
  if (( failure_count > 0 )); then
    echo "Failing checks:"
    jq -r "${jq_defs}"'.[] | select(is_failed) | " - \(.name): \(.state) \(.link)"' <<<"$checks_json"
    exit 1
  fi

  # Nothing is running any more: report the final outcome.
  if (( active_count == 0 )); then
    if (( cancelled_count > 0 )); then
      echo "Checks ended with cancellations."
      jq -r "${jq_defs}"'.[] | select(is_cancelled) | " - \(.name): \(.link)"' <<<"$checks_json"
      exit 1
    fi
    if (( success_count + skipped_count == total_count )); then
      echo "All checks passed."
      exit 0
    fi
    # Some check ended in a state that is neither success, skip, failure
    # nor cancellation; list everything so the odd one can be identified.
    echo "Checks finished with non-success states."
    jq -r '.[] | " - \(.name): \(.state) \(.link)"' <<<"$checks_json"
    exit 1
  fi

  # Poll on the fast cadence during the initial window; afterwards back off
  # to the slow cadence only once the state has been unchanged for enough
  # consecutive polls.
  if (( elapsed >= MIN_FAST_WINDOW_SECONDS && stable_cycles >= STABLE_CYCLES_FOR_SLOW )); then
    sleep "$SLOW_INTERVAL_SECONDS"
  else
    sleep "$FAST_INTERVAL_SECONDS"
  fi
done
|
||||
Reference in New Issue
Block a user