fix: resolve 10 bugs across scripts

- manage_runner.sh: fix RUNNER_DEFAULT_IMAGE clobbering by renaming per-runner var to RUNNER_SECTION_IMAGE; .env fallback now works
- manage_runner.sh: render native runner config.yaml before registration so act_runner can read it via the --config flag
- manage_runner.sh: add SSH credential validation for remote hosts (fail early with clear error instead of cryptic SSH failure)
- phase1/phase2: add UNRAID_DB_IP/FEDORA_DB_IP to conditional require_vars when DB_TYPE != sqlite3
- cleanup.sh: only clear manifest when all actions for host succeeded; failed actions are preserved for retry
- phase8_cutover.sh: strip empty environment: block from Caddy docker-compose when TLS_MODE=existing
- phase5_migrate_pipelines.sh, phase5_teardown.sh, phase9_teardown.sh: wrap cd+git in subshells so working directory is always restored
- phase3_post_check.sh: handle both string and numeric runner status from Gitea API (offline vs 2)
- configure_env.sh: fix TOTAL_PROMPTS base count (63->64) and move DB/repo count adjustments before their prompts are shown

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 13:21:30 -05:00
parent 045283be50
commit 9494645b3a
10 changed files with 94 additions and 60 deletions
--- a/manage_runner.sh
+++ b/manage_runner.sh
@@ -40,7 +40,7 @@ EOF
 # ---------------------------------------------------------------------------
 # Parse a runner entry from runners.conf (INI format) by section name.
 # Sets globals: RUNNER_NAME, RUNNER_HOST, RUNNER_TYPE, RUNNER_DATA_PATH,
-#   RUNNER_LABELS, RUNNER_DEFAULT_IMAGE, RUNNER_REPOS, RUNNER_CAPACITY,
+#   RUNNER_LABELS, RUNNER_SECTION_IMAGE, RUNNER_REPOS, RUNNER_CAPACITY,
 #   RUNNER_CPU, RUNNER_MEMORY, RUNNER_BOOT
 # Also resolves: RUNNER_SSH_HOST, RUNNER_SSH_USER, RUNNER_SSH_PORT,
 #   RUNNER_SSH_KEY (from .env or custom section keys)
@@ -65,7 +65,7 @@ parse_runner_entry() {
  RUNNER_TYPE=$(ini_get "$RUNNERS_CONF" "$target_name" "type" "")
  RUNNER_DATA_PATH=$(ini_get "$RUNNERS_CONF" "$target_name" "data_path" "")
  RUNNER_LABELS=$(ini_get "$RUNNERS_CONF" "$target_name" "labels" "")
-  RUNNER_DEFAULT_IMAGE=$(ini_get "$RUNNERS_CONF" "$target_name" "default_image" "")
+  RUNNER_SECTION_IMAGE=$(ini_get "$RUNNERS_CONF" "$target_name" "default_image" "")
  RUNNER_REPOS=$(ini_get "$RUNNERS_CONF" "$target_name" "repos" "all")
  RUNNER_CAPACITY=$(ini_get "$RUNNERS_CONF" "$target_name" "capacity" "1")
  RUNNER_CPU=$(ini_get "$RUNNERS_CONF" "$target_name" "cpu" "")
@@ -107,6 +107,18 @@ parse_runner_entry() {
      ;;
  esac

+  # --- Validate SSH credentials for remote hosts ---
+  if [[ "$RUNNER_SSH_HOST" != "local" ]]; then
+    if [[ -z "$RUNNER_SSH_HOST" ]]; then
+      log_error "Runner '$target_name': SSH host is empty (check .env for ${RUNNER_HOST^^}_IP)"
+      return 1
+    fi
+    if [[ -z "$RUNNER_SSH_USER" ]]; then
+      log_error "Runner '$target_name': SSH user is empty (check .env for ${RUNNER_HOST^^}_SSH_USER)"
+      return 1
+    fi
+  fi
+
  # --- Validate required fields ---
  if [[ -z "$RUNNER_TYPE" ]]; then
    log_error "Runner '$target_name': type is empty (must be docker or native)"
@@ -137,9 +149,10 @@ parse_runner_entry() {
 # Sets RUNNER_RESOLVED_IMAGE.
 # ---------------------------------------------------------------------------
 resolve_runner_image() {
-  local image="${RUNNER_DEFAULT_IMAGE:-}"
+  # Per-runner image from runners.conf takes priority, then .env global, then hardcoded fallback
+  local image="${RUNNER_SECTION_IMAGE:-${RUNNER_DEFAULT_IMAGE:-}}"
  if [[ -z "$image" ]] && [[ "$RUNNER_TYPE" == "docker" ]]; then
-    image="${RUNNER_DEFAULT_IMAGE:-catthehacker/ubuntu:act-latest}"
+    image="catthehacker/ubuntu:act-latest"
  fi

  if [[ -n "$image" ]] && [[ -n "${LOCAL_REGISTRY:-}" ]]; then
@@ -384,6 +397,16 @@ add_native_runner() {
    log_success "act_runner binary downloaded"
  fi

+  # Render runner config (must exist before registration — act_runner reads it)
+  local tmpfile
+  tmpfile=$(mktemp)
+  # shellcheck disable=SC2090  # intentional — RUNNER_LABELS_YAML rendered via envsubst
+  export RUNNER_NAME RUNNER_DATA_PATH RUNNER_LABELS_YAML RUNNER_CAPACITY
+  render_template "${SCRIPT_DIR}/templates/runner-config.yaml.tpl" "$tmpfile" \
+    "\${RUNNER_NAME} \${RUNNER_LABELS_YAML} \${RUNNER_CAPACITY}"
+  cp "$tmpfile" "${RUNNER_DATA_PATH}/config.yaml"
+  rm -f "$tmpfile"
+
  # Register the runner with Gitea
  if [[ ! -f "${RUNNER_DATA_PATH}/.runner" ]]; then
    log_info "Registering runner with Gitea..."
@@ -397,16 +420,6 @@ add_native_runner() {
    log_success "Runner registered"
  fi

-  # Render runner config
-  local tmpfile
-  tmpfile=$(mktemp)
-  # shellcheck disable=SC2090  # intentional — RUNNER_LABELS_YAML rendered via envsubst
-  export RUNNER_NAME RUNNER_DATA_PATH RUNNER_LABELS_YAML RUNNER_CAPACITY
-  render_template "${SCRIPT_DIR}/templates/runner-config.yaml.tpl" "$tmpfile" \
-    "\${RUNNER_NAME} \${RUNNER_LABELS_YAML} \${RUNNER_CAPACITY}"
-  cp "$tmpfile" "${RUNNER_DATA_PATH}/config.yaml"
-  rm -f "$tmpfile"
-
  # Render launchd plist.
  # When boot=true, insert a <key>UserName</key> entry so the daemon runs as
  # the deploying user instead of root (LaunchDaemons default to root).
--- a/phase1_gitea_unraid.sh
+++ b/phase1_gitea_unraid.sh
@@ -19,7 +19,8 @@ require_vars UNRAID_IP UNRAID_SSH_USER UNRAID_SSH_PORT UNRAID_GITEA_DATA_PATH \
             GITEA_INTERNAL_URL GITEA_DOMAIN

 if [[ "${GITEA_DB_TYPE}" != "sqlite3" ]]; then
-  require_vars GITEA_DB_HOST GITEA_DB_PORT GITEA_DB_NAME GITEA_DB_USER GITEA_DB_PASSWD
+  require_vars GITEA_DB_HOST GITEA_DB_PORT GITEA_DB_NAME GITEA_DB_USER GITEA_DB_PASSWD \
+               UNRAID_DB_IP
 fi

 phase_header 1 "Gitea on Unraid"
--- a/phase2_gitea_fedora.sh
+++ b/phase2_gitea_fedora.sh
@@ -19,7 +19,8 @@ require_vars FEDORA_IP FEDORA_SSH_USER FEDORA_SSH_PORT FEDORA_GITEA_DATA_PATH \
             GITEA_BACKUP_INTERNAL_URL

 if [[ "${GITEA_DB_TYPE}" != "sqlite3" ]]; then
-  require_vars GITEA_DB_HOST GITEA_DB_PORT GITEA_DB_NAME GITEA_DB_USER GITEA_DB_PASSWD
+  require_vars GITEA_DB_HOST GITEA_DB_PORT GITEA_DB_NAME GITEA_DB_USER GITEA_DB_PASSWD \
+               FEDORA_DB_IP
 fi

 phase_header 2 "Gitea on Fedora (Backup)"
--- a/phase3_post_check.sh
+++ b/phase3_post_check.sh
@@ -45,7 +45,7 @@ while IFS= read -r name; do
  if [[ -z "$local_status" ]]; then
    log_error "FAIL: Runner '${name}' not found in Gitea admin"
    FAIL=$((FAIL + 1))
-  elif [[ "$local_status" == "offline" ]]; then
+  elif [[ "$local_status" == "offline" ]] || [[ "$local_status" == "2" ]]; then
    log_error "FAIL: Runner '${name}' is registered but offline"
    FAIL=$((FAIL + 1))
  else
--- a/phase5_migrate_pipelines.sh
+++ b/phase5_migrate_pipelines.sh
@@ -158,13 +158,14 @@ for repo in "${REPOS[@]}"; do
  # Step 5: Commit and push
  # Configure git user for the commit (required for fresh clones)
  # -------------------------------------------------------------------------
-  cd "$CLONE_DIR"
-  git config user.name "Gitea Migration"
-  git config user.email "migration@gitea.local"
-  git add .gitea/
-  git commit -q -m "Migrate workflows to Gitea Actions"
-  git_with_auth git push -q origin HEAD
-  cd "$SCRIPT_DIR"
+  (
+    cd "$CLONE_DIR"
+    git config user.name "Gitea Migration"
+    git config user.email "migration@gitea.local"
+    git add .gitea/
+    git commit -q -m "Migrate workflows to Gitea Actions"
+    git_with_auth git push -q origin HEAD
+  )

  log_success "Workflows migrated for ${repo}"
  SUCCESS=$((SUCCESS + 1))
--- a/phase5_teardown.sh
+++ b/phase5_teardown.sh
@@ -78,13 +78,14 @@ for repo in "${REPOS[@]}"; do

  if [[ -d "${CLONE_DIR}/.gitea/workflows" ]]; then
    rm -rf "${CLONE_DIR}/.gitea/workflows"
-    cd "$CLONE_DIR"
-    git config user.name "Gitea Migration"
-    git config user.email "migration@gitea.local"
-    git add -A
-    git commit -q -m "Remove Gitea Actions workflows (teardown)"
-    git_with_auth git push -q origin HEAD
-    cd "$SCRIPT_DIR"
+    (
+      cd "$CLONE_DIR"
+      git config user.name "Gitea Migration"
+      git config user.email "migration@gitea.local"
+      git add -A
+      git commit -q -m "Remove Gitea Actions workflows (teardown)"
+      git_with_auth git push -q origin HEAD
+    )
    log_success "Removed .gitea/workflows/ from ${repo}"
  else
    log_info ".gitea/workflows/ not found in clone — already clean"
--- a/phase8_cutover.sh
+++ b/phase8_cutover.sh
@@ -188,6 +188,16 @@ else

  render_template "${SCRIPT_DIR}/templates/docker-compose-caddy.yml.tpl" "$TMPFILE" \
    "\${CADDY_DATA_PATH} \${CADDY_CONTAINER_IP} \${CADDY_ENV_VARS} \${CADDY_EXTRA_VOLUMES}"
+  # Strip empty YAML blocks left when optional vars are blank
+  if [[ -z "$CADDY_ENV_VARS" ]]; then
+    sed -i.bak '/^[[:space:]]*environment:$/d' "$TMPFILE"
+    rm -f "${TMPFILE}.bak"
+  fi
+  if [[ -z "$CADDY_EXTRA_VOLUMES" ]]; then
+    # Remove trailing blank lines after the volumes block
+    sed -i.bak -e :a -e '/^\n*$/{$d;N;ba' -e '}' "$TMPFILE"
+    rm -f "${TMPFILE}.bak"
+  fi
  scp_to UNRAID "$TMPFILE" "${CADDY_DATA_PATH}/docker-compose.yml"
  rm -f "$TMPFILE"
  log_success "Caddy docker-compose.yml deployed"
--- a/phase9_teardown.sh
+++ b/phase9_teardown.sh
@@ -79,13 +79,14 @@ for repo in "${REPOS[@]}"; do

  if [[ -f "${CLONE_DIR}/.gitea/workflows/security-scan.yml" ]]; then
    rm -f "${CLONE_DIR}/.gitea/workflows/security-scan.yml"
-    cd "$CLONE_DIR"
-    git config user.name "Gitea Migration"
-    git config user.email "migration@gitea.local"
-    git add -A
-    git commit -q -m "Remove security scanning workflow (teardown)"
-    git_with_auth git push -q origin HEAD
-    cd "$SCRIPT_DIR"
+    (
+      cd "$CLONE_DIR"
+      git config user.name "Gitea Migration"
+      git config user.email "migration@gitea.local"
+      git add -A
+      git commit -q -m "Remove security scanning workflow (teardown)"
+      git_with_auth git push -q origin HEAD
+    )
    log_success "Removed security-scan.yml from ${repo}"
  fi

--- a/setup/cleanup.sh
+++ b/setup/cleanup.sh
@@ -214,6 +214,7 @@ for host in "${HOSTS[@]}"; do
  log_info "=== Cleaning up: ${host} ==="

  ssh_key=$(host_to_ssh_key "$host")
+  HOST_FAILED=0

  # Read entries into array, then reverse (bash 3.2 compatible — no mapfile)
  entries=()
@@ -237,99 +238,102 @@ for host in "${HOSTS[@]}"; do
        if cleanup_brew_pkg "$target"; then
          CLEANED=$((CLEANED + 1))
        else
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
        fi
        ;;
      dnf_pkg)
        if [[ -z "$ssh_key" ]]; then
          log_warn "Cannot clean up dnf_pkg '$target' — no SSH key for host '$host'"
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
          continue
        fi
        if cleanup_dnf_pkg "$ssh_key" "$target"; then
          CLEANED=$((CLEANED + 1))
        else
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
        fi
        ;;
      static_bin)
        if [[ -z "$ssh_key" ]]; then
          log_warn "Cannot clean up static_bin '$target' — no SSH key for host '$host'"
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
          continue
        fi
        if cleanup_static_bin "$ssh_key" "$target"; then
          CLEANED=$((CLEANED + 1))
        else
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
        fi
        ;;
      docker_group)
        if [[ -z "$ssh_key" ]]; then
          log_warn "Cannot clean up docker_group '$target' — no SSH key for host '$host'"
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
          continue
        fi
        if cleanup_docker_group "$ssh_key" "$target"; then
          CLEANED=$((CLEANED + 1))
        else
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
        fi
        ;;
      systemd_svc)
        if [[ -z "$ssh_key" ]]; then
          log_warn "Cannot clean up systemd_svc '$target' — no SSH key for host '$host'"
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
          continue
        fi
        if cleanup_systemd_svc "$ssh_key" "$target"; then
          CLEANED=$((CLEANED + 1))
        else
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
        fi
        ;;
      xcode_cli)
        if cleanup_xcode_cli; then
          CLEANED=$((CLEANED + 1))
        else
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
        fi
        ;;
      ssh_key)
        if [[ -z "$ssh_key" ]]; then
          log_warn "Cannot clean up ssh_key '$target' — no SSH key for host '$host'"
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
          continue
        fi
        if cleanup_ssh_key "$ssh_key" "$target"; then
          CLEANED=$((CLEANED + 1))
        else
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
        fi
        ;;
      authorized_key)
        if [[ -z "$ssh_key" ]]; then
          log_warn "Cannot clean up authorized_key '$target' — no SSH key for host '$host'"
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
          continue
        fi
        if cleanup_authorized_key "$ssh_key" "$target"; then
          CLEANED=$((CLEANED + 1))
        else
-          FAILED=$((FAILED + 1))
+          FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
        fi
        ;;
      *)
        log_warn "Unknown action type '${action_type}' for target '${target}' — skipping"
-        FAILED=$((FAILED + 1))
+        FAILED=$((FAILED + 1)); HOST_FAILED=$((HOST_FAILED + 1))
        ;;
    esac
  done

-  # Clear the manifest after successful cleanup (unless dry run)
-  if [[ "$DRY_RUN" == "false" ]]; then
+  # Only clear the manifest if all actions for this host succeeded.
+  # Failed actions remain in the manifest so they can be retried.
+  if [[ "$DRY_RUN" == "false" ]] && [[ "$HOST_FAILED" -eq 0 ]]; then
    manifest_clear "$host"
    log_success "Manifest cleared for ${host}"
+  elif [[ "$DRY_RUN" == "false" ]] && [[ "$HOST_FAILED" -gt 0 ]]; then
+    log_warn "Manifest preserved for ${host} — ${HOST_FAILED} action(s) failed"
  fi
 done

--- a/setup/configure_env.sh
+++ b/setup/configure_env.sh
@@ -65,7 +65,7 @@ get_env_val() {
 # Prompt function
 # ---------------------------------------------------------------------------
 # Base prompt count (fixed prompts + TLS conditional slots — repo/DB prompts added dynamically)
-TOTAL_PROMPTS=63
+TOTAL_PROMPTS=64
 CURRENT_PROMPT=0
 LAST_SECTION=""

@@ -267,6 +267,8 @@ prompt_var "GITEA_DB_TYPE"          "Database type (sqlite3, mysql, postgres, ms
 COLLECTED_DB_TYPE=$(get_env_val "GITEA_DB_TYPE" "sqlite3")

 if [[ "$COLLECTED_DB_TYPE" != "sqlite3" ]]; then
+  # Update total BEFORE showing DB prompts so progress counter is accurate
+  TOTAL_PROMPTS=$((TOTAL_PROMPTS + 5))
  # Determine default port based on DB type
  case "$COLLECTED_DB_TYPE" in
    mysql)    db_port_default="3306" ;;
@@ -279,8 +281,6 @@ if [[ "$COLLECTED_DB_TYPE" != "sqlite3" ]]; then
  prompt_var "GITEA_DB_NAME"    "Database name"                            nonempty  "gitea"            "DATABASE"
  prompt_var "GITEA_DB_USER"    "Database user"                            nonempty  "gitea"            "DATABASE"
  prompt_var "GITEA_DB_PASSWD"  "Database password (min 8 chars)"          password  ""                 "DATABASE"
-  # Update total for the 5 DB prompts
-  TOTAL_PROMPTS=$((TOTAL_PROMPTS + 5))
 fi

 prompt_var "GITEA_VERSION"          "Gitea Docker image tag"                      nonempty  "1.25"    "GITEA SHARED CREDENTIALS"
@@ -312,6 +312,8 @@ if [[ -n "$EXISTING_REPOS" ]]; then
  EXISTING_COUNT=$#
 fi

+# Account for the "how many" prompt itself
+TOTAL_PROMPTS=$((TOTAL_PROMPTS + 1))
 CURRENT_PROMPT=$((CURRENT_PROMPT + 1))
 printf '%b[%d/~%d]%b How many repos to migrate? %b[%s]%b: ' "$C_DIM" "$CURRENT_PROMPT" "$TOTAL_PROMPTS" "$C_RESET" "$C_YELLOW" "${EXISTING_COUNT:-3}" "$C_RESET"
 read -r REPO_COUNT
@@ -321,8 +323,8 @@ while ! [[ "$REPO_COUNT" =~ ^[1-9][0-9]*$ ]]; do
  printf 'How many repos to migrate? '
  read -r REPO_COUNT
 done
-# Update total now that we know how many repos
-TOTAL_PROMPTS=$((TOTAL_PROMPTS + 1 + REPO_COUNT))
+# Now that we know repo count, add them to total
+TOTAL_PROMPTS=$((TOTAL_PROMPTS + REPO_COUNT))

 # Collect repo names
 COLLECTED_REPOS=""