diff --git a/misc/api.func b/misc/api.func index d6405bf67..4d5c0ae86 100644 --- a/misc/api.func +++ b/misc/api.func @@ -312,7 +312,8 @@ json_escape() { s=${s//$'\r'/} s=${s//$'\t'/\\t} # Remove any remaining control characters (0x00-0x1F except those already handled) - s=$(printf '%s' "$s" | tr -d '\000-\010\013\014\016-\037') + # Also remove DEL (0x7F) and invalid high bytes that break JSON parsers + s=$(printf '%s' "$s" | tr -d '\000-\010\013\014\016-\037\177') printf '%s' "$s" } @@ -982,7 +983,7 @@ EOF fi # All 3 attempts failed — do NOT set POST_UPDATE_DONE=true. - # This allows the EXIT trap (api_exit_script) to retry with 3 fresh attempts. + # This allows the EXIT trap (on_exit in error_handler.func) to retry. # No infinite loop risk: EXIT trap fires exactly once. } diff --git a/misc/build.func b/misc/build.func index 269a87d3c..2b4b41f26 100644 --- a/misc/build.func +++ b/misc/build.func @@ -4098,6 +4098,11 @@ EOF' # Installation failed? if [[ $install_exit_code -ne 0 ]]; then + # Prevent SIGTSTP (Ctrl+Z) from suspending the script during recovery. + # In non-interactive shells (bash -c), background processes (spinner) can + # trigger terminal-related signals that stop the entire process group. + trap '' TSTP + msg_error "Installation failed in container ${CTID} (exit code: ${install_exit_code})" # Copy install log from container BEFORE API call so get_error_text() can read it @@ -4173,7 +4178,12 @@ EOF' fi # Report failure to telemetry API (now with log available on host) + # NOTE: Do NOT use msg_info/spinner here — the background spinner process + # causes SIGTSTP in non-interactive shells (bash -c "$(curl ...)"), which + # stops the entire process group and prevents the recovery dialog from appearing. + $STD echo -e "${TAB}⏳ Reporting failure to telemetry..." post_update_to_api "failed" "$install_exit_code" + $STD echo -e "${TAB}${CM:-✔} Failure reported" # Defense-in-depth: Ensure error handling stays disabled during recovery. # Some functions (e.g. silent/$STD) unconditionally re-enable set -Eeuo pipefail @@ -4537,8 +4547,12 @@ EOF' # Force one final status update attempt after cleanup # This ensures status is updated even if the first attempt failed (e.g., HTTP 400) + $STD echo -e "${TAB}⏳ Finalizing telemetry report..." post_update_to_api "failed" "$install_exit_code" "force" + $STD echo -e "${TAB}${CM:-✔} Telemetry finalized" + # Restore default SIGTSTP handling before exit + trap - TSTP exit $install_exit_code fi @@ -5608,44 +5622,21 @@ ensure_log_on_host() { fi } -# ------------------------------------------------------------------------------ -# api_exit_script() +# ============================================================================== +# TRAP MANAGEMENT +# ============================================================================== +# All traps (ERR, EXIT, INT, TERM, HUP) are set by catch_errors() in +# error_handler.func — called at the top of this file after sourcing. # -# - Exit trap handler for reporting to API telemetry -# - Captures exit code and reports to PocketBase using centralized error descriptions -# - Uses explain_exit_code() from api.func for consistent error messages -# - ALWAYS sends telemetry FIRST before log collection to prevent pct pull -# hangs from blocking status updates (container may be dead/unresponsive) -# - For non-zero exit codes: posts "failed" status -# - For zero exit codes where post_update_to_api was never called: -# catches orphaned "installing" records (e.g., script exited cleanly -# but description() was never reached) -# ------------------------------------------------------------------------------ -api_exit_script() { - local exit_code=$? - if [ $exit_code -ne 0 ]; then - # ALWAYS send telemetry FIRST - ensure status is reported even if - # ensure_log_on_host hangs (e.g. pct pull on dead container) - post_update_to_api "failed" "$exit_code" 2>/dev/null || true - # Best-effort log collection (non-critical after telemetry is sent) - if declare -f ensure_log_on_host >/dev/null 2>&1; then - ensure_log_on_host 2>/dev/null || true - fi - # Stop orphaned container if we're in the install phase - if [[ "${CONTAINER_INSTALLING:-}" == "true" && -n "${CTID:-}" ]] && command -v pct &>/dev/null; then - pct stop "$CTID" 2>/dev/null || true - fi - elif [[ "${POST_TO_API_DONE:-}" == "true" && "${POST_UPDATE_DONE:-}" != "true" ]]; then - # Script exited with 0 but never sent a completion status - # exit_code=0 is never an error — report as success - post_update_to_api "done" "0" - fi -} - -if command -v pveversion >/dev/null 2>&1; then - trap 'api_exit_script' EXIT -fi -trap 'local _ec=$?; if [[ $_ec -ne 0 ]]; then post_update_to_api "failed" "$_ec" 2>/dev/null || true; if declare -f ensure_log_on_host &>/dev/null; then ensure_log_on_host 2>/dev/null || true; fi; fi' ERR -trap 'post_update_to_api "failed" "129" 2>/dev/null || true; if [[ -n "${CTID:-}" ]] && command -v pct &>/dev/null; then pct stop "$CTID" 2>/dev/null || true; fi; exit 129' SIGHUP -trap 'post_update_to_api "failed" "130" 2>/dev/null || true; if [[ -n "${CTID:-}" ]] && command -v pct &>/dev/null; then pct stop "$CTID" 2>/dev/null || true; fi; exit 130' SIGINT -trap 'post_update_to_api "failed" "143" 2>/dev/null || true; if [[ -n "${CTID:-}" ]] && command -v pct &>/dev/null; then pct stop "$CTID" 2>/dev/null || true; fi; exit 143' SIGTERM +# Do NOT set duplicate traps here. The handlers in error_handler.func +# (on_exit, on_interrupt, on_terminate, on_hangup, error_handler) already: +# - Send telemetry via post_update_to_api / _send_abort_telemetry +# - Stop orphaned containers via _stop_container_if_installing +# - Collect logs via ensure_log_on_host +# - Clean up lock files and spinner processes +# +# Previously, inline traps here overwrote catch_errors() traps, causing: +# - error_handler() never fired (no error output, no cleanup dialog) +# - on_hangup() never fired (SSH disconnect → stuck records) +# - Duplicated logic in two places (hard to debug) +# ============================================================================== diff --git a/misc/error_handler.func b/misc/error_handler.func index cea4639a1..10aff2e16 100644 --- a/misc/error_handler.func +++ b/misc/error_handler.func @@ -199,11 +199,16 @@ error_handler() { return 0 fi + # Stop spinner and restore cursor FIRST — before any output + # This prevents spinner text overlapping with error messages + if declare -f stop_spinner >/dev/null 2>&1; then + stop_spinner 2>/dev/null || true + fi + printf "\e[?25h" + local explanation explanation="$(explain_exit_code "$exit_code")" - printf "\e[?25h" - # ALWAYS report failure to API immediately - don't wait for container checks # This ensures we capture failures that occur before/after container exists if declare -f post_update_to_api &>/dev/null; then @@ -359,9 +364,39 @@ _send_abort_telemetry() { command -v curl &>/dev/null || return 0 [[ "${DIAGNOSTICS:-no}" == "no" ]] && return 0 [[ -z "${RANDOM_UUID:-}" ]] && return 0 - curl -fsS -m 5 -X POST "${TELEMETRY_URL:-https://telemetry.community-scripts.org/telemetry}" \ - -H "Content-Type: application/json" \ - -d "{\"random_id\":\"${RANDOM_UUID}\",\"execution_id\":\"${EXECUTION_ID:-${RANDOM_UUID}}\",\"type\":\"${TELEMETRY_TYPE:-lxc}\",\"nsapp\":\"${NSAPP:-${app:-unknown}}\",\"status\":\"failed\",\"exit_code\":${exit_code}}" &>/dev/null || true + + # Collect last 20 log lines for error diagnosis (best-effort) + local error_text="" + if [[ -n "${INSTALL_LOG:-}" && -s "${INSTALL_LOG}" ]]; then + error_text=$(tail -n 20 "$INSTALL_LOG" 2>/dev/null | sed 's/\x1b\[[0-9;]*[a-zA-Z]//g; s/\\/\\\\/g; s/"/\\"/g; s/\r//g' | tr '\n' '|' | sed 's/|$//' | tr -d '\000-\010\013\014\016-\037\177') || true + fi + + # Calculate duration if start time is available + local duration="" + if [[ -n "${DIAGNOSTICS_START_TIME:-}" ]]; then + duration=$(($(date +%s) - DIAGNOSTICS_START_TIME)) + fi + + # Build JSON payload with error context + local payload + payload="{\"random_id\":\"${RANDOM_UUID}\",\"execution_id\":\"${EXECUTION_ID:-${RANDOM_UUID}}\",\"type\":\"${TELEMETRY_TYPE:-lxc}\",\"nsapp\":\"${NSAPP:-${app:-unknown}}\",\"status\":\"failed\",\"exit_code\":${exit_code}" + [[ -n "$error_text" ]] && payload="${payload},\"error\":\"${error_text}\"" + [[ -n "$duration" ]] && payload="${payload},\"duration\":${duration}" + payload="${payload}}" + + local api_url="${TELEMETRY_URL:-https://telemetry.community-scripts.org/telemetry}" + + # 2 attempts (retry once on failure) — original had no retry + local attempt + for attempt in 1 2; do + if curl -fsS -m 5 -X POST "$api_url" \ + -H "Content-Type: application/json" \ + -d "$payload" &>/dev/null; then + return 0 + fi + [[ $attempt -eq 1 ]] && sleep 1 + done + return 0 } # ------------------------------------------------------------------------------ @@ -437,6 +472,12 @@ on_exit() { # - Exits with code 130 (128 + SIGINT=2) # ------------------------------------------------------------------------------ on_interrupt() { + # Stop spinner and restore cursor before any output + if declare -f stop_spinner >/dev/null 2>&1; then + stop_spinner 2>/dev/null || true + fi + printf "\e[?25h" 2>/dev/null || true + _send_abort_telemetry "130" _stop_container_if_installing if declare -f msg_error >/dev/null 2>&1; then @@ -456,6 +497,12 @@ on_interrupt() { # - Exits with code 143 (128 + SIGTERM=15) # ------------------------------------------------------------------------------ on_terminate() { + # Stop spinner and restore cursor before any output + if declare -f stop_spinner >/dev/null 2>&1; then + stop_spinner 2>/dev/null || true + fi + printf "\e[?25h" 2>/dev/null || true + _send_abort_telemetry "143" _stop_container_if_installing if declare -f msg_error >/dev/null 2>&1; then @@ -478,6 +525,11 @@ on_terminate() { # - Exits with code 129 (128 + SIGHUP=1) # ------------------------------------------------------------------------------ on_hangup() { + # Stop spinner (no cursor restore needed — terminal is already gone) + if declare -f stop_spinner >/dev/null 2>&1; then + stop_spinner 2>/dev/null || true + fi + _send_abort_telemetry "129" _stop_container_if_installing exit 129