From 69de9fa57e5822b6d268029ba0e33d767caea65a Mon Sep 17 00:00:00 2001 From: "CanbiZ (MickLesk)" <47820557+MickLesk@users.noreply.github.com> Date: Mon, 23 Feb 2026 13:22:09 +0100 Subject: [PATCH] Improve error handling and logging for LXC builds (#12208) Prevent host-side error_handler from being triggered during in-container install/recovery by delaying re-enabling set -Eeuo pipefail and the ERR trap in misc/build.func until after install/recovery completes; add explanatory comments. Update misc/error_handler.func to fall back to BUILD_LOG if container-internal log path is unavailable, show the last 20 log lines when present, refine container vs host detection (check INSTALL_LOG file and /root), copy INSTALL_LOG into /root and write a .failed flag with the exit code for host-side detection, and ensure full-log output and container removal prompt are shown appropriately in host context. Tweak misc/core.func silent() output to include a "Full log" path and adjust formatting. --- misc/build.func | 9 +++- misc/core.func | 3 +- misc/error_handler.func | 109 ++++++++++++++++++++++------------------ 3 files changed, 68 insertions(+), 53 deletions(-) diff --git a/misc/build.func b/misc/build.func index 8bfae32ef..a8e4e0003 100644 --- a/misc/build.func +++ b/misc/build.func @@ -4055,8 +4055,9 @@ EOF' lxc-attach -n "$CTID" -- bash -c "$(curl -fsSL https://raw.githubusercontent.com/community-scripts/ProxmoxVE/main/install/${var_install}.sh)" local lxc_exit=$? - set -Eeuo pipefail # Re-enable error handling - trap 'error_handler' ERR # Restore ERR trap + # Keep error handling DISABLED during failure detection and recovery + # Re-enabling it here would cause any pct exec/pull failure to trigger + # error_handler() on the host, bypassing the recovery menu entirely # Check for error flag file in container (more reliable than lxc-attach exit code) local install_exit_code=0 @@ -4484,6 +4485,10 @@ EOF' exit $install_exit_code fi + + # Re-enable error handling after successful install or recovery menu completion + set -Eeuo pipefail + trap 'error_handler' ERR } destroy_lxc() { diff --git a/misc/core.func b/misc/core.func index 0b6b14846..23c318fd9 100644 --- a/misc/core.func +++ b/misc/core.func @@ -524,7 +524,8 @@ silent() { if [[ -s "$logfile" ]]; then echo -e "\n${TAB}--- Last 10 lines of log ---" tail -n 10 "$logfile" - echo -e "${TAB}-----------------------------------\n" + echo -e "${TAB}-----------------------------------" + echo -e "${TAB}📋 Full log: ${logfile}\n" fi exit "$rc" diff --git a/misc/error_handler.func b/misc/error_handler.func index 380b52c85..4b586e085 100644 --- a/misc/error_handler.func +++ b/misc/error_handler.func @@ -236,67 +236,54 @@ error_handler() { active_log="$SILENT_LOGFILE" fi + # If active_log points to a container-internal path that doesn't exist on host, + # fall back to BUILD_LOG (host-side log) + if [[ -n "$active_log" && ! -s "$active_log" && -n "${BUILD_LOG:-}" && -s "${BUILD_LOG}" ]]; then + active_log="$BUILD_LOG" + fi + + # Show last log lines if available if [[ -n "$active_log" && -s "$active_log" ]]; then echo -e "\n${TAB}--- Last 20 lines of log ---" tail -n 20 "$active_log" echo -e "${TAB}-----------------------------------\n" + fi - # Detect context: Container (INSTALL_LOG set + /root exists) vs Host (BUILD_LOG) - if [[ -n "${INSTALL_LOG:-}" && -d /root ]]; then - # CONTAINER CONTEXT: Copy log and create flag file for host - local container_log="/root/.install-${SESSION_ID:-error}.log" - cp "$active_log" "$container_log" 2>/dev/null || true + # Detect context: Container (INSTALL_LOG set + inside container /root) vs Host + if [[ -n "${INSTALL_LOG:-}" && -f "${INSTALL_LOG:-}" && -d /root ]]; then + # CONTAINER CONTEXT: Copy log and create flag file for host + local container_log="/root/.install-${SESSION_ID:-error}.log" + cp "${INSTALL_LOG}" "$container_log" 2>/dev/null || true - # Create error flag file with exit code for host detection - echo "$exit_code" >"/root/.install-${SESSION_ID:-error}.failed" 2>/dev/null || true - # Log path is shown by host as combined log - no need to show container path - else - # HOST CONTEXT: Show local log path and offer container cleanup + # Create error flag file with exit code for host detection + echo "$exit_code" >"/root/.install-${SESSION_ID:-error}.failed" 2>/dev/null || true + # Log path is shown by host as combined log - no need to show container path + else + # HOST CONTEXT: Show local log path and offer container cleanup + if [[ -n "$active_log" && -s "$active_log" ]]; then if declare -f msg_custom >/dev/null 2>&1; then msg_custom "📋" "${YW}" "Full log: ${active_log}" else echo -e "${YW}Full log:${CL} ${BL}${active_log}${CL}" fi + fi - # Offer to remove container if it exists (build errors after container creation) - if [[ -n "${CTID:-}" ]] && command -v pct &>/dev/null && pct status "$CTID" &>/dev/null; then - echo "" - if declare -f msg_custom >/dev/null 2>&1; then - echo -en "${TAB}❓${TAB}${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}" - else - echo -en "${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}" - fi + # Offer to remove container if it exists (build errors after container creation) + if [[ -n "${CTID:-}" ]] && command -v pct &>/dev/null && pct status "$CTID" &>/dev/null; then + echo "" + if declare -f msg_custom >/dev/null 2>&1; then + echo -en "${TAB}❓${TAB}${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}" + else + echo -en "${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}" + fi - if read -t 60 -r response; then - if [[ -z "$response" || "$response" =~ ^[Yy]$ ]]; then - echo "" - if declare -f msg_info >/dev/null 2>&1; then - msg_info "Removing container ${CTID}" - else - echo -e "${YW}Removing container ${CTID}${CL}" - fi - pct stop "$CTID" &>/dev/null || true - pct destroy "$CTID" &>/dev/null || true - if declare -f msg_ok >/dev/null 2>&1; then - msg_ok "Container ${CTID} removed" - else - echo -e "${GN}✔${CL} Container ${CTID} removed" - fi - elif [[ "$response" =~ ^[Nn]$ ]]; then - echo "" - if declare -f msg_warn >/dev/null 2>&1; then - msg_warn "Container ${CTID} kept for debugging" - else - echo -e "${YW}Container ${CTID} kept for debugging${CL}" - fi - fi - else - # Timeout - auto-remove + if read -t 60 -r response; then + if [[ -z "$response" || "$response" =~ ^[Yy]$ ]]; then echo "" if declare -f msg_info >/dev/null 2>&1; then - msg_info "No response - removing container ${CTID}" + msg_info "Removing container ${CTID}" else - echo -e "${YW}No response - removing container ${CTID}${CL}" + echo -e "${YW}Removing container ${CTID}${CL}" fi pct stop "$CTID" &>/dev/null || true pct destroy "$CTID" &>/dev/null || true @@ -305,13 +292,35 @@ error_handler() { else echo -e "${GN}✔${CL} Container ${CTID} removed" fi + elif [[ "$response" =~ ^[Nn]$ ]]; then + echo "" + if declare -f msg_warn >/dev/null 2>&1; then + msg_warn "Container ${CTID} kept for debugging" + else + echo -e "${YW}Container ${CTID} kept for debugging${CL}" + fi fi + else + # Timeout - auto-remove + echo "" + if declare -f msg_info >/dev/null 2>&1; then + msg_info "No response - removing container ${CTID}" + else + echo -e "${YW}No response - removing container ${CTID}${CL}" + fi + pct stop "$CTID" &>/dev/null || true + pct destroy "$CTID" &>/dev/null || true + if declare -f msg_ok >/dev/null 2>&1; then + msg_ok "Container ${CTID} removed" + else + echo -e "${GN}✔${CL} Container ${CTID} removed" + fi + fi - # Force one final status update attempt after cleanup - # This ensures status is updated even if the first attempt failed (e.g., HTTP 400) - if declare -f post_update_to_api &>/dev/null; then - post_update_to_api "failed" "$exit_code" "force" - fi + # Force one final status update attempt after cleanup + # This ensures status is updated even if the first attempt failed (e.g., HTTP 400) + if declare -f post_update_to_api &>/dev/null; then + post_update_to_api "failed" "$exit_code" "force" fi fi fi