mirror of
https://github.com/community-scripts/ProxmoxVE.git
synced 2026-03-17 15:33:00 +01:00
fix: route customization errors through recovery menu instead of error_handler
Previously, when a container was stopped or base package installation failed during 'Customizing LXC Container', the error handler's simple 'Remove broken container? (Y/n)' prompt appeared instead of the full recovery menu with retry/repair options (verbose rebuild, APT repair, OOM retry, DNS override).

Root cause: set -Eeuo pipefail and the ERR trap were still active during the customization phase (locale, timezone, base packages). The exit 1 triggered error_handler(), which has its own cleanup flow, bypassing the full recovery menu in build_container().

Changes:
- Disable the ERR trap before the customization phase (not just before lxc-attach)
- Replace exit 1 with install_exit_code=1 for base package failures
- Wrap the lxc-attach section in an if-block that is skipped when customization failed
- Both customization and install errors now reach the full recovery menu
- Fix read in error_handler.func to use </dev/tty (same curl stdin issue)
- Eliminate the 'pop_var_context: head of shell_variables not a function context' bash error caused by double-exit (exit 1 -> ERR trap -> exit)
This commit is contained in:
120
misc/build.func
120
misc/build.func
@@ -4012,6 +4012,16 @@ EOF
|
||||
# install_gpu_userland "NVIDIA"
|
||||
# fi
|
||||
|
||||
# Disable error trap for entire customization & install phase.
|
||||
# All errors are handled explicitly — recovery menu shown on failure.
|
||||
# Without this, customization errors (e.g. container stopped during base package
|
||||
# install) would trigger error_handler() with a simple "Remove broken container?"
|
||||
# prompt instead of the full recovery menu with retry/repair options.
|
||||
set +Eeuo pipefail
|
||||
trap - ERR
|
||||
|
||||
local install_exit_code=0
|
||||
|
||||
# Continue with standard container setup
|
||||
if [ "$var_os" == "alpine" ]; then
|
||||
sleep 3
|
||||
@@ -4021,7 +4031,7 @@ http://dl-cdn.alpinelinux.org/alpine/latest-stable/community
|
||||
EOF'
|
||||
pct exec "$CTID" -- ash -c "apk add bash newt curl openssh nano mc ncurses jq >/dev/null" || {
|
||||
msg_error "Failed to install base packages in Alpine container"
|
||||
exit 1
|
||||
install_exit_code=1
|
||||
}
|
||||
else
|
||||
sleep 3
|
||||
@@ -4047,67 +4057,67 @@ EOF'
|
||||
|
||||
pct exec "$CTID" -- bash -c "apt-get update >/dev/null && apt-get install -y sudo curl mc gnupg2 jq >/dev/null" || {
|
||||
msg_error "apt-get base packages installation failed"
|
||||
exit 1
|
||||
install_exit_code=1
|
||||
}
|
||||
fi
|
||||
|
||||
msg_ok "Customized LXC Container"
|
||||
# Only continue with installation if customization succeeded
|
||||
if [[ $install_exit_code -eq 0 ]]; then
|
||||
msg_ok "Customized LXC Container"
|
||||
|
||||
# Optional DNS override for retry scenarios (inside LXC, never on host)
|
||||
if [[ "${DNS_RETRY_OVERRIDE:-false}" == "true" ]]; then
|
||||
msg_info "Applying DNS retry override in LXC (8.8.8.8, 1.1.1.1)"
|
||||
pct exec "$CTID" -- bash -c "printf 'nameserver 8.8.8.8\nnameserver 1.1.1.1\n' >/etc/resolv.conf" >/dev/null 2>&1 || true
|
||||
msg_ok "DNS override applied in LXC"
|
||||
fi
|
||||
|
||||
# Install SSH keys
|
||||
install_ssh_keys_into_ct
|
||||
|
||||
# Start timer for duration tracking
|
||||
start_install_timer
|
||||
|
||||
# Run application installer
|
||||
# Disable error trap - container errors are handled internally via flag file
|
||||
set +Eeuo pipefail # Disable ALL error handling temporarily
|
||||
trap - ERR # Remove ERR trap completely
|
||||
|
||||
# Signal handlers use this flag to stop the container on abort (SIGHUP/SIGINT/SIGTERM)
|
||||
# Without this, SSH disconnects leave the container running as an orphan process
|
||||
# that sends "configuring" status AFTER the host already reported "failed"
|
||||
export CONTAINER_INSTALLING=true
|
||||
|
||||
# Capture lxc-attach terminal output to host-side log via tee.
|
||||
# This is the ONLY reliable way to get install output when:
|
||||
# - install.func fails to load (DNS error) → no container-side logging
|
||||
# - install script crashes before logging starts
|
||||
# - $STD/silent() not used for some commands
|
||||
# PIPESTATUS[0] gets the real exit code from lxc-attach (not from tee).
|
||||
local _LXC_CAPTURE_LOG="/tmp/.install-capture-${SESSION_ID}.log"
|
||||
lxc-attach -n "$CTID" -- bash -c "$(curl -fsSL https://raw.githubusercontent.com/community-scripts/ProxmoxVE/main/install/${var_install}.sh)" 2>&1 | tee "$_LXC_CAPTURE_LOG"
|
||||
local lxc_exit=${PIPESTATUS[0]}
|
||||
|
||||
unset CONTAINER_INSTALLING
|
||||
|
||||
# Keep error handling DISABLED during failure detection and recovery
|
||||
# Re-enabling it here would cause any pct exec/pull failure to trigger
|
||||
# error_handler() on the host, bypassing the recovery menu entirely
|
||||
|
||||
# Check for error flag file in container (more reliable than lxc-attach exit code)
|
||||
local install_exit_code=0
|
||||
if [[ -n "${SESSION_ID:-}" ]]; then
|
||||
local error_flag="/root/.install-${SESSION_ID}.failed"
|
||||
if pct exec "$CTID" -- test -f "$error_flag" 2>/dev/null; then
|
||||
install_exit_code=$(pct exec "$CTID" -- cat "$error_flag" 2>/dev/null || echo "1")
|
||||
pct exec "$CTID" -- rm -f "$error_flag" 2>/dev/null || true
|
||||
# Optional DNS override for retry scenarios (inside LXC, never on host)
|
||||
if [[ "${DNS_RETRY_OVERRIDE:-false}" == "true" ]]; then
|
||||
msg_info "Applying DNS retry override in LXC (8.8.8.8, 1.1.1.1)"
|
||||
pct exec "$CTID" -- bash -c "printf 'nameserver 8.8.8.8\nnameserver 1.1.1.1\n' >/etc/resolv.conf" >/dev/null 2>&1 || true
|
||||
msg_ok "DNS override applied in LXC"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Fallback to lxc-attach exit code if no flag file
|
||||
if [[ $install_exit_code -eq 0 && $lxc_exit -ne 0 ]]; then
|
||||
install_exit_code=$lxc_exit
|
||||
fi
|
||||
# Install SSH keys
|
||||
install_ssh_keys_into_ct
|
||||
|
||||
# Installation failed?
|
||||
# Start timer for duration tracking
|
||||
start_install_timer
|
||||
|
||||
# Run application installer
|
||||
# Error handling already disabled above (before customization phase)
|
||||
|
||||
# Signal handlers use this flag to stop the container on abort (SIGHUP/SIGINT/SIGTERM)
|
||||
# Without this, SSH disconnects leave the container running as an orphan process
|
||||
# that sends "configuring" status AFTER the host already reported "failed"
|
||||
export CONTAINER_INSTALLING=true
|
||||
|
||||
# Capture lxc-attach terminal output to host-side log via tee.
|
||||
# This is the ONLY reliable way to get install output when:
|
||||
# - install.func fails to load (DNS error) → no container-side logging
|
||||
# - install script crashes before logging starts
|
||||
# - $STD/silent() not used for some commands
|
||||
# PIPESTATUS[0] gets the real exit code from lxc-attach (not from tee).
|
||||
local _LXC_CAPTURE_LOG="/tmp/.install-capture-${SESSION_ID}.log"
|
||||
lxc-attach -n "$CTID" -- bash -c "$(curl -fsSL https://raw.githubusercontent.com/community-scripts/ProxmoxVE/main/install/${var_install}.sh)" 2>&1 | tee "$_LXC_CAPTURE_LOG"
|
||||
local lxc_exit=${PIPESTATUS[0]}
|
||||
|
||||
unset CONTAINER_INSTALLING
|
||||
|
||||
# Keep error handling DISABLED during failure detection and recovery
|
||||
# Re-enabling it here would cause any pct exec/pull failure to trigger
|
||||
# error_handler() on the host, bypassing the recovery menu entirely
|
||||
|
||||
# Check for error flag file in container (more reliable than lxc-attach exit code)
|
||||
if [[ -n "${SESSION_ID:-}" ]]; then
|
||||
local error_flag="/root/.install-${SESSION_ID}.failed"
|
||||
if pct exec "$CTID" -- test -f "$error_flag" 2>/dev/null; then
|
||||
install_exit_code=$(pct exec "$CTID" -- cat "$error_flag" 2>/dev/null || echo "1")
|
||||
pct exec "$CTID" -- rm -f "$error_flag" 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Fallback to lxc-attach exit code if no flag file
|
||||
if [[ $install_exit_code -eq 0 && ${lxc_exit:-0} -ne 0 ]]; then
|
||||
install_exit_code=${lxc_exit:-0}
|
||||
fi
|
||||
fi # end: if [[ $install_exit_code -eq 0 ]] (customization succeeded)
|
||||
|
||||
# Installation or customization failed?
|
||||
if [[ $install_exit_code -ne 0 ]]; then
|
||||
# Prevent job-control signals from suspending the script during recovery.
|
||||
# In non-interactive shells (bash -c), background processes (spinner) can
|
||||
|
||||
@@ -286,7 +286,7 @@ error_handler() {
|
||||
echo -en "${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}"
|
||||
fi
|
||||
|
||||
if read -t 60 -r response; then
|
||||
if read -t 60 -r response </dev/tty; then
|
||||
if [[ -z "$response" || "$response" =~ ^[Yy]$ ]]; then
|
||||
echo ""
|
||||
if declare -f msg_info >/dev/null 2>&1; then
|
||||
|
||||
Reference in New Issue
Block a user