fix(tools): improve error diagnostics and actionable hints across install functions

- Add _diagnose_deb_failure() helper: extracts package metadata from failed .deb installs,
  detects PostgreSQL version conflicts (e.g., postgresql-16-vchord with PG17 active),
  lists unmet dependencies, and provides specific actionable hints
- Replace all 4 generic 'Both apt and dpkg installation failed' messages in
  fetch_and_deploy_{codeberg,gh,gl}_release and fetch_and_deploy_from_url with
  _diagnose_deb_failure() for targeted diagnostics
- install_packages_with_retry: on failure, check which packages are missing from
  configured repos and name them with a distribution-specific hint
- upgrade_packages_with_retry: add hint about held-back packages / apt-cache policy
- setup_postgresql: when PGDG repo is unavailable for trixie/forky/sid, show which
  distro PG version will be installed and warn that extension packages must match
- setup_deb822_repo: include GPG key URL and firewall hint in download failure message
- curl_download: add network/DNS hint to the failure message
- error_handler: add log-pattern analysis block after Node.js OOM detection that
  scans the last 60 log lines for 5 common failure patterns and emits msg_warn hints:
    * APT/dpkg dependency conflict (generic + PostgreSQL version mismatch)
    * APT GPG/signature verification failure (sqv, KEYEXPIRED, NO_PUBKEY)
    * Network/DNS failure (Could not resolve, Failed to fetch)
    * APT lock held by another process
    * Disk space exhaustion (ENOSPC)
This commit is contained in:
MickLesk
2026-05-24 21:03:14 +02:00
parent 947d032f96
commit 85aa701e31
2 changed files with 139 additions and 6 deletions

View File

@@ -358,6 +358,55 @@ error_handler() {
fi
fi
# ── Log-pattern analysis: detect common failure causes and emit actionable hints ──
#######################################
# Scan the tail of a log file for well-known failure signatures and emit
# human-readable hints through msg_warn.
#
# Only the FIRST matching category is reported, checked in this order:
#   1. APT/dpkg dependency conflict (with a PostgreSQL-specific variant)
#   2. APT/GPG signature verification failure
#   3. Network / DNS failure
#   4. APT/dpkg lock held by another process
#   5. Disk space exhaustion
#
# Arguments: $1 - path to the log file to inspect (may be empty)
# Outputs:   whatever msg_warn prints; nothing if msg_warn is not defined,
#            the file is missing/empty, or no pattern matches
# Returns:   always 0, so the diagnostics can never abort the caller
#######################################
_error_handler_log_hints() {
  local log_file="${1:-}"

  # Nothing to analyse without a non-empty log file.
  [[ -n "$log_file" && -s "$log_file" ]] || return 0
  # Every hint goes through msg_warn; without it there is nothing to emit.
  declare -f msg_warn >/dev/null 2>&1 || return 0

  local log_tail pg_conflict
  # Best effort: an unreadable log simply yields an empty tail.
  log_tail=$(tail -n 60 -- "$log_file" 2>/dev/null || true)

  # NOTE: grep reads from a here-string instead of `echo ... | grep -q`.
  # With `set -o pipefail`, grep -q exiting on the first match can kill the
  # writing side with SIGPIPE on large input, which would make the pipeline
  # report failure and silently skip the matching hint.

  # 1. APT/dpkg dependency conflict
  if grep -qE "Depends:|depends on.*but.*not installed|broken packages|unmet dep|dependency problems" <<<"$log_tail"; then
    # Check for PostgreSQL-specific version mismatch (most actionable)
    pg_conflict=$(grep -oE 'postgresql-[0-9]+ but.*installed' <<<"$log_tail" | head -1 || true)
    if [[ -n "$pg_conflict" ]]; then
      msg_warn "PostgreSQL version conflict: ${pg_conflict}"
      msg_warn "Hint: A package requires a specific PostgreSQL version that is not installed. Your distribution may have installed a different PG version than expected."
    else
      msg_warn "APT dependency conflict detected. A required package is not available or is the wrong version for this system."
      msg_warn "Hint: Run 'apt-get install -f' inside the container or check that all required repositories are configured for your distribution."
    fi
  # 2. APT/GPG signature verification failure
  elif grep -qE "sqv|KEYEXPIRED|NO_PUBKEY|key is not certified|signature verification failed|is not signed|Sub-process.*sqv" <<<"$log_tail"; then
    msg_warn "APT repository signature error detected."
    msg_warn "Hint: A repository GPG key may be missing, expired, or the keyring file is not yet present (/usr/share/postgresql-common/pgdg/apt.postgresql.org.asc etc.)."
    msg_warn "Hint: Install the 'postgresql-common' package first, or re-add the repository with its correct signing key."
  # 3. Network / DNS failure during apt-get or curl
  elif grep -qE "Could not resolve|Failed to fetch|Unable to connect|Name or service not known|Network is unreachable|curl.*resolve" <<<"$log_tail"; then
    msg_warn "Network or DNS failure detected."
    msg_warn "Hint: Check the container's network connectivity, DNS settings, and whether any firewall or ad-blocker is intercepting traffic."
  # 4. APT lock held by another process
  elif grep -qE "Could not get lock|dpkg frontend lock|waiting for it to exit|E: Unable to lock" <<<"$log_tail"; then
    msg_warn "APT or dpkg lock conflict detected."
    msg_warn "Hint: Another package manager process may be running. Try 'rm /var/lib/dpkg/lock-frontend && dpkg --configure -a' inside the container."
  # 5. Disk space exhaustion
  elif grep -qE "No space left on device|disk quota exceeded|ENOSPC" <<<"$log_tail"; then
    msg_warn "Disk space exhausted during installation."
    msg_warn "Hint: Increase the container's disk size (pct resize <ctid> rootfs +2G) or clean up space first."
  fi

  # Hints are advisory only; never propagate a msg_warn/grep status upward.
  return 0
}
_error_handler_log_hints "${active_log:-}"
# Detect context: Container (INSTALL_LOG set + inside container /root) vs Host
if [[ -n "${INSTALL_LOG:-}" && -f "${INSTALL_LOG:-}" && -d /root ]]; then
# CONTAINER CONTEXT: Copy log and create flag file for host