From cecadf5681c0198b8fb62e765e91ecdc8646ded5 Mon Sep 17 00:00:00 2001 From: "CanbiZ (MickLesk)" <47820557+MickLesk@users.noreply.github.com> Date: Sat, 14 Feb 2026 15:28:30 +0100 Subject: [PATCH] core: improve error reporting with structured error strings and better categorization + output formatting (#11907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(telemetry): improve error reporting with structured error strings and better categorization - Add build_error_string() that creates structured format: 'exit_code=N | description\n---\n' - Fix categorize_error() to map ALL known exit codes: - Added: shell(1,2), proxmox(200-231), service(150-154), database(170-193), runtime(243-249), signal(139,141,143) - Split timeout from network (28 was in both) - Added DPKG(255) to dependency category - Update all API functions to use build_error_string(): post_update_to_api, post_update_to_api_extended, post_tool_to_api, post_addon_to_api - Add ensure_log_on_host() calls to on_exit, on_interrupt, on_terminate handlers to prevent race condition where telemetry reports before container log is pulled to host * fix(ui): improve error output formatting and remove redundant log paths - error_handler: Use msg_info/msg_ok/msg_warn for container cleanup instead of raw echo with manual ANSI codes - error_handler: Add ❓ icon before 'Remove broken container?' prompt - error_handler: Indent log output with TAB for visual consistency - build.func: Use msg_custom for installation log path display - build.func: Use msg_info → msg_ok for container removal flow - build.func: Use msg_warn for 'kept for debugging' message - core.func/vm-core.func: Remove redundant container-internal log path display (📋 View full log) since combined log on host is the canonical location shown after failure --- misc/api.func | 106 +++++++++++++++++++++++++++------------- misc/build.func | 21 ++++---- misc/core.func | 10 +--- misc/error_handler.func | 55 ++++++++++++++++++--- misc/vm-core.func | 10 +--- 5 files changed, 135 insertions(+), 67 deletions(-) diff --git a/misc/api.func b/misc/api.func index 4a554ecfc..f8c3aa67e 100644 --- a/misc/api.func +++ b/misc/api.func @@ -287,6 +287,32 @@ get_error_text() { fi } +# ------------------------------------------------------------------------------ +# build_error_string() +# +# - Builds a structured error string for telemetry reporting +# - Format: "exit_code= | \n---\n" +# - If no log lines available, returns just the explanation +# - Arguments: +# * $1: exit_code (numeric) +# * $2: log_text (optional, output from get_error_text) +# - Returns structured error string via stdout +# ------------------------------------------------------------------------------ +build_error_string() { + local exit_code="${1:-1}" + local log_text="${2:-}" + local explanation + explanation=$(explain_exit_code "$exit_code") + + if [[ -n "$log_text" ]]; then + # Structured format: header + separator + log lines + printf 'exit_code=%s | %s\n---\n%s' "$exit_code" "$explanation" "$log_text" + else + # No log available - just the explanation with exit code + printf 'exit_code=%s | %s' "$exit_code" "$explanation" + fi +} + # ============================================================================== # SECTION 2: TELEMETRY FUNCTIONS # ============================================================================== @@ -665,13 +691,12 @@ post_update_to_api() { else exit_code=1 fi + # Get log lines and build structured error string local error_text="" error_text=$(get_error_text) - if [[ -n "$error_text" ]]; then - error=$(json_escape "$error_text") - else - error=$(json_escape "$(explain_exit_code "$exit_code")") - fi + local full_error + full_error=$(build_error_string "$exit_code" "$error_text") + error=$(json_escape "$full_error") short_error=$(json_escape "$(explain_exit_code "$exit_code")") error_category=$(categorize_error "$exit_code") [[ -z "$error" ]] && error="Unknown error" @@ -814,31 +839,52 @@ EOF categorize_error() { local code="$1" case "$code" in - # Network errors - 6 | 7 | 22 | 28 | 35) echo "network" ;; + # Network errors (curl/wget) + 6 | 7 | 22 | 35) echo "network" ;; - # Storage errors - 214 | 217 | 219) echo "storage" ;; + # Timeout errors + 28 | 124 | 211) echo "timeout" ;; - # Dependency/Package errors - 100 | 101 | 102 | 127 | 160 | 161 | 162) echo "dependency" ;; + # Storage errors (Proxmox storage) + 214 | 217 | 219 | 224) echo "storage" ;; + + # Dependency/Package errors (APT, DPKG, pip, commands) + 100 | 101 | 102 | 127 | 160 | 161 | 162 | 255) echo "dependency" ;; # Permission errors 126 | 152) echo "permission" ;; - # Timeout errors - 124 | 28 | 211) echo "timeout" ;; + # Configuration errors (Proxmox config, invalid args) + 128 | 203 | 204 | 205 | 206 | 207 | 208) echo "config" ;; - # Configuration errors - 203 | 204 | 205 | 206 | 207 | 208) echo "config" ;; + # Proxmox container/template errors + 200 | 209 | 210 | 212 | 213 | 215 | 216 | 218 | 220 | 221 | 222 | 223 | 225 | 231) echo "proxmox" ;; + + # Service/Systemd errors + 150 | 151 | 153 | 154) echo "service" ;; + + # Database errors (PostgreSQL, MySQL, MongoDB) + 170 | 171 | 172 | 173 | 180 | 181 | 182 | 183 | 190 | 191 | 192 | 193) echo "database" ;; + + # Node.js / JavaScript runtime errors + 243 | 245 | 246 | 247 | 248 | 249) echo "runtime" ;; + + # Python environment errors + # (already covered: 160-162 under dependency) # Aborted by user 130) echo "aborted" ;; - # Resource errors (OOM, etc) - 137 | 134) echo "resource" ;; + # Resource errors (OOM, SIGKILL, SIGABRT) + 134 | 137) echo "resource" ;; - # Default + # Signal/Process errors (SIGTERM, SIGPIPE, SIGSEGV) + 139 | 141 | 143) echo "signal" ;; + + # Shell errors (general error, syntax error) + 1 | 2) echo "shell" ;; + + # Default - truly unknown *) echo "unknown" ;; esac } @@ -901,11 +947,9 @@ post_tool_to_api() { [[ ! "$exit_code" =~ ^[0-9]+$ ]] && exit_code=1 local error_text="" error_text=$(get_error_text) - if [[ -n "$error_text" ]]; then - error=$(json_escape "$error_text") - else - error=$(json_escape "$(explain_exit_code "$exit_code")") - fi + local full_error + full_error=$(build_error_string "$exit_code" "$error_text") + error=$(json_escape "$full_error") error_category=$(categorize_error "$exit_code") fi @@ -968,11 +1012,9 @@ post_addon_to_api() { [[ ! "$exit_code" =~ ^[0-9]+$ ]] && exit_code=1 local error_text="" error_text=$(get_error_text) - if [[ -n "$error_text" ]]; then - error=$(json_escape "$error_text") - else - error=$(json_escape "$(explain_exit_code "$exit_code")") - fi + local full_error + full_error=$(build_error_string "$exit_code" "$error_text") + error=$(json_escape "$full_error") error_category=$(categorize_error "$exit_code") fi @@ -1067,11 +1109,9 @@ post_update_to_api_extended() { fi local error_text="" error_text=$(get_error_text) - if [[ -n "$error_text" ]]; then - error=$(json_escape "$error_text") - else - error=$(json_escape "$(explain_exit_code "$exit_code")") - fi + local full_error + full_error=$(build_error_string "$exit_code" "$error_text") + error=$(json_escape "$full_error") error_category=$(categorize_error "$exit_code") [[ -z "$error" ]] && error="Unknown error" fi diff --git a/misc/build.func b/misc/build.func index dcf8fb42c..e3f77b722 100644 --- a/misc/build.func +++ b/misc/build.func @@ -4133,8 +4133,7 @@ EOF' # Show combined log location if [[ -n "$CTID" && -n "${SESSION_ID:-}" ]]; then - echo "" - echo -e "${GN}✔${CL} Installation log: ${BL}${combined_log}${CL}" + msg_custom "📋" "${YW}" "Installation log: ${combined_log}" fi # Dev mode: Keep container or open breakpoint shell @@ -4157,19 +4156,21 @@ EOF' exit $install_exit_code fi - # Prompt user for cleanup with 60s timeout (plain echo - no msg_info to avoid spinner) + # Prompt user for cleanup with 60s timeout echo "" - echo -en "${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}" + echo -en "${TAB}❓${TAB}${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}" if read -t 60 -r response; then if [[ -z "$response" || "$response" =~ ^[Yy]$ ]]; then # Remove container - echo -e "\n${TAB}${HOLD}${YW}Removing container ${CTID}${CL}" + echo "" + msg_info "Removing container ${CTID}" pct stop "$CTID" &>/dev/null || true pct destroy "$CTID" &>/dev/null || true - echo -e "${BFR}${CM}${GN}Container ${CTID} removed${CL}" + msg_ok "Container ${CTID} removed" elif [[ "$response" =~ ^[Nn]$ ]]; then - echo -e "\n${TAB}${YW}Container ${CTID} kept for debugging${CL}" + echo "" + msg_warn "Container ${CTID} kept for debugging" # Dev mode: Setup MOTD/SSH for debugging access to broken container if [[ "${DEV_MODE_MOTD:-false}" == "true" ]]; then @@ -4185,11 +4186,11 @@ EOF' fi else # Timeout - auto-remove - echo -e "\n${YW}No response - auto-removing container${CL}" - echo -e "${TAB}${HOLD}${YW}Removing container ${CTID}${CL}" + echo "" + msg_info "No response - removing container ${CTID}" pct stop "$CTID" &>/dev/null || true pct destroy "$CTID" &>/dev/null || true - echo -e "${BFR}${CM}${GN}Container ${CTID} removed${CL}" + msg_ok "Container ${CTID} removed" fi # Force one final status update attempt after cleanup diff --git a/misc/core.func b/misc/core.func index f13be64d3..838ebdd85 100644 --- a/misc/core.func +++ b/misc/core.func @@ -522,15 +522,9 @@ silent() { msg_custom "→" "${YWB}" "${cmd}" if [[ -s "$logfile" ]]; then - local log_lines=$(wc -l <"$logfile") - echo "--- Last 10 lines of silent log ---" + echo -e "\n${TAB}--- Last 10 lines of log ---" tail -n 10 "$logfile" - echo "-----------------------------------" - - # Show how to view full log if there are more lines - if [[ $log_lines -gt 10 ]]; then - msg_custom "📋" "${YW}" "View full log (${log_lines} lines): ${logfile}" - fi + echo -e "${TAB}-----------------------------------\n" fi exit "$rc" diff --git a/misc/error_handler.func b/misc/error_handler.func index afedc696f..2f0fc2438 100644 --- a/misc/error_handler.func +++ b/misc/error_handler.func @@ -175,9 +175,9 @@ error_handler() { fi if [[ -n "$active_log" && -s "$active_log" ]]; then - echo "--- Last 20 lines of silent log ---" + echo -e "\n${TAB}--- Last 20 lines of log ---" tail -n 20 "$active_log" - echo "-----------------------------------" + echo -e "${TAB}-----------------------------------\n" # Detect context: Container (INSTALL_LOG set + /root exists) vs Host (BUILD_LOG) if [[ -n "${INSTALL_LOG:-}" && -d /root ]]; then @@ -204,23 +204,50 @@ error_handler() { fi echo "" - echo -en "${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}" + if declare -f msg_custom >/dev/null 2>&1; then + echo -en "${TAB}❓${TAB}${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}" + else + echo -en "${YW}Remove broken container ${CTID}? (Y/n) [auto-remove in 60s]: ${CL}" + fi if read -t 60 -r response; then if [[ -z "$response" || "$response" =~ ^[Yy]$ ]]; then - echo -e "\n${YW}Removing container ${CTID}${CL}" + echo "" + if declare -f msg_info >/dev/null 2>&1; then + msg_info "Removing container ${CTID}" + else + echo -e "${YW}Removing container ${CTID}${CL}" + fi pct stop "$CTID" &>/dev/null || true pct destroy "$CTID" &>/dev/null || true - echo -e "${GN}✔${CL} Container ${CTID} removed" + if declare -f msg_ok >/dev/null 2>&1; then + msg_ok "Container ${CTID} removed" + else + echo -e "${GN}✔${CL} Container ${CTID} removed" + fi elif [[ "$response" =~ ^[Nn]$ ]]; then - echo -e "\n${YW}Container ${CTID} kept for debugging${CL}" + echo "" + if declare -f msg_warn >/dev/null 2>&1; then + msg_warn "Container ${CTID} kept for debugging" + else + echo -e "${YW}Container ${CTID} kept for debugging${CL}" + fi fi else # Timeout - auto-remove - echo -e "\n${YW}No response - auto-removing container${CL}" + echo "" + if declare -f msg_info >/dev/null 2>&1; then + msg_info "No response - removing container ${CTID}" + else + echo -e "${YW}No response - removing container ${CTID}${CL}" + fi pct stop "$CTID" &>/dev/null || true pct destroy "$CTID" &>/dev/null || true - echo -e "${GN}✔${CL} Container ${CTID} removed" + if declare -f msg_ok >/dev/null 2>&1; then + msg_ok "Container ${CTID} removed" + else + echo -e "${GN}✔${CL} Container ${CTID} removed" + fi fi # Force one final status update attempt after cleanup @@ -254,6 +281,10 @@ on_exit() { # post_to_api was called ("installing" sent) but post_update_to_api was never called if [[ "${POST_TO_API_DONE:-}" == "true" && "${POST_UPDATE_DONE:-}" != "true" ]]; then if declare -f post_update_to_api >/dev/null 2>&1; then + # Ensure log is accessible on host before reporting + if declare -f ensure_log_on_host >/dev/null 2>&1; then + ensure_log_on_host + fi if [[ $exit_code -ne 0 ]]; then post_update_to_api "failed" "$exit_code" else @@ -273,6 +304,10 @@ on_exit() { # - Exits with code 130 (128 + SIGINT=2) # ------------------------------------------------------------------------------ on_interrupt() { + # Ensure log is accessible on host before reporting + if declare -f ensure_log_on_host >/dev/null 2>&1; then + ensure_log_on_host + fi # Report interruption to telemetry API (prevents stuck "installing" records) if declare -f post_update_to_api >/dev/null 2>&1; then post_update_to_api "failed" "130" @@ -294,6 +329,10 @@ on_interrupt() { # - Triggered by external process termination # ------------------------------------------------------------------------------ on_terminate() { + # Ensure log is accessible on host before reporting + if declare -f ensure_log_on_host >/dev/null 2>&1; then + ensure_log_on_host + fi # Report termination to telemetry API (prevents stuck "installing" records) if declare -f post_update_to_api >/dev/null 2>&1; then post_update_to_api "failed" "143" diff --git a/misc/vm-core.func b/misc/vm-core.func index 125b16125..557b99a46 100644 --- a/misc/vm-core.func +++ b/misc/vm-core.func @@ -207,15 +207,9 @@ silent() { msg_custom "→" "${YWB}" "${cmd}" if [[ -s "$logfile" ]]; then - local log_lines=$(wc -l <"$logfile") - echo "--- Last 10 lines of log ---" + echo -e "\n${TAB}--- Last 10 lines of log ---" tail -n 10 "$logfile" - echo "----------------------------" - - # Show how to view full log if there are more lines - if [[ $log_lines -gt 10 ]]; then - msg_custom "📋" "${YW}" "View full log (${log_lines} lines): ${logfile}" - fi + echo -e "${TAB}----------------------------\n" fi exit "$rc"