mirror of
https://github.com/community-scripts/ProxmoxVE.git
synced 2026-03-02 17:05:55 +01:00
fix: improve error trace propagation for telemetry
- post_update_to_api: Attempts 2/3 now send medium_error (16KB truncated log) instead of short_error (generic description only). This is the primary fix — when attempt 1 fails (120KB payload too large/timeout), attempts 2/3 no longer discard all log data. - _send_abort_telemetry: Increased container fallback from 20 to 200 log lines (capped at 16KB). Added SILENT_LOGFILE as fallback source. Added exit code explanation header and error_category to payload. - get_error_text/get_full_log: Added SILENT_LOGFILE as last-resort fallback when INSTALL_LOG, combined log, and BUILD_LOG are all empty/missing.
This commit is contained in:
@@ -393,6 +393,11 @@ get_error_text() {
|
||||
logfile="$BUILD_LOG"
|
||||
fi
|
||||
|
||||
# Try SILENT_LOGFILE as last resort (captures $STD command output)
|
||||
if [[ -z "$logfile" || ! -s "$logfile" ]] && [[ -n "${SILENT_LOGFILE:-}" && -s "${SILENT_LOGFILE}" ]]; then
|
||||
logfile="$SILENT_LOGFILE"
|
||||
fi
|
||||
|
||||
if [[ -n "$logfile" && -s "$logfile" ]]; then
|
||||
tail -n 20 "$logfile" 2>/dev/null | sed 's/\r$//' | sed 's/\x1b\[[0-9;]*[a-zA-Z]//g'
|
||||
fi
|
||||
@@ -438,6 +443,13 @@ get_full_log() {
|
||||
fi
|
||||
fi
|
||||
|
||||
# Fall back to SILENT_LOGFILE (captures $STD command output)
|
||||
if [[ -z "$logfile" || ! -s "$logfile" ]]; then
|
||||
if [[ -n "${SILENT_LOGFILE:-}" && -s "${SILENT_LOGFILE}" ]]; then
|
||||
logfile="$SILENT_LOGFILE"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -n "$logfile" && -s "$logfile" ]]; then
|
||||
# Strip ANSI codes, carriage returns, and anonymize IP addresses (GDPR)
|
||||
sed 's/\r$//' "$logfile" 2>/dev/null |
|
||||
@@ -876,7 +888,7 @@ post_update_to_api() {
|
||||
esac
|
||||
|
||||
# For failed/unknown status, resolve exit code and error description
|
||||
local short_error=""
|
||||
local short_error="" medium_error=""
|
||||
if [[ "$pb_status" == "failed" ]] || [[ "$pb_status" == "unknown" ]]; then
|
||||
if [[ "$raw_exit_code" =~ ^[0-9]+$ ]]; then
|
||||
exit_code="$raw_exit_code"
|
||||
@@ -896,6 +908,18 @@ post_update_to_api() {
|
||||
short_error=$(json_escape "$(explain_exit_code "$exit_code")")
|
||||
error_category=$(categorize_error "$exit_code")
|
||||
[[ -z "$error" ]] && error="Unknown error"
|
||||
|
||||
# Build medium error for attempt 2: explanation + last 100 log lines (≤16KB)
|
||||
# This is the critical middle ground between full 120KB log and generic-only description
|
||||
local medium_log=""
|
||||
medium_log=$(get_full_log 16384) || true # 16KB max
|
||||
if [[ -z "$medium_log" ]]; then
|
||||
medium_log=$(get_error_text) || true
|
||||
fi
|
||||
local medium_full
|
||||
medium_full=$(build_error_string "$exit_code" "$medium_log")
|
||||
medium_error=$(json_escape "$medium_full")
|
||||
[[ -z "$medium_error" ]] && medium_error="$short_error"
|
||||
fi
|
||||
|
||||
# Calculate duration if timer was started
|
||||
@@ -954,7 +978,7 @@ EOF
|
||||
return 0
|
||||
fi
|
||||
|
||||
# ── Attempt 2: Short error text (no full log) ──
|
||||
# ── Attempt 2: Medium error text (truncated log ≤16KB instead of full 120KB) ──
|
||||
sleep 1
|
||||
local RETRY_PAYLOAD
|
||||
RETRY_PAYLOAD=$(
|
||||
@@ -974,7 +998,7 @@ EOF
|
||||
"pve_version": "${pve_version}",
|
||||
"method": "${METHOD:-default}",
|
||||
"exit_code": ${exit_code},
|
||||
"error": "${short_error}",
|
||||
"error": "${medium_error}",
|
||||
"error_category": "${error_category}",
|
||||
"install_duration": ${duration},
|
||||
"cpu_vendor": "${cpu_vendor}",
|
||||
@@ -997,7 +1021,7 @@ EOF
|
||||
return 0
|
||||
fi
|
||||
|
||||
# ── Attempt 3: Minimal payload (bare minimum to set status) ──
|
||||
# ── Attempt 3: Minimal payload with medium error (bare minimum to set status) ──
|
||||
sleep 2
|
||||
local MINIMAL_PAYLOAD
|
||||
MINIMAL_PAYLOAD=$(
|
||||
@@ -1009,7 +1033,7 @@ EOF
|
||||
"nsapp": "${NSAPP:-unknown}",
|
||||
"status": "${pb_status}",
|
||||
"exit_code": ${exit_code},
|
||||
"error": "${short_error}",
|
||||
"error": "${medium_error}",
|
||||
"error_category": "${error_category}",
|
||||
"install_duration": ${duration}
|
||||
}
|
||||
|
||||
@@ -408,10 +408,29 @@ _send_abort_telemetry() {
|
||||
[[ "${DIAGNOSTICS:-no}" == "no" ]] && return 0
|
||||
[[ -z "${RANDOM_UUID:-}" ]] && return 0
|
||||
|
||||
# Collect last 20 log lines for error diagnosis (best-effort)
|
||||
# Collect last 200 log lines for error diagnosis (best-effort)
|
||||
# Container context has no get_full_log(), so we gather as much as possible
|
||||
local error_text=""
|
||||
local logfile=""
|
||||
if [[ -n "${INSTALL_LOG:-}" && -s "${INSTALL_LOG}" ]]; then
|
||||
error_text=$(tail -n 20 "$INSTALL_LOG" 2>/dev/null | sed 's/\x1b\[[0-9;]*[a-zA-Z]//g; s/\\/\\\\/g; s/"/\\"/g; s/\r//g' | tr '\n' '|' | sed 's/|$//' | tr -d '\000-\010\013\014\016-\037\177') || true
|
||||
logfile="${INSTALL_LOG}"
|
||||
elif [[ -n "${SILENT_LOGFILE:-}" && -s "${SILENT_LOGFILE}" ]]; then
|
||||
logfile="${SILENT_LOGFILE}"
|
||||
fi
|
||||
|
||||
if [[ -n "$logfile" ]]; then
|
||||
error_text=$(tail -n 200 "$logfile" 2>/dev/null | sed 's/\x1b\[[0-9;]*[a-zA-Z]//g; s/\\/\\\\/g; s/"/\\"/g; s/\r//g' | tr '\n' '|' | sed 's/|$//' | head -c 16384 | tr -d '\000-\010\013\014\016-\037\177') || true
|
||||
fi
|
||||
|
||||
# Prepend exit code explanation header (like build_error_string does on host)
|
||||
local explanation=""
|
||||
if declare -f explain_exit_code &>/dev/null; then
|
||||
explanation=$(explain_exit_code "$exit_code" 2>/dev/null) || true
|
||||
fi
|
||||
if [[ -n "$explanation" && -n "$error_text" ]]; then
|
||||
error_text="exit_code=${exit_code} | ${explanation}|---|${error_text}"
|
||||
elif [[ -n "$explanation" && -z "$error_text" ]]; then
|
||||
error_text="exit_code=${exit_code} | ${explanation}"
|
||||
fi
|
||||
|
||||
# Calculate duration if start time is available
|
||||
@@ -420,10 +439,17 @@ _send_abort_telemetry() {
|
||||
duration=$(($(date +%s) - DIAGNOSTICS_START_TIME))
|
||||
fi
|
||||
|
||||
# Categorize error if function is available (may not be in minimal container context)
|
||||
local error_category=""
|
||||
if declare -f categorize_error &>/dev/null; then
|
||||
error_category=$(categorize_error "$exit_code" 2>/dev/null) || true
|
||||
fi
|
||||
|
||||
# Build JSON payload with error context
|
||||
local payload
|
||||
payload="{\"random_id\":\"${RANDOM_UUID}\",\"execution_id\":\"${EXECUTION_ID:-${RANDOM_UUID}}\",\"type\":\"${TELEMETRY_TYPE:-lxc}\",\"nsapp\":\"${NSAPP:-${app:-unknown}}\",\"status\":\"failed\",\"exit_code\":${exit_code}"
|
||||
[[ -n "$error_text" ]] && payload="${payload},\"error\":\"${error_text}\""
|
||||
[[ -n "$error_category" ]] && payload="${payload},\"error_category\":\"${error_category}\""
|
||||
[[ -n "$duration" ]] && payload="${payload},\"duration\":${duration}"
|
||||
payload="${payload}}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user