fix(telemetry): improve error reporting with structured error strings and better categorization

- Add build_error_string() that creates structured format:
  'exit_code=N | description\n---\n<last 20 log lines>'
- Fix categorize_error() to map ALL known exit codes:
  - Added: shell(1,2), proxmox(200-231), service(150-154),
    database(170-193), runtime(243-249), signal(139,141,143)
  - Split timeout from network (28 was in both)
  - Added DPKG(255) to dependency category
- Update all API functions to use build_error_string():
  post_update_to_api, post_update_to_api_extended,
  post_tool_to_api, post_addon_to_api
- Add ensure_log_on_host() calls to on_exit, on_interrupt,
  on_terminate handlers to prevent race condition where
  telemetry reports before container log is pulled to host
This commit is contained in:
CanbiZ (MickLesk)
2026-02-14 14:29:43 +01:00
parent f762155870
commit 58f8158c47
2 changed files with 85 additions and 33 deletions

View File

@@ -254,6 +254,10 @@ on_exit() {
# post_to_api was called ("installing" sent) but post_update_to_api was never called
if [[ "${POST_TO_API_DONE:-}" == "true" && "${POST_UPDATE_DONE:-}" != "true" ]]; then
if declare -f post_update_to_api >/dev/null 2>&1; then
# Ensure log is accessible on host before reporting
if declare -f ensure_log_on_host >/dev/null 2>&1; then
ensure_log_on_host
fi
if [[ $exit_code -ne 0 ]]; then
post_update_to_api "failed" "$exit_code"
else
@@ -273,6 +277,10 @@ on_exit() {
# - Exits with code 130 (128 + SIGINT=2)
# ------------------------------------------------------------------------------
on_interrupt() {
# Ensure log is accessible on host before reporting
if declare -f ensure_log_on_host >/dev/null 2>&1; then
ensure_log_on_host
fi
# Report interruption to telemetry API (prevents stuck "installing" records)
if declare -f post_update_to_api >/dev/null 2>&1; then
post_update_to_api "failed" "130"
@@ -294,6 +302,10 @@ on_interrupt() {
# - Triggered by external process termination
# ------------------------------------------------------------------------------
on_terminate() {
# Ensure log is accessible on host before reporting
if declare -f ensure_log_on_host >/dev/null 2>&1; then
ensure_log_on_host
fi
# Report termination to telemetry API (prevents stuck "installing" records)
if declare -f post_update_to_api >/dev/null 2>&1; then
post_update_to_api "failed" "143"