Compare commits

..

5 Commits

Author SHA1 Message Date
CanbiZ (MickLesk)
b6b9563300 fix(api): handle missing RAM speed in nested VMs
- Add || true to dmidecode pipelines to prevent error when speed is 'Unknown'
- Validate RAM_SPEED is a valid integer, fallback to 0
- Send ram_speed as integer (not string) in all JSON payloads for PocketBase

Fixes: dmidecode in nested VMs returns 'Configured Memory Speed: Unknown'
which causes grep to fail and triggers catch_errors handler.
2026-02-14 16:07:39 +01:00
community-scripts-pr-app[bot]
ed9a6d9d4b Update CHANGELOG.md (#11911)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2026-02-14 14:29:19 +00:00
community-scripts-pr-app[bot]
c6005af29d Update CHANGELOG.md (#11910)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2026-02-14 14:28:56 +00:00
CanbiZ (MickLesk)
911f533e6a fix(cluster): validate container IDs cluster-wide across all nodes (#11906)
- Query /cluster/resources via pvesh to check all VMs/CTs on ALL nodes
- Check /etc/pve/nodes/*/qemu-server and /etc/pve/nodes/*/lxc dirs
- Handles pmxcfs sync delays that caused sporadic ID conflicts
- Remove duplicate validate_container_id/get_valid_container_id definitions
- Add max_attempts safeguard to prevent infinite loops
2026-02-14 15:28:47 +01:00
CanbiZ (MickLesk)
cecadf5681 core: improve error reporting with structured error strings and better categorization + output formatting (#11907)
* fix(telemetry): improve error reporting with structured error strings and better categorization

- Add build_error_string() that creates structured format:
  'exit_code=N | description\n---\n<last 20 log lines>'
- Fix categorize_error() to map ALL known exit codes:
  - Added: shell(1,2), proxmox(200-231), service(150-154),
    database(170-193), runtime(243-249), signal(139,141,143)
  - Split timeout from network (28 was in both)
  - Added DPKG(255) to dependency category
- Update all API functions to use build_error_string():
  post_update_to_api, post_update_to_api_extended,
  post_tool_to_api, post_addon_to_api
- Add ensure_log_on_host() calls to on_exit, on_interrupt,
  on_terminate handlers to prevent race condition where
  telemetry reports before container log is pulled to host

* fix(ui): improve error output formatting and remove redundant log paths

- error_handler: Use msg_info/msg_ok/msg_warn for container cleanup
  instead of raw echo with manual ANSI codes
- error_handler: Add  icon before 'Remove broken container?' prompt
- error_handler: Indent log output with TAB for visual consistency
- build.func: Use msg_custom for installation log path display
- build.func: Use msg_info → msg_ok for container removal flow
- build.func: Use msg_warn for 'kept for debugging' message
- core.func/vm-core.func: Remove redundant container-internal log
  path display (📋 View full log) since combined log on host is
  the canonical location shown after failure
2026-02-14 15:28:30 +01:00
3 changed files with 59 additions and 60 deletions

View File

@@ -407,6 +407,8 @@ Exercise vigilance regarding copycat or coat-tailing sites that seek to exploit
- #### ✨ New Features
- core: validate container IDs cluster-wide across all nodes [@MickLesk](https://github.com/MickLesk) ([#11906](https://github.com/community-scripts/ProxmoxVE/pull/11906))
- core: improve error reporting with structured error strings and better categorization + output formatting [@MickLesk](https://github.com/MickLesk) ([#11907](https://github.com/community-scripts/ProxmoxVE/pull/11907))
- core: unified logging system with combined logs [@MickLesk](https://github.com/MickLesk) ([#11761](https://github.com/community-scripts/ProxmoxVE/pull/11761))
### ❔ Uncategorized

View File

@@ -402,14 +402,19 @@ detect_ram() {
if command -v dmidecode &>/dev/null; then
# Get configured memory speed (actual running speed)
RAM_SPEED=$(dmidecode -t memory 2>/dev/null | grep -m1 "Configured Memory Speed:" | grep -oE "[0-9]+" | head -1)
RAM_SPEED=$(dmidecode -t memory 2>/dev/null | grep -m1 "Configured Memory Speed:" | grep -oE "[0-9]+" | head -1 || true)
# Fallback to Speed: if Configured not available
if [[ -z "$RAM_SPEED" ]]; then
RAM_SPEED=$(dmidecode -t memory 2>/dev/null | grep -m1 "Speed:" | grep -oE "[0-9]+" | head -1)
RAM_SPEED=$(dmidecode -t memory 2>/dev/null | grep -m1 "Speed:" | grep -oE "[0-9]+" | head -1 || true)
fi
fi
# Ensure RAM_SPEED is a valid integer (PocketBase stores it as integer)
if [[ -z "$RAM_SPEED" || ! "$RAM_SPEED" =~ ^[0-9]+$ ]]; then
RAM_SPEED=0
fi
export RAM_SPEED
}
@@ -504,7 +509,7 @@ post_to_api() {
"gpu_vendor": "${gpu_vendor}",
"gpu_model": "${gpu_model}",
"gpu_passthrough": "${gpu_passthrough}",
"ram_speed": "${ram_speed}",
"ram_speed": ${ram_speed:-0},
"repo_source": "${REPO_SOURCE}"
}
EOF
@@ -608,7 +613,7 @@ post_to_api_vm() {
"gpu_vendor": "${gpu_vendor}",
"gpu_model": "${gpu_model}",
"gpu_passthrough": "${gpu_passthrough}",
"ram_speed": "${ram_speed}",
"ram_speed": ${ram_speed:-0},
"repo_source": "${REPO_SOURCE}"
}
EOF
@@ -742,7 +747,7 @@ post_update_to_api() {
"gpu_vendor": "${gpu_vendor}",
"gpu_model": "${gpu_model}",
"gpu_passthrough": "${gpu_passthrough}",
"ram_speed": "${ram_speed}",
"ram_speed": ${ram_speed:-0},
"repo_source": "${REPO_SOURCE}"
}
EOF
@@ -784,7 +789,7 @@ EOF
"gpu_vendor": "${gpu_vendor}",
"gpu_model": "${gpu_model}",
"gpu_passthrough": "${gpu_passthrough}",
"ram_speed": "${ram_speed}",
"ram_speed": ${ram_speed:-0},
"repo_source": "${REPO_SOURCE}"
}
EOF
@@ -881,7 +886,7 @@ categorize_error() {
# Signal/Process errors (SIGTERM, SIGPIPE, SIGSEGV)
139 | 141 | 143) echo "signal" ;;
# Shell errors (general error, syntax error)
# Shell errors (general error, syntax error)
1 | 2) echo "shell" ;;
# Default - truly unknown

View File

@@ -277,8 +277,9 @@ install_ssh_keys_into_ct() {
# ------------------------------------------------------------------------------
# validate_container_id()
#
# - Validates if a container ID is available for use
# - Checks if ID is already used by VM or LXC container
# - Validates if a container ID is available for use (CLUSTER-WIDE)
# - Checks cluster resources via pvesh for VMs/CTs on ALL nodes
# - Falls back to local config file check if pvesh unavailable
# - Checks if ID is used in LVM logical volumes
# - Returns 0 if ID is available, 1 if already in use
# ------------------------------------------------------------------------------
@@ -290,11 +291,35 @@ validate_container_id() {
return 1
fi
# Check if config file exists for VM or LXC
# CLUSTER-WIDE CHECK: Query all VMs/CTs across all nodes
# This catches IDs used on other nodes in the cluster
# NOTE: Works on single-node too - Proxmox always has internal cluster structure
# Falls back gracefully if pvesh unavailable or returns empty
if command -v pvesh &>/dev/null; then
local cluster_ids
cluster_ids=$(pvesh get /cluster/resources --type vm --output-format json 2>/dev/null |
grep -oP '"vmid":\s*\K[0-9]+' 2>/dev/null || true)
if [[ -n "$cluster_ids" ]] && echo "$cluster_ids" | grep -qw "$ctid"; then
return 1
fi
fi
# LOCAL FALLBACK: Check if config file exists for VM or LXC
# This handles edge cases where pvesh might not return all info
if [[ -f "/etc/pve/qemu-server/${ctid}.conf" ]] || [[ -f "/etc/pve/lxc/${ctid}.conf" ]]; then
return 1
fi
# Check ALL nodes in cluster for config files (handles pmxcfs sync delays)
# NOTE: On single-node, /etc/pve/nodes/ contains just the one node - still works
if [[ -d "/etc/pve/nodes" ]]; then
for node_dir in /etc/pve/nodes/*/; do
if [[ -f "${node_dir}qemu-server/${ctid}.conf" ]] || [[ -f "${node_dir}lxc/${ctid}.conf" ]]; then
return 1
fi
done
fi
# Check if ID is used in LVM logical volumes
if lvs --noheadings -o lv_name 2>/dev/null | grep -qE "(^|[-_])${ctid}($|[-_])"; then
return 1
@@ -306,63 +331,30 @@ validate_container_id() {
# ------------------------------------------------------------------------------
# get_valid_container_id()
#
# - Returns a valid, unused container ID
# - Returns a valid, unused container ID (CLUSTER-AWARE)
# - Uses pvesh /cluster/nextid as starting point (already cluster-aware)
# - If provided ID is valid, returns it
# - Otherwise increments from suggested ID until a free one is found
# - Otherwise increments until a free one is found across entire cluster
# - Calls validate_container_id() to check availability
# ------------------------------------------------------------------------------
get_valid_container_id() {
local suggested_id="${1:-$(pvesh get /cluster/nextid)}"
while ! validate_container_id "$suggested_id"; do
suggested_id=$((suggested_id + 1))
done
echo "$suggested_id"
}
# ------------------------------------------------------------------------------
# validate_container_id()
#
# - Validates if a container ID is available for use
# - Checks if ID is already used by VM or LXC container
# - Checks if ID is used in LVM logical volumes
# - Returns 0 if ID is available, 1 if already in use
# ------------------------------------------------------------------------------
validate_container_id() {
local ctid="$1"
# Check if ID is numeric
if ! [[ "$ctid" =~ ^[0-9]+$ ]]; then
return 1
fi
# Check if config file exists for VM or LXC
if [[ -f "/etc/pve/qemu-server/${ctid}.conf" ]] || [[ -f "/etc/pve/lxc/${ctid}.conf" ]]; then
return 1
fi
# Check if ID is used in LVM logical volumes
if lvs --noheadings -o lv_name 2>/dev/null | grep -qE "(^|[-_])${ctid}($|[-_])"; then
return 1
fi
return 0
}
# ------------------------------------------------------------------------------
# get_valid_container_id()
#
# - Returns a valid, unused container ID
# - If provided ID is valid, returns it
# - Otherwise increments from suggested ID until a free one is found
# - Calls validate_container_id() to check availability
# ------------------------------------------------------------------------------
get_valid_container_id() {
local suggested_id="${1:-$(pvesh get /cluster/nextid)}"
local suggested_id="${1:-$(pvesh get /cluster/nextid 2>/dev/null || echo 100)}"
# Ensure we have a valid starting ID
if ! [[ "$suggested_id" =~ ^[0-9]+$ ]]; then
suggested_id=$(pvesh get /cluster/nextid 2>/dev/null || echo 100)
fi
local max_attempts=1000
local attempts=0
while ! validate_container_id "$suggested_id"; do
suggested_id=$((suggested_id + 1))
attempts=$((attempts + 1))
if [[ $attempts -ge $max_attempts ]]; then
msg_error "Could not find available container ID after $max_attempts attempts"
exit 1
fi
done
echo "$suggested_id"