feat(vm): add smart recovery for VM creation failures (Phase 1-3)

Adds error classification, recovery menu, and retry mechanism for VM
creation failures in docker-vm.sh.

vm-core.func:
- vm_classify_error(): classifies errors into 7 categories
  (download, disk_import, virt_customize, vmid_conflict,
   storage_full, network, unknown)
- is_vm_download_error(), is_vm_disk_import_error(),
  is_vm_virt_customize_error(), is_vm_vmid_conflict(),
  is_vm_storage_full(), is_vm_network_error(): detection helpers
- vm_show_recovery_menu(): dynamic whiptail menu with options
  based on error category (retry, retry w/ settings, skip
  virt-customize, new VMID, keep VM, abort)
- vm_handle_recovery(): orchestrates classification, menu, and
  chosen action (cleanup + retry / keep / abort)
- vm_log_cmd(): stderr capture wrapper for VM_ERROR_LOG
- VM_MAX_RETRIES=2 (bounded recursion depth)

docker-vm.sh:
- Wrapped VM creation in create_vm() function for retry
- error_handler(): during VM_CREATION_PHASE, delegates to
  vm_handle_recovery; on retry, temporarily disables ERR trap,
  re-invokes create_vm recursively (bounded by VM_MAX_RETRIES)
- SKIP_VIRT_CUSTOMIZE flag: virt-customize failure recovery
  can skip it and use first-boot Docker install fallback
- Critical commands (qm create, disk import) now capture
  stderr to VM_ERROR_LOG for error classification
This commit is contained in:
CanbiZ (MickLesk)
2026-02-16 18:49:53 +01:00
parent 85c3977c73
commit 6b249d9533
2 changed files with 650 additions and 192 deletions

View File

@@ -624,3 +624,415 @@ EOF
qm set "$VMID" -description "$DESCRIPTION" >/dev/null
}
# ==============================================================================
# SECTION: VM SMART RECOVERY
# ==============================================================================
# Shared state for the VM smart-recovery helpers. Each variable keeps a value
# the caller already set and only falls back to its default when unset/empty.
#   VM_ERROR_LOG        - file that collects stderr from critical commands
#   VM_RECOVERY_ATTEMPT - recovery attempt counter (starts at 0)
#   VM_MAX_RETRIES      - upper bound compared against the attempt counter
: "${VM_ERROR_LOG:=/tmp/vm-install-$$.log}"
: "${VM_RECOVERY_ATTEMPT:=0}"
: "${VM_MAX_RETRIES:=2}"
# ------------------------------------------------------------------------------
# vm_log_cmd()
#
# - Runs the given command line with its stderr appended to VM_ERROR_LOG
# - stdout of the command is left untouched
# - The command's exit status becomes the function's exit status
# Usage: vm_log_cmd qm importdisk "$VMID" "$IMAGE" "$STORAGE"
# ------------------------------------------------------------------------------
vm_log_cmd() {
  "${@}" 2>>"${VM_ERROR_LOG}"
}
# ------------------------------------------------------------------------------
# is_vm_download_error()
#
# - Decides whether a failure looks like an image download problem
# - $1: exit code of the failed command (default 0)
# - $2: log file to scan (default: VM_ERROR_LOG)
# - A match on either the exit-code list (6, 7, 22, 28, 35, 52, 56) or the
#   log patterns (curl/download/HTTP 4xx-5xx/resolve/connect/SSL) is enough
# - Returns 0 (true) if a download error is detected, 1 otherwise
# ------------------------------------------------------------------------------
is_vm_download_error() {
  local rc="${1:-0}"
  local log="${2:-$VM_ERROR_LOG}"
  local pattern="curl.*failed|download.*failed|HTTP.*[45][0-9]{2}|Could not resolve|Connection refused|Connection timed out|SSL.*error"

  # The exit code alone is sufficient when it is one of the listed curl codes
  if [[ "$rc" =~ ^(6|7|22|28|35|52|56)$ ]]; then
    return 0
  fi

  # Otherwise fall back to scanning a non-empty log (case-insensitive)
  if [[ -s "$log" ]] && grep -qiE "$pattern" "$log" 2>/dev/null; then
    return 0
  fi

  return 1
}
# ------------------------------------------------------------------------------
# is_vm_disk_import_error()
#
# - Decides whether a failure looks like a disk import problem
#   (qm importdisk / qm disk import, allocation or format errors)
# - $1: exit code (accepted for a uniform detector signature, not consulted)
# - $2: log file to scan (default: VM_ERROR_LOG)
# - Returns 0 (true) if the log matches an import pattern, 1 otherwise
# ------------------------------------------------------------------------------
is_vm_disk_import_error() {
  local log="${2:-$VM_ERROR_LOG}"
  local pattern="importdisk.*failed|disk import.*error|storage.*allocation.*failed|qcow2.*error|raw.*error|pvesm.*alloc.*failed|unable to create|volume.*already exists"

  # Only a non-empty log can produce a match (case-insensitive scan)
  if [[ -s "$log" ]] && grep -qiE "$pattern" "$log" 2>/dev/null; then
    return 0
  fi

  return 1
}
# ------------------------------------------------------------------------------
# is_vm_virt_customize_error()
#
# - Decides whether a failure looks like a virt-customize / libguestfs problem
#   (guestfs, supermin, appliance boot or launch failures)
# - $1: exit code (accepted for a uniform detector signature, not consulted)
# - $2: log file to scan (default: VM_ERROR_LOG)
# - Returns 0 (true) if the log matches, 1 otherwise
# ------------------------------------------------------------------------------
is_vm_virt_customize_error() {
  local log="${2:-$VM_ERROR_LOG}"
  local pattern="virt-customize|libguestfs|guestfs|supermin|appliance.*boot|virt-.*failed|launch.*failed"

  # Only a non-empty log can produce a match (case-insensitive scan)
  if [[ -s "$log" ]] && grep -qiE "$pattern" "$log" 2>/dev/null; then
    return 0
  fi

  return 1
}
# ------------------------------------------------------------------------------
# is_vm_vmid_conflict()
#
# - Detects VMID conflicts (a guest with that ID already exists / ID in use)
# - $1: exit code (accepted for a uniform detector signature, not consulted)
# - $2: log file to scan (default: VM_ERROR_LOG)
# - Only messages that name a guest ID are treated as a conflict, e.g.
#   "VM 100 already exists", "CT 100 already exists", "VMID 100 is in use".
#   Generic "already exists" (e.g. a leftover disk volume) and generic
#   "unable to create VM ..." lines are NOT matched here: this detector runs
#   first in vm_classify_error, so broad patterns would hide the storage and
#   disk-import categories and offer a pointless "new VMID" retry.
# - The pattern no longer interpolates $VMID, so it is safe when VMID is unset
# - Returns 0 (true) if conflict detected, 1 otherwise
# ------------------------------------------------------------------------------
is_vm_vmid_conflict() {
  local log_file="${2:-${VM_ERROR_LOG:-}}"
  local pattern='(VM|CT)[[:space:]]+[0-9]+[[:space:]]+already|VMID.*(in use|already exists)'

  # An absent or empty log cannot indicate a conflict
  if [[ ! -s "$log_file" ]]; then
    return 1
  fi

  if grep -qiE -- "$pattern" "$log_file" 2>/dev/null; then
    return 0
  fi

  return 1
}
# ------------------------------------------------------------------------------
# is_vm_storage_full()
#
# - Decides whether a failure looks like storage exhaustion
#   (no space left, quota, ENOSPC, full thin pool, ...)
# - $1: exit code (accepted for a uniform detector signature, not consulted)
# - $2: log file to scan (default: VM_ERROR_LOG)
# - Returns 0 (true) if the log matches, 1 otherwise
# ------------------------------------------------------------------------------
is_vm_storage_full() {
  local log="${2:-$VM_ERROR_LOG}"
  local pattern="not enough space|no space left|storage.*full|disk quota|ENOSPC|insufficient.*space|thin pool.*full"

  # Only a non-empty log can produce a match (case-insensitive scan)
  if [[ -s "$log" ]] && grep -qiE "$pattern" "$log" 2>/dev/null; then
    return 0
  fi

  return 1
}
# ------------------------------------------------------------------------------
# is_vm_network_error()
#
# - Decides whether a failure looks like a general network/DNS problem
# - $1: exit code of the failed command (default 0)
# - $2: log file to scan (default: VM_ERROR_LOG)
# - A match on either the exit-code list (6, 7, 28, 52, 56) or the log
#   patterns (name resolution, unreachable network, no route, DNS) is enough
# - Returns 0 (true) if a network issue is detected, 1 otherwise
# ------------------------------------------------------------------------------
is_vm_network_error() {
  local rc="${1:-0}"
  local log="${2:-$VM_ERROR_LOG}"
  local pattern="Name or service not known|Temporary failure in name resolution|Network is unreachable|No route to host|DNS.*failed|could not resolve"

  # The exit code alone is sufficient when it is one of the listed codes
  if [[ "$rc" =~ ^(6|7|28|52|56)$ ]]; then
    return 0
  fi

  # Otherwise fall back to scanning a non-empty log (case-insensitive)
  if [[ -s "$log" ]] && grep -qiE "$pattern" "$log" 2>/dev/null; then
    return 0
  fi

  return 1
}
# ------------------------------------------------------------------------------
# vm_classify_error()
#
# - Maps a VM creation failure to exactly one category name
# - $1: exit code (default 0), $2: log file (default: VM_ERROR_LOG); both are
#   handed unchanged to every detector
# - Detectors are tried in table order and the first hit wins, so the order
#   below is significant
# - Prints the category on stdout: vmid_conflict, storage_full, download,
#   disk_import, virt_customize, network, or unknown when nothing matches
# ------------------------------------------------------------------------------
vm_classify_error() {
  local rc="${1:-0}"
  local log="${2:-$VM_ERROR_LOG}"
  local entry

  # Each entry is "<category>:<detector function>"
  for entry in \
    "vmid_conflict:is_vm_vmid_conflict" \
    "storage_full:is_vm_storage_full" \
    "download:is_vm_download_error" \
    "disk_import:is_vm_disk_import_error" \
    "virt_customize:is_vm_virt_customize_error" \
    "network:is_vm_network_error"; do
    if "${entry#*:}" "$rc" "$log"; then
      echo "${entry%%:*}"
      return
    fi
  done

  echo "unknown"
}
# ------------------------------------------------------------------------------
# vm_show_recovery_menu()
#
# - Displays a whiptail radiolist with recovery options after a VM creation
#   failure and prints the selected tag on stdout
# - Options are built dynamically from the error category; retry-type options
#   are only offered while attempt < VM_MAX_RETRIES (default 2 when unset)
# - KEEP and ABORT are always offered; a cancelled/failed dialog yields ABORT
# - Every tag appears at most once (storage_full already carries its own
#   RETRY_SETTINGS entry, so the generic one is not added a second time)
# - Arguments:
#   $1: exit_code
#   $2: error_category (from vm_classify_error)
#   $3: current attempt number
# ------------------------------------------------------------------------------
vm_show_recovery_menu() {
  local exit_code="${1:-1}"
  local error_category="${2:-unknown}"
  local attempt="${3:-1}"
  local max_retries="${VM_MAX_RETRIES:-2}"
  local menu_items=()

  # --- Retry-type options (only while retries remain) ---
  if ((attempt < max_retries)); then
    case "$error_category" in
    download)
      menu_items+=("RETRY_DOWNLOAD" "🔄 Retry download (clear cache & re-download)" "ON")
      ;;
    virt_customize)
      menu_items+=("RETRY" "🔄 Retry VM creation" "ON")
      menu_items+=("SKIP_CUSTOMIZE" "⏭️ Skip virt-customize (use first-boot fallback)" "OFF")
      ;;
    vmid_conflict)
      menu_items+=("NEW_VMID" "🆔 Choose a different VM ID" "ON")
      ;;
    storage_full)
      menu_items+=("RETRY_SETTINGS" "⚙️ Retry with different settings (storage/disk)" "ON")
      ;;
    *)
      # disk_import, network, unknown and anything unexpected: plain retry
      menu_items+=("RETRY" "🔄 Retry VM creation" "ON")
      ;;
    esac

    # Generic "different resources" entry, unless the category already added
    # a RETRY_SETTINGS tag above (duplicate tags confuse the radiolist)
    if [[ "$error_category" != "storage_full" ]]; then
      menu_items+=("RETRY_SETTINGS" "⚙️ Retry with different settings (RAM/CPU/Disk)" "OFF")
    fi
  fi

  # Always available: keep the partial VM, or destroy it and exit
  menu_items+=("KEEP" "🔍 Keep partial VM for manual debugging" "OFF")
  menu_items+=("ABORT" "❌ Destroy VM and exit" "OFF")

  # Each radiolist entry is a (tag, description, state) triple. Deriving the
  # count from the array avoids ((n++)), whose exit status is 1 when n is 0.
  local item_count=$((${#menu_items[@]} / 3))
  local menu_height=$((item_count + 10))

  local title="VM CREATION FAILED"
  local body="Exit code: ${exit_code} | Category: ${error_category}\nAttempt: ${attempt}/${max_retries}\n\nChoose a recovery action:"
  if ((attempt >= max_retries)); then
    body="Exit code: ${exit_code} | Category: ${error_category}\n⚠ Maximum retries (${max_retries}) reached.\n\nChoose an action:"
  fi

  # whiptail writes the selection to stderr; swap stdout/stderr to capture it
  local choice
  choice=$(whiptail --backtitle "Proxmox VE Helper Scripts" --title "$title" \
    --radiolist "$body" "$menu_height" 72 "$item_count" \
    "${menu_items[@]}" 3>&1 1>&2 2>&3) || choice="ABORT"
  echo "$choice"
}
# ------------------------------------------------------------------------------
# vm_handle_recovery()
#
# - Main recovery handler called from error_handler or a wrapper
# - Classifies the error, prints details plus the last 5 log lines, shows the
#   recovery menu, and executes the chosen action
# - Arguments:
#   $1: exit_code
#   $2: line_number
#   $3: failed_command
#   $4: cleanup_fn — function to call for VM cleanup (default: cleanup_vmid)
#   $5: retry_fn — name of the function the caller re-invokes on retry; only
#       checked for non-emptiness here, the caller performs the actual retry
# - Uses global: VM_ERROR_LOG, VM_RECOVERY_ATTEMPT, VM_MAX_RETRIES, VMID
# - Side effects: increments VM_RECOVERY_ATTEMPT; may delete VM_ERROR_LOG and
#   CACHE_FILE; may export SKIP_VIRT_CUSTOMIZE=yes; may assign a new VMID
# - Returns: 0 if a retry-type action was chosen and retry_fn is set (caller
#   should re-run); 1 if retry is not possible. KEEP and ABORT do not return:
#   they exit the shell with exit_code.
# ------------------------------------------------------------------------------
vm_handle_recovery() {
  local exit_code="${1:-1}"
  local line_number="${2:-?}"
  local failed_command="${3:-unknown}"
  local cleanup_fn="${4:-cleanup_vmid}"
  local retry_fn="${5:-}"

  # Stop any running spinner (best effort; helper may not exist)
  stop_spinner 2>/dev/null || true

  # Classify the error
  local error_category
  error_category=$(vm_classify_error "$exit_code" "$VM_ERROR_LOG")

  # Plain assignment instead of ((var++)): the latter returns status 1 when the
  # old value is 0, which would trip errexit / an ERR trap on the first attempt
  VM_RECOVERY_ATTEMPT=$((${VM_RECOVERY_ATTEMPT:-0} + 1))

  # Show error details
  echo ""
  msg_error "VM creation failed in line ${line_number}"
  msg_error "Exit code: ${exit_code} | Category: ${error_category}"
  msg_error "Command: ${failed_command}"

  # Show last few lines of error log if available
  if [[ -s "$VM_ERROR_LOG" ]]; then
    echo -e "\n${TAB:-}${YW:-}--- Last 5 lines of error log ---${CL:-}"
    local line
    while IFS= read -r line; do
      # %b expands escapes in the prefix only; the log line itself is printed
      # verbatim so backslashes in tool output are not reinterpreted
      printf '%b %s\n' "${TAB:-}" "$line"
    done < <(tail -n 5 "$VM_ERROR_LOG" 2>/dev/null)
    echo -e "${TAB:-}${YW:-}----------------------------------${CL:-}\n"
  fi

  # Show recovery menu
  local choice
  choice=$(vm_show_recovery_menu "$exit_code" "$error_category" "$VM_RECOVERY_ATTEMPT")

  case "$choice" in
  RETRY | RETRY_DOWNLOAD)
    msg_info "Cleaning up failed VM ${VMID:-} for retry"
    "$cleanup_fn" 2>/dev/null || true
    rm -f "$VM_ERROR_LOG"
    if [[ "$choice" == "RETRY_DOWNLOAD" ]]; then
      # Clear cached image so the next attempt downloads it again
      if [[ -n "${CACHE_FILE:-}" && -f "$CACHE_FILE" ]]; then
        msg_info "Clearing cached image: $(basename "$CACHE_FILE")"
        rm -f "$CACHE_FILE"
        msg_ok "Cache cleared"
      fi
    fi
    msg_ok "Ready for retry (attempt $((VM_RECOVERY_ATTEMPT + 1))/${VM_MAX_RETRIES:-2})"
    ;;
  SKIP_CUSTOMIZE)
    msg_info "Cleaning up failed VM ${VMID:-} for retry (skipping virt-customize)"
    "$cleanup_fn" 2>/dev/null || true
    rm -f "$VM_ERROR_LOG"
    # Set flag so docker-vm.sh skips virt-customize
    export SKIP_VIRT_CUSTOMIZE="yes"
    msg_ok "Will use first-boot fallback for package installation"
    ;;
  RETRY_SETTINGS)
    msg_info "Cleaning up failed VM ${VMID:-} for retry with new settings"
    "$cleanup_fn" 2>/dev/null || true
    rm -f "$VM_ERROR_LOG"
    # Let user choose new settings via advanced_settings if available
    if declare -f advanced_settings >/dev/null 2>&1; then
      header_info 2>/dev/null || true
      echo -e "${ADVANCED:-}${BOLD:-}${RD:-}Reconfigure VM Settings${CL:-}"
      advanced_settings
    else
      msg_warn "advanced_settings() not available — using current settings"
    fi
    ;;
  NEW_VMID)
    # NOTE(review): cleanup_fn runs against the conflicting VMID. If that ID
    # belongs to a pre-existing guest rather than our partial VM, cleanup may
    # remove it — confirm what cleanup_vmid does before relying on this.
    msg_info "Cleaning up conflicting VM ${VMID:-}"
    "$cleanup_fn" 2>/dev/null || true
    rm -f "$VM_ERROR_LOG"
    # Get new VMID; never overwrite VMID with an empty value
    local new_vmid
    new_vmid=$(get_valid_nextid) || new_vmid=""
    if [[ -z "$new_vmid" ]]; then
      msg_error "Unable to determine a free VM ID"
      return 1
    fi
    VMID="$new_vmid"
    echo -e "${CONTAINERID:-}${BOLD:-}${DGN:-}New Virtual Machine ID: ${BGN:-}${VMID}${CL:-}"
    msg_ok "Using new VMID: ${VMID}"
    ;;
  KEEP)
    msg_warn "Keeping partial VM ${VMID:-} for manual debugging"
    msg_warn "You can inspect it with: qm config ${VMID:-}"
    msg_warn "To remove it later: qm destroy ${VMID:-} --destroy-unreferenced-disks --purge"
    # Report failure to telemetry
    post_update_to_api "failed" "$exit_code" 2>/dev/null || true
    exit "$exit_code"
    ;;
  ABORT | *)
    msg_info "Destroying failed VM ${VMID:-}"
    "$cleanup_fn" 2>/dev/null || true
    rm -f "$VM_ERROR_LOG"
    post_update_to_api "failed" "$exit_code" 2>/dev/null || true
    msg_error "VM creation aborted by user"
    exit "$exit_code"
    ;;
  esac

  # Common tail for every retry-type action: the caller performs the retry,
  # so all that is decided here is whether a retry function was supplied
  if [[ -n "$retry_fn" ]]; then
    return 0
  fi
  msg_warn "No retry function provided — please re-run the script manually"
  return 1
}

View File

@@ -40,10 +40,32 @@ trap cleanup EXIT
trap 'post_update_to_api "failed" "INTERRUPTED"' SIGINT
trap 'post_update_to_api "failed" "TERMINATED"' SIGTERM
# Flag to control whether recovery menu is shown (set during create_vm)
VM_CREATION_PHASE="no"
function error_handler() {
local exit_code="$?"
local line_number="$1"
local command="$2"
# During VM creation phase: use smart recovery if available
if [[ "$VM_CREATION_PHASE" == "yes" ]] && declare -f vm_handle_recovery >/dev/null 2>&1; then
# Temporarily disable ERR trap + set -e to prevent recursion during recovery menu
trap - ERR
set +e
if vm_handle_recovery "$exit_code" "$line_number" "$command" "cleanup_vmid" "create_vm"; then
# Recovery chose retry — re-invoke create_vm with traps restored
set -e
trap 'error_handler $LINENO "$BASH_COMMAND"' ERR
create_vm
exit $?
fi
# Recovery chose abort/keep — vm_handle_recovery already called exit
exit "$exit_code"
fi
# Default error handling (outside VM creation phase)
local error_message="${RD}[ERROR]${CL} in line ${RD}$line_number${CL}: exit code ${RD}$exit_code${CL}: while executing command ${YW}$command${CL}"
post_update_to_api "failed" "${exit_code}"
echo -e "\n$error_message\n"
@@ -437,74 +459,87 @@ if ! command -v virt-customize &>/dev/null; then
fi
# ==============================================================================
# IMAGE DOWNLOAD
# VM CREATION FUNCTION (wrapped for smart recovery retry)
# ==============================================================================
msg_info "Retrieving the URL for the ${OS_DISPLAY} Qcow2 Disk Image"
URL=$(get_image_url)
CACHE_DIR="/var/lib/vz/template/cache"
CACHE_FILE="$CACHE_DIR/$(basename "$URL")"
mkdir -p "$CACHE_DIR"
msg_ok "${CL}${BL}${URL}${CL}"
create_vm() {
if [[ ! -s "$CACHE_FILE" ]]; then
curl -f#SL -o "$CACHE_FILE" "$URL"
echo -en "\e[1A\e[0K"
msg_ok "Downloaded ${CL}${BL}$(basename "$CACHE_FILE")${CL}"
else
msg_ok "Using cached image ${CL}${BL}$(basename "$CACHE_FILE")${CL}"
fi
# Reset error log for this attempt
VM_ERROR_LOG="/tmp/vm-install-${VMID}.log"
: >"$VM_ERROR_LOG"
# ==============================================================================
# STORAGE TYPE DETECTION
# ==============================================================================
STORAGE_TYPE=$(pvesm status -storage "$STORAGE" | awk 'NR>1 {print $2}')
case $STORAGE_TYPE in
nfs | dir)
DISK_EXT=".qcow2"
DISK_REF="$VMID/"
DISK_IMPORT="--format qcow2"
THIN=""
;;
btrfs)
DISK_EXT=".raw"
DISK_REF="$VMID/"
DISK_IMPORT="--format raw"
FORMAT=",efitype=4m"
THIN=""
;;
*)
DISK_EXT=""
DISK_REF=""
DISK_IMPORT="--format raw"
;;
esac
# ==============================================================================
# IMAGE DOWNLOAD
# ==============================================================================
msg_info "Retrieving the URL for the ${OS_DISPLAY} Qcow2 Disk Image"
URL=$(get_image_url)
CACHE_DIR="/var/lib/vz/template/cache"
CACHE_FILE="$CACHE_DIR/$(basename "$URL")"
mkdir -p "$CACHE_DIR"
msg_ok "${CL}${BL}${URL}${CL}"
# ==============================================================================
# IMAGE CUSTOMIZATION WITH DOCKER
# ==============================================================================
msg_info "Preparing ${OS_DISPLAY} image with Docker"
if [[ ! -s "$CACHE_FILE" ]]; then
curl -f#SL -o "$CACHE_FILE" "$URL"
echo -en "\e[1A\e[0K"
msg_ok "Downloaded ${CL}${BL}$(basename "$CACHE_FILE")${CL}"
else
msg_ok "Using cached image ${CL}${BL}$(basename "$CACHE_FILE")${CL}"
fi
WORK_FILE=$(mktemp --suffix=.qcow2)
cp "$CACHE_FILE" "$WORK_FILE"
# ==============================================================================
# STORAGE TYPE DETECTION
# ==============================================================================
STORAGE_TYPE=$(pvesm status -storage "$STORAGE" | awk 'NR>1 {print $2}')
case $STORAGE_TYPE in
nfs | dir)
DISK_EXT=".qcow2"
DISK_REF="$VMID/"
DISK_IMPORT="--format qcow2"
THIN=""
;;
btrfs)
DISK_EXT=".raw"
DISK_REF="$VMID/"
DISK_IMPORT="--format raw"
FORMAT=",efitype=4m"
THIN=""
;;
*)
DISK_EXT=""
DISK_REF=""
DISK_IMPORT="--format raw"
;;
esac
export LIBGUESTFS_BACKEND_SETTINGS=dns=8.8.8.8,1.1.1.1
# ==============================================================================
# IMAGE CUSTOMIZATION WITH DOCKER
# ==============================================================================
msg_info "Preparing ${OS_DISPLAY} image with Docker"
DOCKER_PREINSTALLED="no"
WORK_FILE=$(mktemp --suffix=.qcow2)
cp "$CACHE_FILE" "$WORK_FILE"
# Install qemu-guest-agent and Docker during image customization
msg_info "Installing base packages in image"
if virt-customize -a "$WORK_FILE" --install qemu-guest-agent,curl,ca-certificates >/dev/null 2>&1; then
msg_ok "Installed base packages"
export LIBGUESTFS_BACKEND_SETTINGS=dns=8.8.8.8,1.1.1.1
msg_info "Installing Docker (this may take 2-5 minutes)"
if virt-customize -q -a "$WORK_FILE" --run-command "curl -fsSL https://get.docker.com | sh" >/dev/null 2>&1 &&
virt-customize -q -a "$WORK_FILE" --run-command "systemctl enable docker" >/dev/null 2>&1; then
msg_ok "Installed Docker"
DOCKER_PREINSTALLED="no"
msg_info "Configuring Docker daemon"
# Optimize Docker daemon configuration
virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/docker" >/dev/null 2>&1
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/docker/daemon.json << EOF
# Install qemu-guest-agent and Docker during image customization
# Skip if recovery set SKIP_VIRT_CUSTOMIZE (virt-customize failed before)
if [[ "${SKIP_VIRT_CUSTOMIZE:-}" == "yes" ]]; then
msg_ok "Skipping virt-customize (using first-boot fallback)"
else
msg_info "Installing base packages in image"
if virt-customize -a "$WORK_FILE" --install qemu-guest-agent,curl,ca-certificates 2>>"$VM_ERROR_LOG" >/dev/null; then
msg_ok "Installed base packages"
msg_info "Installing Docker (this may take 2-5 minutes)"
if virt-customize -q -a "$WORK_FILE" --run-command "curl -fsSL https://get.docker.com | sh" >/dev/null 2>&1 &&
virt-customize -q -a "$WORK_FILE" --run-command "systemctl enable docker" >/dev/null 2>&1; then
msg_ok "Installed Docker"
msg_info "Configuring Docker daemon"
# Optimize Docker daemon configuration
virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/docker" >/dev/null 2>&1
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/docker/daemon.json << EOF
{
"storage-driver": "overlay2",
"log-driver": "json-file",
@@ -514,45 +549,46 @@ if virt-customize -a "$WORK_FILE" --install qemu-guest-agent,curl,ca-certificate
}
}
EOF' >/dev/null 2>&1
DOCKER_PREINSTALLED="yes"
msg_ok "Configured Docker daemon"
else
msg_ok "Docker will be installed on first boot"
DOCKER_PREINSTALLED="yes"
msg_ok "Configured Docker daemon"
else
msg_ok "Docker will be installed on first boot"
fi
else
msg_ok "Packages will be installed on first boot"
fi
fi
else
msg_ok "Packages will be installed on first boot"
fi
msg_info "Finalizing image (hostname, SSH config)"
# Set hostname and prepare for unique machine-id
virt-customize -q -a "$WORK_FILE" --hostname "${HN}" >/dev/null 2>&1
virt-customize -q -a "$WORK_FILE" --run-command "truncate -s 0 /etc/machine-id" >/dev/null 2>&1
virt-customize -q -a "$WORK_FILE" --run-command "rm -f /var/lib/dbus/machine-id" >/dev/null 2>&1
msg_info "Finalizing image (hostname, SSH config)"
# Set hostname and prepare for unique machine-id
virt-customize -q -a "$WORK_FILE" --hostname "${HN}" >/dev/null 2>&1
virt-customize -q -a "$WORK_FILE" --run-command "truncate -s 0 /etc/machine-id" >/dev/null 2>&1
virt-customize -q -a "$WORK_FILE" --run-command "rm -f /var/lib/dbus/machine-id" >/dev/null 2>&1
# Configure SSH for Cloud-Init
if [ "$USE_CLOUD_INIT" = "yes" ]; then
virt-customize -q -a "$WORK_FILE" --run-command "sed -i 's/^#*PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config" >/dev/null 2>&1 || true
virt-customize -q -a "$WORK_FILE" --run-command "sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication yes/' /etc/ssh/sshd_config" >/dev/null 2>&1 || true
else
# Configure auto-login for nocloud images (no Cloud-Init)
virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/systemd/system/serial-getty@ttyS0.service.d" >/dev/null 2>&1 || true
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/serial-getty@ttyS0.service.d/autologin.conf << EOF
# Configure SSH for Cloud-Init
if [ "$USE_CLOUD_INIT" = "yes" ]; then
virt-customize -q -a "$WORK_FILE" --run-command "sed -i 's/^#*PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config" >/dev/null 2>&1 || true
virt-customize -q -a "$WORK_FILE" --run-command "sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication yes/' /etc/ssh/sshd_config" >/dev/null 2>&1 || true
else
# Configure auto-login for nocloud images (no Cloud-Init)
virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/systemd/system/serial-getty@ttyS0.service.d" >/dev/null 2>&1 || true
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/serial-getty@ttyS0.service.d/autologin.conf << EOF
[Service]
ExecStart=
ExecStart=-/sbin/agetty --autologin root --noclear %I \$TERM
EOF' >/dev/null 2>&1 || true
virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/systemd/system/getty@tty1.service.d" >/dev/null 2>&1 || true
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/getty@tty1.service.d/autologin.conf << EOF
virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/systemd/system/getty@tty1.service.d" >/dev/null 2>&1 || true
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/getty@tty1.service.d/autologin.conf << EOF
[Service]
ExecStart=
ExecStart=-/sbin/agetty --autologin root --noclear %I \$TERM
EOF' >/dev/null 2>&1 || true
fi
msg_ok "Finalized image"
fi
msg_ok "Finalized image"
# Create first-boot Docker install script (fallback if virt-customize failed)
if [ "$DOCKER_PREINSTALLED" = "no" ]; then
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /root/install-docker.sh << "DOCKERSCRIPT"
# Create first-boot Docker install script (fallback if virt-customize failed)
if [ "$DOCKER_PREINSTALLED" = "no" ]; then
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /root/install-docker.sh << "DOCKERSCRIPT"
#!/bin/bash
exec > /var/log/install-docker.log 2>&1
echo "[$(date)] Starting Docker installation"
@@ -583,7 +619,7 @@ echo "[$(date)] Docker installation completed"
DOCKERSCRIPT
chmod +x /root/install-docker.sh' >/dev/null 2>&1
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/install-docker.service << "DOCKERSERVICE"
virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/install-docker.service << "DOCKERSERVICE"
[Unit]
Description=Install Docker on First Boot
After=network-online.target
@@ -599,113 +635,123 @@ RemainAfterExit=yes
WantedBy=multi-user.target
DOCKERSERVICE
systemctl enable install-docker.service' >/dev/null 2>&1
fi
fi
# Resize disk to target size
msg_info "Resizing disk image to ${DISK_SIZE}"
qemu-img resize "$WORK_FILE" "${DISK_SIZE}" >/dev/null 2>&1
msg_ok "Resized disk image"
# Resize disk to target size
msg_info "Resizing disk image to ${DISK_SIZE}"
qemu-img resize "$WORK_FILE" "${DISK_SIZE}" >/dev/null 2>&1
msg_ok "Resized disk image"
# ==============================================================================
# VM CREATION
# ==============================================================================
msg_info "Creating Docker VM shell"
qm create $VMID -agent 1${MACHINE} -tablet 0 -localtime 1 -bios ovmf${CPU_TYPE} -cores $CORE_COUNT -memory $RAM_SIZE \
-name $HN -tags community-script -net0 virtio,bridge=$BRG,macaddr=$MAC$VLAN$MTU -onboot 1 -ostype l26 -scsihw virtio-scsi-pci 2>>"$VM_ERROR_LOG" >/dev/null
msg_ok "Created VM shell"
# ==============================================================================
# DISK IMPORT
# ==============================================================================
msg_info "Importing disk into storage ($STORAGE)"
if qm disk import --help >/dev/null 2>&1; then
IMPORT_CMD=(qm disk import)
else
IMPORT_CMD=(qm importdisk)
fi
IMPORT_OUT="$("${IMPORT_CMD[@]}" "$VMID" "$WORK_FILE" "$STORAGE" ${DISK_IMPORT:-} 2> >(tee -a "$VM_ERROR_LOG") || true)"
DISK_REF_IMPORTED="$(printf '%s\n' "$IMPORT_OUT" | sed -n "s/.*successfully imported disk '\([^']\+\)'.*/\1/p" | tr -d "\r\"'")"
[[ -z "$DISK_REF_IMPORTED" ]] && DISK_REF_IMPORTED="$(pvesm list "$STORAGE" | awk -v id="$VMID" '$5 ~ ("vm-"id"-disk-") {print $1":"$5}' | sort | tail -n1)"
[[ -z "$DISK_REF_IMPORTED" ]] && {
msg_error "Unable to determine imported disk reference."
echo "$IMPORT_OUT"
exit 1
}
msg_ok "Imported disk (${CL}${BL}${DISK_REF_IMPORTED}${CL})"
# Clean up work file
rm -f "$WORK_FILE"
# ==============================================================================
# VM CONFIGURATION
# ==============================================================================
msg_info "Attaching EFI and root disk"
qm set "$VMID" \
--efidisk0 "${STORAGE}:0,efitype=4m" \
--scsi0 "${DISK_REF_IMPORTED},${DISK_CACHE}${THIN%,}" \
--boot order=scsi0 \
--serial0 socket >/dev/null
qm set $VMID --agent enabled=1 >/dev/null
msg_ok "Attached EFI and root disk"
# Set VM description
set_description
# Cloud-Init configuration
if [ "$USE_CLOUD_INIT" = "yes" ]; then
msg_info "Configuring Cloud-Init"
setup_cloud_init "$VMID" "$STORAGE" "$HN" "yes"
msg_ok "Cloud-Init configured"
fi
# Start VM
if [ "$START_VM" == "yes" ]; then
msg_info "Starting Docker VM"
qm start $VMID >/dev/null 2>&1
msg_ok "Started Docker VM"
fi
# ==============================================================================
# FINAL OUTPUT
# ==============================================================================
VM_IP=""
if [ "$START_VM" == "yes" ]; then
set +e
for i in {1..10}; do
VM_IP=$(qm guest cmd "$VMID" network-get-interfaces 2>/dev/null |
jq -r '.[] | select(.name != "lo") | ."ip-addresses"[]? | select(."ip-address-type" == "ipv4") | ."ip-address"' 2>/dev/null |
grep -v "^127\." | head -1) || true
[ -n "$VM_IP" ] && break
sleep 3
done
set -e
fi
echo -e "\n${INFO}${BOLD}${GN}Docker VM Configuration Summary:${CL}"
echo -e "${TAB}${DGN}VM ID: ${BGN}${VMID}${CL}"
echo -e "${TAB}${DGN}Hostname: ${BGN}${HN}${CL}"
echo -e "${TAB}${DGN}OS: ${BGN}${OS_DISPLAY}${CL}"
[ -n "$VM_IP" ] && echo -e "${TAB}${DGN}IP Address: ${BGN}${VM_IP}${CL}"
if [ "$DOCKER_PREINSTALLED" = "yes" ]; then
echo -e "${TAB}${DGN}Docker: ${BGN}Pre-installed (via get.docker.com)${CL}"
else
echo -e "${TAB}${DGN}Docker: ${BGN}Installing on first boot${CL}"
echo -e "${TAB}${YW}⚠️ Wait 2-3 minutes for installation to complete${CL}"
echo -e "${TAB}${YW}⚠️ Check progress: ${BL}cat /var/log/install-docker.log${CL}"
fi
if [ "$USE_CLOUD_INIT" = "yes" ]; then
display_cloud_init_info "$VMID" "$HN" 2>/dev/null || true
fi
post_update_to_api "done" "none"
msg_ok "Completed successfully!\n"
} # end of create_vm()
# ==============================================================================
# VM CREATION
# VM CREATION WITH SMART RECOVERY
# ==============================================================================
msg_info "Creating Docker VM shell"
qm create $VMID -agent 1${MACHINE} -tablet 0 -localtime 1 -bios ovmf${CPU_TYPE} -cores $CORE_COUNT -memory $RAM_SIZE \
-name $HN -tags community-script -net0 virtio,bridge=$BRG,macaddr=$MAC$VLAN$MTU -onboot 1 -ostype l26 -scsihw virtio-scsi-pci >/dev/null
msg_ok "Created VM shell"
# ==============================================================================
# DISK IMPORT
# ==============================================================================
msg_info "Importing disk into storage ($STORAGE)"
if qm disk import --help >/dev/null 2>&1; then
IMPORT_CMD=(qm disk import)
else
IMPORT_CMD=(qm importdisk)
fi
IMPORT_OUT="$("${IMPORT_CMD[@]}" "$VMID" "$WORK_FILE" "$STORAGE" ${DISK_IMPORT:-} 2>&1 || true)"
DISK_REF_IMPORTED="$(printf '%s\n' "$IMPORT_OUT" | sed -n "s/.*successfully imported disk '\([^']\+\)'.*/\1/p" | tr -d "\r\"'")"
[[ -z "$DISK_REF_IMPORTED" ]] && DISK_REF_IMPORTED="$(pvesm list "$STORAGE" | awk -v id="$VMID" '$5 ~ ("vm-"id"-disk-") {print $1":"$5}' | sort | tail -n1)"
[[ -z "$DISK_REF_IMPORTED" ]] && {
msg_error "Unable to determine imported disk reference."
echo "$IMPORT_OUT"
exit 1
}
msg_ok "Imported disk (${CL}${BL}${DISK_REF_IMPORTED}${CL})"
# Clean up work file
rm -f "$WORK_FILE"
# ==============================================================================
# VM CONFIGURATION
# ==============================================================================
msg_info "Attaching EFI and root disk"
qm set "$VMID" \
--efidisk0 "${STORAGE}:0,efitype=4m" \
--scsi0 "${DISK_REF_IMPORTED},${DISK_CACHE}${THIN%,}" \
--boot order=scsi0 \
--serial0 socket >/dev/null
qm set $VMID --agent enabled=1 >/dev/null
msg_ok "Attached EFI and root disk"
# Set VM description
set_description
# Cloud-Init configuration
if [ "$USE_CLOUD_INIT" = "yes" ]; then
msg_info "Configuring Cloud-Init"
setup_cloud_init "$VMID" "$STORAGE" "$HN" "yes"
msg_ok "Cloud-Init configured"
fi
# Start VM
if [ "$START_VM" == "yes" ]; then
msg_info "Starting Docker VM"
qm start $VMID >/dev/null 2>&1
msg_ok "Started Docker VM"
fi
# ==============================================================================
# FINAL OUTPUT
# ==============================================================================
VM_IP=""
if [ "$START_VM" == "yes" ]; then
set +e
for i in {1..10}; do
VM_IP=$(qm guest cmd "$VMID" network-get-interfaces 2>/dev/null |
jq -r '.[] | select(.name != "lo") | ."ip-addresses"[]? | select(."ip-address-type" == "ipv4") | ."ip-address"' 2>/dev/null |
grep -v "^127\." | head -1) || true
[ -n "$VM_IP" ] && break
sleep 3
done
set -e
fi
echo -e "\n${INFO}${BOLD}${GN}Docker VM Configuration Summary:${CL}"
echo -e "${TAB}${DGN}VM ID: ${BGN}${VMID}${CL}"
echo -e "${TAB}${DGN}Hostname: ${BGN}${HN}${CL}"
echo -e "${TAB}${DGN}OS: ${BGN}${OS_DISPLAY}${CL}"
[ -n "$VM_IP" ] && echo -e "${TAB}${DGN}IP Address: ${BGN}${VM_IP}${CL}"
if [ "$DOCKER_PREINSTALLED" = "yes" ]; then
echo -e "${TAB}${DGN}Docker: ${BGN}Pre-installed (via get.docker.com)${CL}"
else
echo -e "${TAB}${DGN}Docker: ${BGN}Installing on first boot${CL}"
echo -e "${TAB}${YW}⚠️ Wait 2-3 minutes for installation to complete${CL}"
echo -e "${TAB}${YW}⚠️ Check progress: ${BL}cat /var/log/install-docker.log${CL}"
fi
if [ "$USE_CLOUD_INIT" = "yes" ]; then
display_cloud_init_info "$VMID" "$HN" 2>/dev/null || true
fi
post_update_to_api "done" "none"
msg_ok "Completed successfully!\n"
# Run VM creation with the recovery flag raised: error_handler only delegates
# to vm_handle_recovery while VM_CREATION_PHASE is "yes".
VM_CREATION_PHASE="yes"
create_vm
# Back to default error handling once create_vm has returned
VM_CREATION_PHASE="no"
# Best-effort removal of the error log used during creation
rm -f "$VM_ERROR_LOG" 2>/dev/null || true