From 6b249d953318983c82afa91e81e49c1e256db9ad Mon Sep 17 00:00:00 2001 From: "CanbiZ (MickLesk)" <47820557+MickLesk@users.noreply.github.com> Date: Mon, 16 Feb 2026 18:49:53 +0100 Subject: [PATCH] feat(vm): add smart recovery for VM creation failures (Phase 1-3) Adds error classification, recovery menu, and retry mechanism for VM creation failures in docker-vm.sh. vm-core.func: - vm_classify_error(): classifies errors into 7 categories (download, disk_import, virt_customize, vmid_conflict, storage_full, network, unknown) - is_vm_download_error(), is_vm_disk_import_error(), is_vm_virt_customize_error(), is_vm_vmid_conflict(), is_vm_storage_full(), is_vm_network_error(): detection helpers - vm_show_recovery_menu(): dynamic whiptail menu with options based on error category (retry, retry w/ settings, skip virt-customize, new VMID, keep VM, abort) - vm_handle_recovery(): orchestrates classification, menu, and chosen action (cleanup + retry / keep / abort) - vm_log_cmd(): stderr capture wrapper for VM_ERROR_LOG - VM_MAX_RETRIES=2 (bounded recursion depth) docker-vm.sh: - Wrapped VM creation in create_vm() function for retry - error_handler(): during VM_CREATION_PHASE, delegates to vm_handle_recovery; on retry, temporarily disables ERR trap, re-invokes create_vm recursively (bounded by VM_MAX_RETRIES) - SKIP_VIRT_CUSTOMIZE flag: virt-customize failure recovery can skip it and use first-boot Docker install fallback - Critical commands (qm create, disk import) now capture stderr to VM_ERROR_LOG for error classification --- misc/vm-core.func | 412 ++++++++++++++++++++++++++++++++++++++++++++ vm/docker-vm.sh | 430 +++++++++++++++++++++++++--------------------- 2 files changed, 650 insertions(+), 192 deletions(-) diff --git a/misc/vm-core.func b/misc/vm-core.func index 66949fa69..15bdd8ca9 100644 --- a/misc/vm-core.func +++ b/misc/vm-core.func @@ -624,3 +624,415 @@ EOF qm set "$VMID" -description "$DESCRIPTION" >/dev/null } + +# 
==============================================================================
+# SECTION: VM SMART RECOVERY
+# ==============================================================================
+
+# Recovery state (defaults apply only when unset): VM_ERROR_LOG collects stderr from critical commands
+VM_ERROR_LOG="${VM_ERROR_LOG:-/tmp/vm-install-$$.log}"
+VM_RECOVERY_ATTEMPT=${VM_RECOVERY_ATTEMPT:-0}
+VM_MAX_RETRIES=${VM_MAX_RETRIES:-2}
+
+# ------------------------------------------------------------------------------
+# vm_log_cmd()
+#
+# - Runs the given command with its stderr appended (>>) to VM_ERROR_LOG
+# - Passes stdout through normally
+# - Returns the exit code of the wrapped command
+# Usage: vm_log_cmd qm importdisk "$VMID" "$IMAGE" "$STORAGE"
+# ------------------------------------------------------------------------------
+vm_log_cmd() {
+  "$@" 2>>"$VM_ERROR_LOG"
+}
+
+# ------------------------------------------------------------------------------
+# is_vm_download_error()
+#
+# - Detects download failures from the exit code ($1) or, failing that, the log ($2, default VM_ERROR_LOG)
+# - Exit code 6, 7, 22, 28, 35, 52 or 56 matches on its own, whichever command produced it
+# - Returns 0 (true) if download error detected, 1 otherwise
+# ------------------------------------------------------------------------------
+is_vm_download_error() {
+  local exit_code="${1:-0}"
+  local log_file="${2:-$VM_ERROR_LOG}"
+
+  # curl-specific exit codes indicating download issues
+  case "$exit_code" in
+  6 | 7 | 22 | 28 | 35 | 52 | 56) return 0 ;;
+  esac
+
+  # Check log for download-related patterns (only when the log exists and is non-empty)
+  if [[ -s "$log_file" ]]; then
+    if grep -qiE "curl.*failed|download.*failed|HTTP.*[45][0-9]{2}|Could not resolve|Connection refused|Connection timed out|SSL.*error" "$log_file" 2>/dev/null; then
+      return 0
+    fi
+  fi
+  return 1
+}
+
+# ------------------------------------------------------------------------------
+# is_vm_disk_import_error()
+#
+# - Detects disk import failures (qm importdisk / qm disk import)
+# - Greps the log only (case-insensitive); $1 is accepted for a uniform signature but never read
+# - Returns 0 (true) if disk import error detected, 1 otherwise
+# ------------------------------------------------------------------------------
+is_vm_disk_import_error() {
+  local exit_code="${1:-0}"
+  local log_file="${2:-$VM_ERROR_LOG}"
+
+  if [[ -s "$log_file" ]]; then
+    if grep -qiE "importdisk.*failed|disk import.*error|storage.*allocation.*failed|qcow2.*error|raw.*error|pvesm.*alloc.*failed|unable to create|volume.*already exists" "$log_file" 2>/dev/null; then
+      return 0
+    fi
+  fi
+  return 1
+}
+
+# ------------------------------------------------------------------------------
+# is_vm_virt_customize_error()
+#
+# - Detects virt-customize / libguestfs failures from the log only ($1 is never read)
+# - Any mention of virt-customize, libguestfs, guestfs or supermin in the log counts as a match
+# - Returns 0 (true) if virt-customize error detected, 1 otherwise
+# ------------------------------------------------------------------------------
+is_vm_virt_customize_error() {
+  local exit_code="${1:-0}"
+  local log_file="${2:-$VM_ERROR_LOG}"
+
+  if [[ -s "$log_file" ]]; then
+    if grep -qiE "virt-customize|libguestfs|guestfs|supermin|appliance.*boot|virt-.*failed|launch.*failed" "$log_file" 2>/dev/null; then
+      return 0
+    fi
+  fi
+  return 1
+}
+
+# ------------------------------------------------------------------------------
+# is_vm_vmid_conflict()
+#
+# - Detects VMID conflicts; the pattern interpolates the global VMID and also matches a bare "already exists"
+# - Returns 0 (true) if conflict detected, 1 otherwise
+# ------------------------------------------------------------------------------
+is_vm_vmid_conflict() {
+  local exit_code="${1:-0}"
+  local log_file="${2:-$VM_ERROR_LOG}"
+
+  if [[ -s "$log_file" ]]; then
+    if grep -qiE "already exists|VM $VMID already|unable to create VM|VMID.*in use" "$log_file" 2>/dev/null; then
+      return 0
+    fi
+  fi
+  return 1
+}
+
+# ------------------------------------------------------------------------------
+# is_vm_storage_full()
+#
+# - Detects storage full / space exhaustion messages in the log ($1 is never read)
+# - Returns 0 (true) if storage space issue detected, 1 otherwise
+# ------------------------------------------------------------------------------
+is_vm_storage_full() {
+  local exit_code="${1:-0}"
+  local log_file="${2:-$VM_ERROR_LOG}"
+
+  if [[ -s "$log_file" ]]; then
+    if grep -qiE "not enough space|no space left|storage.*full|disk quota|ENOSPC|insufficient.*space|thin pool.*full" "$log_file" 2>/dev/null; then
+      return 0
+    fi
+  fi
+  return 1
+}
+
+# ------------------------------------------------------------------------------
+# is_vm_network_error()
+#
+# - Detects general network/DNS errors (exit code 6/7/28/52/56, or resolver/routing messages in the log)
+# - Returns 0 (true) if network issue detected, 1 otherwise
+# ------------------------------------------------------------------------------
+is_vm_network_error() {
+  local exit_code="${1:-0}"
+  local log_file="${2:-$VM_ERROR_LOG}"
+
+  # Network-related curl/wget exit codes (every one of them is also listed in is_vm_download_error)
+  case "$exit_code" in
+  6 | 7 | 28 | 52 | 56) return 0 ;;
+  esac
+
+  if [[ -s "$log_file" ]]; then
+    if grep -qiE "Name or service not known|Temporary failure in name resolution|Network is unreachable|No route to host|DNS.*failed|could not resolve" "$log_file" 2>/dev/null; then
+      return 0
+    fi
+  fi
+  return 1
+}
+
+# ------------------------------------------------------------------------------
+# vm_classify_error()
+#
+# - Classifies a VM creation error into a category by running the detectors in a fixed order
+# - First match wins: e.g. any "already exists" log line yields vmid_conflict before disk_import is tested
+# - Returns category string via stdout
+# - Categories: vmid_conflict, storage_full, download, disk_import,
+#   virt_customize, network, unknown
+# ------------------------------------------------------------------------------
+vm_classify_error() {
+  local exit_code="${1:-0}"
+  local log_file="${2:-$VM_ERROR_LOG}"
+
+  if is_vm_vmid_conflict "$exit_code" "$log_file"; then
+    echo "vmid_conflict"
+  elif is_vm_storage_full "$exit_code" "$log_file"; then
+    echo "storage_full"
+  elif is_vm_download_error "$exit_code" "$log_file"; then
+    echo "download"
+  elif is_vm_disk_import_error "$exit_code" "$log_file"; then
+    echo 
"disk_import"
+  elif is_vm_virt_customize_error "$exit_code" "$log_file"; then
+    echo "virt_customize"
+  elif is_vm_network_error "$exit_code" "$log_file"; then
+    echo "network"
+  else
+    echo "unknown"
+  fi
+}
+
+# ------------------------------------------------------------------------------
+# vm_show_recovery_menu()
+#
+# - Displays a whiptail radiolist with recovery options after a VM creation failure
+# - Retry-type options depend on the error category and are listed only while attempt < VM_MAX_RETRIES
+# - Prints the selected tag on stdout; a cancelled dialog (non-zero whiptail status) yields "ABORT"
+# - Arguments:
+#     $1: exit_code
+#     $2: error_category (from vm_classify_error)
+#     $3: current attempt number
+# ------------------------------------------------------------------------------
+vm_show_recovery_menu() {
+  local exit_code="${1:-1}"
+  local error_category="${2:-unknown}"
+  local attempt="${3:-1}"
+
+  local menu_items=()
+  local item_count=0 # bumped with x=$((x + 1)); ((x++)) returns status 1 when x was 0 (fatal under set -e)
+
+  # --- Dynamic options based on error category ---
+  # Retry (always available unless max retries reached)
+  if ((attempt < VM_MAX_RETRIES)); then
+    case "$error_category" in
+    download)
+      menu_items+=("RETRY_DOWNLOAD" "🔄 Retry download (clear cache & re-download)" "ON")
+      item_count=$((item_count + 1))
+      ;;
+    disk_import)
+      menu_items+=("RETRY" "🔄 Retry VM creation" "ON")
+      item_count=$((item_count + 1))
+      ;;
+    virt_customize)
+      menu_items+=("RETRY" "🔄 Retry VM creation" "ON")
+      item_count=$((item_count + 1))
+      menu_items+=("SKIP_CUSTOMIZE" "⏭️ Skip virt-customize (use first-boot fallback)" "OFF")
+      item_count=$((item_count + 1))
+      ;;
+    network)
+      menu_items+=("RETRY" "🔄 Retry VM creation" "ON")
+      item_count=$((item_count + 1))
+      ;;
+    vmid_conflict)
+      menu_items+=("NEW_VMID" "🆔 Choose a different VM ID" "ON")
+      item_count=$((item_count + 1))
+      ;;
+    storage_full)
+      menu_items+=("RETRY_SETTINGS" "⚙️ Retry with different settings (storage/disk)" "ON")
+      item_count=$((item_count + 1))
+      ;;
+    *)
+      menu_items+=("RETRY" "🔄 Retry VM creation" "ON")
+      item_count=$((item_count + 1))
+      ;;
+    esac
+
+    # Retry with different resources; storage_full already added this tag above, so do not list it twice
+    if [[ "$error_category" != "storage_full" ]]; then
+      menu_items+=("RETRY_SETTINGS" "⚙️ Retry with different settings (RAM/CPU/Disk)" "OFF")
+      item_count=$((item_count + 1))
+    fi
+  fi
+
+  # Keep VM for debugging (always available)
+  menu_items+=("KEEP" "🔍 Keep partial VM for manual debugging" "OFF")
+  item_count=$((item_count + 1))
+
+  # Abort (always available)
+  menu_items+=("ABORT" "❌ Destroy VM and exit" "OFF")
+  item_count=$((item_count + 1))
+
+  local menu_height=$((item_count + 10))
+
+  # Error info for title
+  local title="VM CREATION FAILED"
+  local body="Exit code: ${exit_code} | Category: ${error_category}\nAttempt: ${attempt}/${VM_MAX_RETRIES}\n\nChoose a recovery action:"
+
+  if ((attempt >= VM_MAX_RETRIES)); then
+    body="Exit code: ${exit_code} | Category: ${error_category}\n⚠️ Maximum retries (${VM_MAX_RETRIES}) reached.\n\nChoose an action:"
+  fi
+
+  # whiptail draws on stdout and reports the choice on stderr, hence the fd swap
+  local choice
+  choice=$(whiptail --backtitle "Proxmox VE Helper Scripts" --title "$title" \
+    --radiolist "$body" "$menu_height" 72 "$item_count" \
+    "${menu_items[@]}" 3>&1 1>&2 2>&3) || choice="ABORT"
+  echo "$choice"
+}
+
+# ------------------------------------------------------------------------------
+# vm_handle_recovery()
+#
+# - Main recovery handler called from error_handler or a wrapper
+# - Classifies the error, shows recovery menu, and executes the chosen action
+# - Arguments:
+#     $1: exit_code
+#     $2: line_number
+#     $3: failed_command
+#     $4: cleanup_fn — function to call for VM cleanup (default: cleanup_vmid)
+#     $5: retry_fn — function to re-invoke for full retry (required for retry)
+# - Uses global: VM_ERROR_LOG, VM_RECOVERY_ATTEMPT, VM_MAX_RETRIES, VMID
+# - Returns: 0 if a retry option was chosen (caller should re-run), 1 if retry_fn is empty; KEEP/ABORT exit the script
+# ------------------------------------------------------------------------------
+vm_handle_recovery() {
+  local exit_code="${1:-1}"
+  local line_number="${2:-?}"
+  local failed_command="${3:-unknown}"
+  local cleanup_fn="${4:-cleanup_vmid}"
+  local retry_fn="${5:-}"
+
+  # Stop any running spinner
+  stop_spinner 2>/dev/null || true
+
+  # Classify the error
+  local error_category
+  
error_category=$(vm_classify_error "$exit_code" "$VM_ERROR_LOG")
+
+  VM_RECOVERY_ATTEMPT=$((VM_RECOVERY_ATTEMPT + 1)) # not ((x++)): that returns status 1 when x was 0
+
+  # Show error details
+  echo ""
+  msg_error "VM creation failed in line ${line_number}"
+  msg_error "Exit code: ${exit_code} | Category: ${error_category}"
+  msg_error "Command: ${failed_command}"
+
+  # Show last few lines of error log if available
+  if [[ -s "$VM_ERROR_LOG" ]]; then
+    echo -e "\n${TAB}${YW}--- Last 5 lines of error log ---${CL}"
+    tail -n 5 "$VM_ERROR_LOG" 2>/dev/null | while IFS= read -r line; do
+      echo -e "${TAB} ${line}"
+    done
+    echo -e "${TAB}${YW}----------------------------------${CL}\n"
+  fi
+
+  # Show recovery menu
+  local choice
+  choice=$(vm_show_recovery_menu "$exit_code" "$error_category" "$VM_RECOVERY_ATTEMPT")
+
+  case "$choice" in
+  RETRY | RETRY_DOWNLOAD)
+    msg_info "Cleaning up failed VM ${VMID} for retry"
+    "$cleanup_fn" 2>/dev/null || true
+    rm -f "$VM_ERROR_LOG"
+
+    # RETRY_DOWNLOAD only: drop the cached image so the retry fetches a fresh copy
+    if [[ "$choice" == "RETRY_DOWNLOAD" && -n "${CACHE_FILE:-}" && -f "$CACHE_FILE" ]]; then
+      msg_info "Clearing cached image: $(basename "$CACHE_FILE")"
+      rm -f "$CACHE_FILE"
+      msg_ok "Cache cleared"
+    fi
+
+    msg_ok "Ready for retry (attempt $((VM_RECOVERY_ATTEMPT + 1))/${VM_MAX_RETRIES})"
+
+    if [[ -n "$retry_fn" ]]; then
+      # Re-invoke the retry function — caller loop handles this
+      return 0
+    else
+      msg_warn "No retry function provided — please re-run the script manually"
+      return 1
+    fi
+    ;;
+
+  SKIP_CUSTOMIZE)
+    msg_info "Cleaning up failed VM ${VMID} for retry (skipping virt-customize)"
+    "$cleanup_fn" 2>/dev/null || true
+    rm -f "$VM_ERROR_LOG"
+    # Set flag so docker-vm.sh skips virt-customize
+    export SKIP_VIRT_CUSTOMIZE="yes"
+    msg_ok "Will use first-boot fallback for package installation"
+
+    if [[ -n "$retry_fn" ]]; then
+      return 0
+    else
+      msg_warn "No retry function provided — please re-run the script manually"
+      return 1
+    fi
+    ;;
+
+  RETRY_SETTINGS)
+    msg_info "Cleaning up failed VM ${VMID} for retry with new settings"
+    "$cleanup_fn" 2>/dev/null || true
+    rm -f "$VM_ERROR_LOG"
+
+    # Let user choose new settings via advanced_settings if available
+    if declare -f advanced_settings >/dev/null 2>&1; then
+      header_info 2>/dev/null || true
+      echo -e "${ADVANCED:-}${BOLD}${RD}Reconfigure VM Settings${CL}"
+      advanced_settings
+    else
+      msg_warn "advanced_settings() not available — using current settings"
+    fi
+
+    if [[ -n "$retry_fn" ]]; then
+      return 0
+    else
+      msg_warn "No retry function provided — please re-run the script manually"
+      return 1
+    fi
+    ;;
+
+  NEW_VMID)
+    # No cleanup_fn here: on a VMID conflict the VM with this ID existed before this run and must survive
+    msg_info "Keeping existing VM ${VMID}, selecting a new ID"
+    rm -f "$VM_ERROR_LOG"
+
+    # Get new VMID
+    VMID=$(get_valid_nextid)
+    echo -e "${CONTAINERID:-}${BOLD}${DGN}New Virtual Machine ID: ${BGN}${VMID}${CL}"
+    msg_ok "Using new VMID: ${VMID}"
+
+    if [[ -n "$retry_fn" ]]; then
+      return 0
+    else
+      msg_warn "No retry function provided — please re-run the script manually"
+      return 1
+    fi
+    ;;
+
+  KEEP)
+    msg_warn "Keeping partial VM ${VMID} for manual debugging"
+    msg_warn "You can inspect it with: qm config ${VMID}"
+    msg_warn "To remove it later: qm destroy ${VMID} --destroy-unreferenced-disks --purge"
+    # Report failure to telemetry
+    post_update_to_api "failed" "$exit_code" 2>/dev/null || true
+    exit "$exit_code"
+    ;;
+
+  ABORT | *)
+    if [[ "$error_category" != "vmid_conflict" ]]; then # a conflicting VM pre-dates this run: never destroy it
+      msg_info "Destroying failed VM ${VMID}"
+      "$cleanup_fn" 2>/dev/null || true
+    fi
+    rm -f "$VM_ERROR_LOG"
+    post_update_to_api "failed" "$exit_code" 2>/dev/null || true
+    msg_error "VM creation aborted by user"
+    exit "$exit_code"
+    ;;
+  esac
+}
diff --git a/vm/docker-vm.sh b/vm/docker-vm.sh
index b4de65d94..d81a7b6c2 100644
--- a/vm/docker-vm.sh
+++ b/vm/docker-vm.sh
@@ -40,10 +40,32 @@ trap cleanup EXIT
 trap 'post_update_to_api "failed" "INTERRUPTED"' SIGINT
 trap 'post_update_to_api "failed" "TERMINATED"' SIGTERM
 
+# Flag to control whether recovery menu is shown (set during create_vm)
+VM_CREATION_PHASE="no"
+
 function error_handler() {
   local exit_code="$?"
   local line_number="$1"
   local command="$2"
+
+  # During VM creation phase: use smart recovery if available
+  if [[ "$VM_CREATION_PHASE" == "yes" ]] && declare -f vm_handle_recovery >/dev/null 2>&1; then
+    # ERR trap + errexit stay off in this handler; each retry gets its own errexit inside a subshell
+    trap - ERR
+    set +e
+
+    # A failure raised while this handler runs is not relied on to re-fire the ERR trap, so loop explicitly
+    while vm_handle_recovery "$exit_code" "$line_number" "$command" "cleanup_vmid" "create_vm"; do
+      (set -e; create_vm)
+      exit_code=$?
+      VM_ERROR_LOG="/tmp/vm-install-${VMID}.log" # the path create_vm logged to inside the subshell
+      ((exit_code == 0)) && { rm -f "$VM_ERROR_LOG"; exit 0; }
+      line_number="?" command="create_vm (retry)"
+    done
+    exit "$exit_code" # vm_handle_recovery returned non-zero: no retry possible
+  fi
+
+  # Default error handling (outside VM creation phase)
   local error_message="${RD}[ERROR]${CL} in line ${RD}$line_number${CL}: exit code ${RD}$exit_code${CL}: while executing command ${YW}$command${CL}"
   post_update_to_api "failed" "${exit_code}"
   echo -e "\n$error_message\n"
@@ -437,74 +459,87 @@ if ! command -v virt-customize &>/dev/null; then
 fi
 
 # ==============================================================================
-# IMAGE DOWNLOAD
+# VM CREATION FUNCTION (wrapped for smart recovery retry)
 # ==============================================================================
-msg_info "Retrieving the URL for the ${OS_DISPLAY} Qcow2 Disk Image"
-URL=$(get_image_url)
-CACHE_DIR="/var/lib/vz/template/cache"
-CACHE_FILE="$CACHE_DIR/$(basename "$URL")"
-mkdir -p "$CACHE_DIR"
-msg_ok "${CL}${BL}${URL}${CL}"
+create_vm() {
 
-if [[ ! 
-s "$CACHE_FILE" ]]; then - curl -f#SL -o "$CACHE_FILE" "$URL" - echo -en "\e[1A\e[0K" - msg_ok "Downloaded ${CL}${BL}$(basename "$CACHE_FILE")${CL}" -else - msg_ok "Using cached image ${CL}${BL}$(basename "$CACHE_FILE")${CL}" -fi + # Reset error log for this attempt + VM_ERROR_LOG="/tmp/vm-install-${VMID}.log" + : >"$VM_ERROR_LOG" -# ============================================================================== -# STORAGE TYPE DETECTION -# ============================================================================== -STORAGE_TYPE=$(pvesm status -storage "$STORAGE" | awk 'NR>1 {print $2}') -case $STORAGE_TYPE in -nfs | dir) - DISK_EXT=".qcow2" - DISK_REF="$VMID/" - DISK_IMPORT="--format qcow2" - THIN="" - ;; -btrfs) - DISK_EXT=".raw" - DISK_REF="$VMID/" - DISK_IMPORT="--format raw" - FORMAT=",efitype=4m" - THIN="" - ;; -*) - DISK_EXT="" - DISK_REF="" - DISK_IMPORT="--format raw" - ;; -esac + # ============================================================================== + # IMAGE DOWNLOAD + # ============================================================================== + msg_info "Retrieving the URL for the ${OS_DISPLAY} Qcow2 Disk Image" + URL=$(get_image_url) + CACHE_DIR="/var/lib/vz/template/cache" + CACHE_FILE="$CACHE_DIR/$(basename "$URL")" + mkdir -p "$CACHE_DIR" + msg_ok "${CL}${BL}${URL}${CL}" -# ============================================================================== -# IMAGE CUSTOMIZATION WITH DOCKER -# ============================================================================== -msg_info "Preparing ${OS_DISPLAY} image with Docker" + if [[ ! 
-s "$CACHE_FILE" ]]; then + curl -f#SL -o "$CACHE_FILE" "$URL" + echo -en "\e[1A\e[0K" + msg_ok "Downloaded ${CL}${BL}$(basename "$CACHE_FILE")${CL}" + else + msg_ok "Using cached image ${CL}${BL}$(basename "$CACHE_FILE")${CL}" + fi -WORK_FILE=$(mktemp --suffix=.qcow2) -cp "$CACHE_FILE" "$WORK_FILE" + # ============================================================================== + # STORAGE TYPE DETECTION + # ============================================================================== + STORAGE_TYPE=$(pvesm status -storage "$STORAGE" | awk 'NR>1 {print $2}') + case $STORAGE_TYPE in + nfs | dir) + DISK_EXT=".qcow2" + DISK_REF="$VMID/" + DISK_IMPORT="--format qcow2" + THIN="" + ;; + btrfs) + DISK_EXT=".raw" + DISK_REF="$VMID/" + DISK_IMPORT="--format raw" + FORMAT=",efitype=4m" + THIN="" + ;; + *) + DISK_EXT="" + DISK_REF="" + DISK_IMPORT="--format raw" + ;; + esac -export LIBGUESTFS_BACKEND_SETTINGS=dns=8.8.8.8,1.1.1.1 + # ============================================================================== + # IMAGE CUSTOMIZATION WITH DOCKER + # ============================================================================== + msg_info "Preparing ${OS_DISPLAY} image with Docker" -DOCKER_PREINSTALLED="no" + WORK_FILE=$(mktemp --suffix=.qcow2) + cp "$CACHE_FILE" "$WORK_FILE" -# Install qemu-guest-agent and Docker during image customization -msg_info "Installing base packages in image" -if virt-customize -a "$WORK_FILE" --install qemu-guest-agent,curl,ca-certificates >/dev/null 2>&1; then - msg_ok "Installed base packages" + export LIBGUESTFS_BACKEND_SETTINGS=dns=8.8.8.8,1.1.1.1 - msg_info "Installing Docker (this may take 2-5 minutes)" - if virt-customize -q -a "$WORK_FILE" --run-command "curl -fsSL https://get.docker.com | sh" >/dev/null 2>&1 && - virt-customize -q -a "$WORK_FILE" --run-command "systemctl enable docker" >/dev/null 2>&1; then - msg_ok "Installed Docker" + DOCKER_PREINSTALLED="no" - msg_info "Configuring Docker daemon" - # Optimize Docker daemon 
configuration - virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/docker" >/dev/null 2>&1 - virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/docker/daemon.json << EOF + # Install qemu-guest-agent and Docker during image customization + # Skip if recovery set SKIP_VIRT_CUSTOMIZE (virt-customize failed before) + if [[ "${SKIP_VIRT_CUSTOMIZE:-}" == "yes" ]]; then + msg_ok "Skipping virt-customize (using first-boot fallback)" + else + msg_info "Installing base packages in image" + if virt-customize -a "$WORK_FILE" --install qemu-guest-agent,curl,ca-certificates 2>>"$VM_ERROR_LOG" >/dev/null; then + msg_ok "Installed base packages" + + msg_info "Installing Docker (this may take 2-5 minutes)" + if virt-customize -q -a "$WORK_FILE" --run-command "curl -fsSL https://get.docker.com | sh" >/dev/null 2>&1 && + virt-customize -q -a "$WORK_FILE" --run-command "systemctl enable docker" >/dev/null 2>&1; then + msg_ok "Installed Docker" + + msg_info "Configuring Docker daemon" + # Optimize Docker daemon configuration + virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/docker" >/dev/null 2>&1 + virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/docker/daemon.json << EOF { "storage-driver": "overlay2", "log-driver": "json-file", @@ -514,45 +549,46 @@ if virt-customize -a "$WORK_FILE" --install qemu-guest-agent,curl,ca-certificate } } EOF' >/dev/null 2>&1 - DOCKER_PREINSTALLED="yes" - msg_ok "Configured Docker daemon" - else - msg_ok "Docker will be installed on first boot" + DOCKER_PREINSTALLED="yes" + msg_ok "Configured Docker daemon" + else + msg_ok "Docker will be installed on first boot" + fi + else + msg_ok "Packages will be installed on first boot" + fi fi -else - msg_ok "Packages will be installed on first boot" -fi -msg_info "Finalizing image (hostname, SSH config)" -# Set hostname and prepare for unique machine-id -virt-customize -q -a "$WORK_FILE" --hostname "${HN}" >/dev/null 2>&1 -virt-customize -q -a "$WORK_FILE" 
--run-command "truncate -s 0 /etc/machine-id" >/dev/null 2>&1 -virt-customize -q -a "$WORK_FILE" --run-command "rm -f /var/lib/dbus/machine-id" >/dev/null 2>&1 + msg_info "Finalizing image (hostname, SSH config)" + # Set hostname and prepare for unique machine-id + virt-customize -q -a "$WORK_FILE" --hostname "${HN}" >/dev/null 2>&1 + virt-customize -q -a "$WORK_FILE" --run-command "truncate -s 0 /etc/machine-id" >/dev/null 2>&1 + virt-customize -q -a "$WORK_FILE" --run-command "rm -f /var/lib/dbus/machine-id" >/dev/null 2>&1 -# Configure SSH for Cloud-Init -if [ "$USE_CLOUD_INIT" = "yes" ]; then - virt-customize -q -a "$WORK_FILE" --run-command "sed -i 's/^#*PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config" >/dev/null 2>&1 || true - virt-customize -q -a "$WORK_FILE" --run-command "sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication yes/' /etc/ssh/sshd_config" >/dev/null 2>&1 || true -else - # Configure auto-login for nocloud images (no Cloud-Init) - virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/systemd/system/serial-getty@ttyS0.service.d" >/dev/null 2>&1 || true - virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/serial-getty@ttyS0.service.d/autologin.conf << EOF + # Configure SSH for Cloud-Init + if [ "$USE_CLOUD_INIT" = "yes" ]; then + virt-customize -q -a "$WORK_FILE" --run-command "sed -i 's/^#*PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config" >/dev/null 2>&1 || true + virt-customize -q -a "$WORK_FILE" --run-command "sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication yes/' /etc/ssh/sshd_config" >/dev/null 2>&1 || true + else + # Configure auto-login for nocloud images (no Cloud-Init) + virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/systemd/system/serial-getty@ttyS0.service.d" >/dev/null 2>&1 || true + virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/serial-getty@ttyS0.service.d/autologin.conf << EOF [Service] ExecStart= 
ExecStart=-/sbin/agetty --autologin root --noclear %I \$TERM EOF' >/dev/null 2>&1 || true - virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/systemd/system/getty@tty1.service.d" >/dev/null 2>&1 || true - virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/getty@tty1.service.d/autologin.conf << EOF + virt-customize -q -a "$WORK_FILE" --run-command "mkdir -p /etc/systemd/system/getty@tty1.service.d" >/dev/null 2>&1 || true + virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/getty@tty1.service.d/autologin.conf << EOF [Service] ExecStart= ExecStart=-/sbin/agetty --autologin root --noclear %I \$TERM EOF' >/dev/null 2>&1 || true -fi -msg_ok "Finalized image" + fi + msg_ok "Finalized image" -# Create first-boot Docker install script (fallback if virt-customize failed) -if [ "$DOCKER_PREINSTALLED" = "no" ]; then - virt-customize -q -a "$WORK_FILE" --run-command 'cat > /root/install-docker.sh << "DOCKERSCRIPT" + # Create first-boot Docker install script (fallback if virt-customize failed) + if [ "$DOCKER_PREINSTALLED" = "no" ]; then + virt-customize -q -a "$WORK_FILE" --run-command 'cat > /root/install-docker.sh << "DOCKERSCRIPT" #!/bin/bash exec > /var/log/install-docker.log 2>&1 echo "[$(date)] Starting Docker installation" @@ -583,7 +619,7 @@ echo "[$(date)] Docker installation completed" DOCKERSCRIPT chmod +x /root/install-docker.sh' >/dev/null 2>&1 - virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/install-docker.service << "DOCKERSERVICE" + virt-customize -q -a "$WORK_FILE" --run-command 'cat > /etc/systemd/system/install-docker.service << "DOCKERSERVICE" [Unit] Description=Install Docker on First Boot After=network-online.target @@ -599,113 +635,123 @@ RemainAfterExit=yes WantedBy=multi-user.target DOCKERSERVICE systemctl enable install-docker.service' >/dev/null 2>&1 -fi + fi -# Resize disk to target size -msg_info "Resizing disk image to ${DISK_SIZE}" -qemu-img resize 
"$WORK_FILE" "${DISK_SIZE}" >/dev/null 2>&1
-msg_ok "Resized disk image"
+  # Resize disk to target size
+  msg_info "Resizing disk image to ${DISK_SIZE}"
+  qemu-img resize "$WORK_FILE" "${DISK_SIZE}" >/dev/null 2>&1
+  msg_ok "Resized disk image"
+
+  # ==============================================================================
+  # VM CREATION
+  # ==============================================================================
+  msg_info "Creating Docker VM shell"
+
+  qm create $VMID -agent 1${MACHINE} -tablet 0 -localtime 1 -bios ovmf${CPU_TYPE} -cores $CORE_COUNT -memory $RAM_SIZE \
+    -name $HN -tags community-script -net0 virtio,bridge=$BRG,macaddr=$MAC$VLAN$MTU -onboot 1 -ostype l26 -scsihw virtio-scsi-pci 2>>"$VM_ERROR_LOG" >/dev/null
+
+  msg_ok "Created VM shell"
+
+  # ==============================================================================
+  # DISK IMPORT
+  # ==============================================================================
+  msg_info "Importing disk into storage ($STORAGE)"
+
+  if qm disk import --help >/dev/null 2>&1; then
+    IMPORT_CMD=(qm disk import)
+  else
+    IMPORT_CMD=(qm importdisk)
+  fi
+
+  IMPORT_OUT="$("${IMPORT_CMD[@]}" "$VMID" "$WORK_FILE" "$STORAGE" ${DISK_IMPORT:-} 2> >(tee -a "$VM_ERROR_LOG") || true)"
+  DISK_REF_IMPORTED="$(printf '%s\n' "$IMPORT_OUT" | sed -n "s/.*successfully imported disk '\([^']\+\)'.*/\1/p" | tr -d "\r\"'")"
+  [[ -z "$DISK_REF_IMPORTED" ]] && DISK_REF_IMPORTED="$(pvesm list "$STORAGE" | awk -v id="$VMID" '$5 ~ ("vm-"id"-disk-") {print $1":"$5}' | sort | tail -n1)"
+  [[ -z "$DISK_REF_IMPORTED" ]] && {
+    msg_error "Unable to determine imported disk reference."
+    echo "$IMPORT_OUT"
+    return 1 # not exit: the import ran with "|| true", so only a failing create_vm reaches the ERR trap / recovery menu
+  }
+
+  msg_ok "Imported disk (${CL}${BL}${DISK_REF_IMPORTED}${CL})"
+
+  # Clean up work file
+  rm -f "$WORK_FILE"
+
+  # ==============================================================================
+  # VM CONFIGURATION
+  # ==============================================================================
+  msg_info "Attaching EFI and root disk"
+
+  qm set "$VMID" \
+    --efidisk0 "${STORAGE}:0,efitype=4m" \
+    --scsi0 "${DISK_REF_IMPORTED},${DISK_CACHE}${THIN%,}" \
+    --boot order=scsi0 \
+    --serial0 socket >/dev/null
+
+  qm set $VMID --agent enabled=1 >/dev/null
+
+  msg_ok "Attached EFI and root disk"
+
+  # Set VM description
+  set_description
+
+  # Cloud-Init configuration
+  if [ "$USE_CLOUD_INIT" = "yes" ]; then
+    msg_info "Configuring Cloud-Init"
+    setup_cloud_init "$VMID" "$STORAGE" "$HN" "yes"
+    msg_ok "Cloud-Init configured"
+  fi
+
+  # Start VM
+  if [ "$START_VM" == "yes" ]; then
+    msg_info "Starting Docker VM"
+    qm start $VMID >/dev/null 2>&1
+    msg_ok "Started Docker VM"
+  fi
+
+  # ==============================================================================
+  # FINAL OUTPUT
+  # ==============================================================================
+  VM_IP=""
+  if [ "$START_VM" == "yes" ]; then
+    set +e
+    for i in {1..10}; do
+      VM_IP=$(qm guest cmd "$VMID" network-get-interfaces 2>/dev/null |
+        jq -r '.[] | select(.name != "lo") | ."ip-addresses"[]? | select(."ip-address-type" == "ipv4") | ."ip-address"' 2>/dev/null |
+        grep -v "^127\." 
| head -1) || true + [ -n "$VM_IP" ] && break + sleep 3 + done + set -e + fi + + echo -e "\n${INFO}${BOLD}${GN}Docker VM Configuration Summary:${CL}" + echo -e "${TAB}${DGN}VM ID: ${BGN}${VMID}${CL}" + echo -e "${TAB}${DGN}Hostname: ${BGN}${HN}${CL}" + echo -e "${TAB}${DGN}OS: ${BGN}${OS_DISPLAY}${CL}" + [ -n "$VM_IP" ] && echo -e "${TAB}${DGN}IP Address: ${BGN}${VM_IP}${CL}" + + if [ "$DOCKER_PREINSTALLED" = "yes" ]; then + echo -e "${TAB}${DGN}Docker: ${BGN}Pre-installed (via get.docker.com)${CL}" + else + echo -e "${TAB}${DGN}Docker: ${BGN}Installing on first boot${CL}" + echo -e "${TAB}${YW}āš ļø Wait 2-3 minutes for installation to complete${CL}" + echo -e "${TAB}${YW}āš ļø Check progress: ${BL}cat /var/log/install-docker.log${CL}" + fi + + if [ "$USE_CLOUD_INIT" = "yes" ]; then + display_cloud_init_info "$VMID" "$HN" 2>/dev/null || true + fi + + post_update_to_api "done" "none" + msg_ok "Completed successfully!\n" + +} # end of create_vm() # ============================================================================== -# VM CREATION +# VM CREATION WITH SMART RECOVERY # ============================================================================== -msg_info "Creating Docker VM shell" - -qm create $VMID -agent 1${MACHINE} -tablet 0 -localtime 1 -bios ovmf${CPU_TYPE} -cores $CORE_COUNT -memory $RAM_SIZE \ - -name $HN -tags community-script -net0 virtio,bridge=$BRG,macaddr=$MAC$VLAN$MTU -onboot 1 -ostype l26 -scsihw virtio-scsi-pci >/dev/null - -msg_ok "Created VM shell" - -# ============================================================================== -# DISK IMPORT -# ============================================================================== -msg_info "Importing disk into storage ($STORAGE)" - -if qm disk import --help >/dev/null 2>&1; then - IMPORT_CMD=(qm disk import) -else - IMPORT_CMD=(qm importdisk) -fi - -IMPORT_OUT="$("${IMPORT_CMD[@]}" "$VMID" "$WORK_FILE" "$STORAGE" ${DISK_IMPORT:-} 2>&1 || true)" -DISK_REF_IMPORTED="$(printf '%s\n' 
"$IMPORT_OUT" | sed -n "s/.*successfully imported disk '\([^']\+\)'.*/\1/p" | tr -d "\r\"'")" -[[ -z "$DISK_REF_IMPORTED" ]] && DISK_REF_IMPORTED="$(pvesm list "$STORAGE" | awk -v id="$VMID" '$5 ~ ("vm-"id"-disk-") {print $1":"$5}' | sort | tail -n1)" -[[ -z "$DISK_REF_IMPORTED" ]] && { - msg_error "Unable to determine imported disk reference." - echo "$IMPORT_OUT" - exit 1 -} - -msg_ok "Imported disk (${CL}${BL}${DISK_REF_IMPORTED}${CL})" - -# Clean up work file -rm -f "$WORK_FILE" - -# ============================================================================== -# VM CONFIGURATION -# ============================================================================== -msg_info "Attaching EFI and root disk" - -qm set "$VMID" \ - --efidisk0 "${STORAGE}:0,efitype=4m" \ - --scsi0 "${DISK_REF_IMPORTED},${DISK_CACHE}${THIN%,}" \ - --boot order=scsi0 \ - --serial0 socket >/dev/null - -qm set $VMID --agent enabled=1 >/dev/null - -msg_ok "Attached EFI and root disk" - -# Set VM description -set_description - -# Cloud-Init configuration -if [ "$USE_CLOUD_INIT" = "yes" ]; then - msg_info "Configuring Cloud-Init" - setup_cloud_init "$VMID" "$STORAGE" "$HN" "yes" - msg_ok "Cloud-Init configured" -fi - -# Start VM -if [ "$START_VM" == "yes" ]; then - msg_info "Starting Docker VM" - qm start $VMID >/dev/null 2>&1 - msg_ok "Started Docker VM" -fi - -# ============================================================================== -# FINAL OUTPUT -# ============================================================================== -VM_IP="" -if [ "$START_VM" == "yes" ]; then - set +e - for i in {1..10}; do - VM_IP=$(qm guest cmd "$VMID" network-get-interfaces 2>/dev/null | - jq -r '.[] | select(.name != "lo") | ."ip-addresses"[]? | select(."ip-address-type" == "ipv4") | ."ip-address"' 2>/dev/null | - grep -v "^127\." 
| head -1) || true - [ -n "$VM_IP" ] && break - sleep 3 - done - set -e -fi - -echo -e "\n${INFO}${BOLD}${GN}Docker VM Configuration Summary:${CL}" -echo -e "${TAB}${DGN}VM ID: ${BGN}${VMID}${CL}" -echo -e "${TAB}${DGN}Hostname: ${BGN}${HN}${CL}" -echo -e "${TAB}${DGN}OS: ${BGN}${OS_DISPLAY}${CL}" -[ -n "$VM_IP" ] && echo -e "${TAB}${DGN}IP Address: ${BGN}${VM_IP}${CL}" - -if [ "$DOCKER_PREINSTALLED" = "yes" ]; then - echo -e "${TAB}${DGN}Docker: ${BGN}Pre-installed (via get.docker.com)${CL}" -else - echo -e "${TAB}${DGN}Docker: ${BGN}Installing on first boot${CL}" - echo -e "${TAB}${YW}āš ļø Wait 2-3 minutes for installation to complete${CL}" - echo -e "${TAB}${YW}āš ļø Check progress: ${BL}cat /var/log/install-docker.log${CL}" -fi - -if [ "$USE_CLOUD_INIT" = "yes" ]; then - display_cloud_init_info "$VMID" "$HN" 2>/dev/null || true -fi - -post_update_to_api "done" "none" -msg_ok "Completed successfully!\n" +VM_CREATION_PHASE="yes" +create_vm +VM_CREATION_PHASE="no" +rm -f "$VM_ERROR_LOG" 2>/dev/null || true