#!/bin/bash
#
# ProxDoc - The Proxmox System Doctor
#
# Diagnostic script for Proxmox (and plain Linux) hosts. It is normally piped
# into bash ("curl ... | bash -s -- --diags"), so it must not rely on $0 or
# BASH_SOURCE. Run with --help for the list of options.
#
# NOTE(review): the copy of this file that was reviewed had lost text to
# markup stripping between the banner in print_header and the tail of
# handle_error. The banner, cleanup(), the head of handle_error() and the
# counter initialisation below are reconstructions - confirm against the
# canonical repository copy.

# Deliberately not readonly: kept as a plain variable, as in the original.
VERSION="1.1.0"

###################
# Timeout Configuration
###################
readonly CMD_TIMEOUT=30 # Default timeout in seconds for external commands

###################
# Pattern Constants
###################
# Virtual/firewall interface patterns to skip
readonly VIRTUAL_IFACE_PATTERN="^(veth|fwbr|fwln|fwpr|tap)"
# Storage controller patterns for HBA detection
readonly STORAGE_CONTROLLER_PATTERN="RAID|SAS|SATA|SCSI|Mass storage|Serial Attached|Fibre Channel|NVMe"
# Disk device patterns
readonly DISK_DEVICE_PATTERN="^sd|^nvme"
# PCI devices to exclude from hardware info
readonly EXCLUDED_PCI_PATTERN="Host bridge|PCI bridge|ISA bridge|SMBus|IOMMU|Dummy|USB controller|Audio device|Encryption controller|Multimedia controller"

###################
# Color Definitions
###################
NC="\033[00m"
GREEN="\033[01;32m"
RED="\033[01;31m"
YELLOW="\033[01;33m"

###################
# Run State
###################
# Incremented by handle_error/log_message and reported by runDiags.
ERRORS_OCCURRED=0
WARNINGS_OCCURRED=0

###################
# Utility Functions
###################

# Print the ProxDoc banner.
# NOTE(review): only the left part of the banner survived in the reviewed
# copy; the remainder is reconstructed (figlet "slant" rendering of
# "ProxDoc") - confirm.
print_header() {
  printf '%s\n' ''
  printf '%s\n' '    ____                  ____'
  printf '%s\n' '   / __ \_________  _  __/ __ \____  _____'
  printf '%s\n' '  / /_/ / ___/ __ \| |/_/ / / / __ \/ ___/'
  # Split in two so the source never contains a less-than sign directly
  # followed by a slash, which markup-stripping tools can mistake for a tag.
  printf '%s%s\n' ' / ____/ /  / /_/ />  <' '/ /_/ / /_/ / /__'
  printf '%s\n' '/_/   /_/   \____/_/|_/_____/\____/\___/'
  printf '%s\n' ''
}

# EXIT trap handler.
# NOTE(review): the original body was not visible; this minimal version only
# resets terminal colors so an interrupted run cannot leave the terminal
# tinted - confirm whether more cleanup is expected.
cleanup() {
  printf '%b' "$NC"
}

# Report an error on stderr, count it, and (by default) abort the script.
# Arguments: $1 - message (may contain echo -e escapes such as \n)
#            $2 - "true" (default) to exit 1 after reporting, else continue
# NOTE(review): the message prefix and the default for $2 are reconstructed;
# only the stderr redirect, the counter increment and the fatal check were
# visible in the reviewed copy.
handle_error() {
  local message="$1"
  local fatal="${2:-true}"

  echo -e "${RED}[ERROR]${NC} $message" >&2
  ERRORS_OCCURRED=$((ERRORS_OCCURRED + 1))
  if [[ "$fatal" == "true" ]]; then
    exit 1
  fi
}

# Print a colorized log line and update the warning/error counters.
# Arguments: $1 - level: info | warn | error (other levels print nothing)
#            $2 - message
log_message() {
  local level="$1"
  local message="$2"

  case "$level" in
    info)
      echo -e "${GREEN}[INFO]${NC} $message"
      ;;
    warn)
      echo -e "${YELLOW}[WARN]${NC} $message"
      WARNINGS_OCCURRED=$((WARNINGS_OCCURRED + 1))
      ;;
    error)
      echo -e "${RED}[ERROR]${NC} $message" >&2
      ERRORS_OCCURRED=$((ERRORS_OCCURRED + 1))
      ;;
  esac
}

# Verify that the external tools used by the full diagnostics are installed;
# calls handle_error (fatal by default) listing any that are missing.
check_requirements() {
  log_message info "Checking medical equipment..."

  local tools=("dmidecode" "lscpu" "ip" "smartctl" "sensors" "lspci" "bc")
  local missing=()
  local tool

  for tool in "${tools[@]}"; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      missing+=("$tool")
    fi
  done

  if [[ ${#missing[@]} -gt 0 ]]; then
    handle_error "Missing tools: ${missing[*]}\n Please install with 'curl -s http://10.10.10.63:3000/LotusGuild/freshStartScript/raw/branch/main/freshStart.sh | bash'"
  fi
}

# Returns 0 when the pveversion command exists (i.e. we are on Proxmox VE).
checkIfOnHypervisor() {
  command -v pveversion >/dev/null 2>&1
}

###################
# System Information Functions
###################

# Print timestamp, hostname, kernel and either the Proxmox version or the
# distribution name from /etc/os-release.
get_system_info() {
  echo -e "\n${GREEN}=== System Information ===${NC}"
  echo -e "\n${GREEN}=== Diagnostic Run: $(date '+%Y-%m-%d %H:%M:%S') ===${NC}"
  echo -e "${GREEN}Hostname:${NC} $(uname -n)"
  echo -e "${GREEN}Kernel:${NC} $(uname -r)"

  if checkIfOnHypervisor; then
    echo -e "\n${GREEN}=== Proxmox Version ===${NC}"
    pveversion
  else
    echo -e "\n${GREEN}=== OS Information ===${NC}"
    if [[ -f /etc/os-release ]]; then
      # os-release defines VERSION, NAME, ID, ... Sourcing it in the main
      # shell would overwrite this script's own VERSION, so read it inside a
      # command-substitution subshell and keep only PRETTY_NAME.
      local pretty_name
      pretty_name=$(
        # shellcheck disable=SC1091
        source /etc/os-release
        printf '%s' "${PRETTY_NAME:-}"
      )
      echo -e "${GREEN}Distribution:${NC} $pretty_name"
    else
      echo "OS information not available"
    fi
  fi
}

# Print the output of lm-sensors, or warn when it is not installed.
get_temp_info() {
  echo -e "\n${GREEN}=== Temperature Information ===${NC}"
  if command -v sensors >/dev/null 2>&1; then
    sensors
  else
    log_message warn "sensors command not found. Install lm-sensors package for temperature monitoring"
  fi
}

# Run "smartctl -H" on every sd*/nvme* block device, each bounded by
# CMD_TIMEOUT seconds.
get_disk_health() {
  echo -e "\n${GREEN}=== Disk Health Status ===${NC}"
  if ! command -v smartctl >/dev/null 2>&1; then
    log_message warn "smartctl not found. Install smartmontools for disk health monitoring"
    return 0
  fi

  local disk
  while IFS= read -r disk; do
    [[ -z "$disk" ]] && continue
    echo -e "\nChecking /dev/$disk:"
    if ! timeout "$CMD_TIMEOUT" smartctl -H "/dev/$disk"; then
      log_message warn "smartctl timed out or failed for /dev/$disk"
    fi
  done < <(lsblk -d -n -o name | grep -E "$DISK_DEVICE_PATTERN")
}

# Print CPU model, logical CPU count and clock speed.
get_cpu_info() {
  local cpu_info cpu_cores cpu_mhz lscpu_out

  cpu_info=$(grep -m 1 -w 'model name' /proc/cpuinfo 2>/dev/null | awk -F: '{print $2}' | xargs)

  # One lscpu call, forced to the C locale so the field labels are stable.
  lscpu_out=$(LC_ALL=C lscpu 2>/dev/null)
  cpu_cores=$(awk '$1 == "CPU(s):" {print $2; exit}' <<<"$lscpu_out")

  # lscpu may print "CPU MHz", "CPU max MHz" and "CPU min MHz"; report a
  # single value: the current clock when present, otherwise the maximum.
  cpu_mhz=$(awk -F: '
    /^CPU MHz:/     { cur = $2 }
    /^CPU max MHz:/ { max = $2 }
    END {
      v = (cur != "") ? cur : max
      gsub(/^[ \t]+|[ \t]+$/, "", v)
      print v
    }' <<<"$lscpu_out")
  if [[ -z "$cpu_mhz" ]]; then
    # Fall back to the kernel's own report when lscpu prints no MHz line.
    cpu_mhz=$(awk -F: '/^cpu MHz/ {gsub(/^[ \t]+/, "", $2); print $2; exit}' /proc/cpuinfo 2>/dev/null)
  fi

  echo -e "${GREEN}CPU Model:${NC} ${cpu_info:-Unknown}"
  echo -e "${GREEN}CPU Cores:${NC} ${cpu_cores:-Unknown}"
  echo -e "${GREEN}CPU MHz:${NC} ${cpu_mhz:-Unknown}"
}

# Print total/used/free RAM as reported by "free -h".
get_ram_info() {
  local ram_total="" ram_used="" ram_free=""

  # Single call; C locale keeps the "Mem:" row label stable.
  read -r _ ram_total ram_used ram_free _ \
    < <(LC_ALL=C free -h 2>/dev/null | grep 'Mem:')

  echo -e "${GREEN}Total RAM:${NC} ${ram_total:-Unknown}"
  echo -e "${GREEN}Used RAM:${NC} ${ram_used:-Unknown}"
  echo -e "${GREEN}Free RAM:${NC} ${ram_free:-Unknown}"
}

# Print usage of /dev-backed filesystems and, when zpool exists, pool status.
get_storage_info() {
  echo -e "${GREEN}Storage Information:${NC}"
  df -h --output=source,size,used,avail,pcent | grep '^/dev'

  if command -v zpool >/dev/null 2>&1; then
    echo -e "\n${GREEN}=== ZFS Pool Status ===${NC}"
    zpool status
  fi
}

# Print the default gateway(s) and the host's IP addresses.
get_network_info() {
  local default_gateway ip_addresses

  default_gateway=$(ip route 2>/dev/null | awk '/default/ {print $3}')
  ip_addresses=$(hostname -I 2>/dev/null | xargs)

  echo -e "${GREEN}Default Gateway:${NC} ${default_gateway:-Not configured}"
  echo -e "${GREEN}IP Addresses:${NC} ${ip_addresses:-None detected}"
}

# Print per-interface counters for the interfaces returned by
# get_physical_interfaces, then socket/interface statistics via ss or netstat.
get_detailed_network() {
  echo -e "\n${GREEN}=== Network Interface Statistics ===${NC}"
  local iface
  while IFS= read -r iface; do
    [[ -z "$iface" ]] && continue
    ip -s link show "$iface" 2>/dev/null
  done < <(get_physical_interfaces)

  echo -e "\n${GREEN}=== Network Statistics ===${NC}"
  if command -v ss >/dev/null 2>&1; then
    ss -s
  elif command -v netstat >/dev/null 2>&1; then
    netstat -i
  else
    log_message warn "netstat/ss not found for network statistics"
  fi
}

# Print the BIOS version and the PCI devices not matched by
# EXCLUDED_PCI_PATTERN.
get_hardware_info() {
  echo -e "${GREEN}BIOS Version:${NC} $(dmidecode -s bios-version)"
  echo -e "\n${GREEN}=== PCI Devices ===${NC}"
  # Show interesting devices, exclude bridges, infrastructure, and integrated motherboard devices
  lspci | grep -v -E "$EXCLUDED_PCI_PATTERN"
}

# Print baseboard and system identification strings from dmidecode.
get_motherboard_info() {
  echo -e "\n${GREEN}=== Motherboard Information ===${NC}"
  echo -e "${GREEN}Manufacturer:${NC} $(dmidecode -s baseboard-manufacturer)"
  echo -e "${GREEN}Product Name:${NC} $(dmidecode -s baseboard-product-name)"
  echo -e "${GREEN}Version:${NC} $(dmidecode -s baseboard-version)"
  echo -e "${GREEN}Serial Number:${NC} $(dmidecode -s baseboard-serial-number)"
  echo -e "${GREEN}System Manufacturer:${NC} $(dmidecode -s system-manufacturer)"
  echo -e "${GREEN}System Product:${NC} $(dmidecode -s system-product-name)"
  echo -e "${GREEN}System Serial:${NC} $(dmidecode -s system-serial-number)"
}

# Print one DIMM table row, but only for a populated slot (locator and size
# present, and the size is not a "No Module Installed"-style value).
# Arguments: $1 locator, $2 size, $3 type, $4 speed, $5 manufacturer
_print_dimm_row() {
  local locator="$1" size="$2" type="$3" speed="$4" manufacturer="$5"

  if [[ -n "$locator" && -n "$size" && ! "$size" =~ (No|Not|Installed) ]]; then
    printf "%-12s %-12s %-10s %-12s %-20s\n" \
      "${locator:-N/A}" \
      "${size:-N/A}" \
      "${type:-N/A}" \
      "${speed:-N/A}" \
      "${manufacturer:-N/A}"
  fi
}

# Print a table of populated DIMMs plus a slot summary, parsed from a single
# "dmidecode -t memory" run.
get_memory_details() {
  echo -e "\n${GREEN}=== Memory DIMM Information ===${NC}"

  local dmi_out line
  local locator="" size="" type="" speed="" manufacturer=""
  local in_device=false

  # Capture once; the table and both summary counters reuse this output.
  dmi_out=$(dmidecode -t memory 2>/dev/null)

  # Print header
  printf "%-12s %-12s %-10s %-12s %-20s\n" "Slot" "Size" "Type" "Speed" "Manufacturer"
  printf "%-12s %-12s %-10s %-12s %-20s\n" "----" "----" "----" "-----" "------------"

  while IFS= read -r line; do
    # A new "Memory Device" section flushes the previous device's row.
    if [[ "$line" =~ ^Memory[[:space:]]Device ]]; then
      _print_dimm_row "$locator" "$size" "$type" "$speed" "$manufacturer"
      locator=""
      size=""
      type=""
      speed=""
      manufacturer=""
      in_device=true
      continue
    fi

    # Skip if not in a device section
    [[ "$in_device" != true ]] && continue

    # Parse fields; "Bank Locator" lines are excluded from the slot name.
    if [[ "$line" =~ ^[[:space:]]*Locator:[[:space:]]*(.+)$ ]] && [[ ! "$line" =~ Bank ]]; then
      locator="${BASH_REMATCH[1]}"
      locator="${locator// /_}" # Replace spaces with underscores
    elif [[ "$line" =~ ^[[:space:]]*Size:[[:space:]]*(.+)$ ]]; then
      size="${BASH_REMATCH[1]}"
    elif [[ "$line" =~ ^[[:space:]]*Type:[[:space:]]*(.+)$ ]]; then
      type="${BASH_REMATCH[1]}"
      # Skip if it's an error or unknown type
      [[ "$type" =~ (Unknown|Error|Correction) ]] && type=""
    elif [[ "$line" =~ ^[[:space:]]*Speed:[[:space:]]*(.+)$ ]]; then
      speed="${BASH_REMATCH[1]}"
      [[ "$speed" =~ Unknown ]] && speed=""
    elif [[ "$line" =~ ^[[:space:]]*Manufacturer:[[:space:]]*(.+)$ ]]; then
      manufacturer="${BASH_REMATCH[1]}"
      [[ "$manufacturer" =~ (Unknown|NO DIMM) ]] && manufacturer=""
    fi

    # Empty line marks end of device section
    if [[ -z "$line" ]]; then
      in_device=false
    fi
  done <<<"$dmi_out"

  # Print last device if it has data
  _print_dimm_row "$locator" "$size" "$type" "$speed" "$manufacturer"

  # Memory summary
  echo -e "\n${GREEN}Memory Summary:${NC}"

  local total_slots=0
  local populated=0
  while IFS= read -r line; do
    if [[ "$line" =~ ^[[:space:]]*Locator: ]] && [[ ! "$line" =~ Bank ]]; then
      total_slots=$((total_slots + 1))
    fi
    if [[ "$line" =~ ^[[:space:]]*Size:[[:space:]]*(.+)$ ]]; then
      if [[ ! "${BASH_REMATCH[1]}" =~ (No|Not|Installed) ]]; then
        populated=$((populated + 1))
      fi
    fi
  done <<<"$dmi_out"

  echo -e " Total Slots: $total_slots"
  echo -e " Populated: $populated"

  # Get max capacity
  local max_capacity
  max_capacity=$(grep -i "Maximum Capacity" <<<"$dmi_out" | head -1 | sed 's/.*: //')
  echo -e " Max Capacity: ${max_capacity:-Unknown}"
}

# Print driver, MAC, link state and (when ethtool exists) link and firmware
# details for each interface returned by get_physical_interfaces.
get_nic_details() {
  echo -e "\n${GREEN}=== Network Interface Details ===${NC}"

  local iface driver link_info fw_ver line
  while IFS= read -r iface; do
    [[ -z "$iface" ]] && continue
    echo -e "\n${GREEN}Interface: $iface${NC}"

    # Get driver info
    if [[ -L "/sys/class/net/$iface/device/driver" ]]; then
      driver=$(basename "$(readlink "/sys/class/net/$iface/device/driver")")
      echo -e " Driver: $driver"
    fi

    # Get MAC address
    if [[ -f "/sys/class/net/$iface/address" ]]; then
      echo -e " MAC: $(cat "/sys/class/net/$iface/address")"
    fi

    # Get link state
    if [[ -f "/sys/class/net/$iface/operstate" ]]; then
      echo -e " State: $(cat "/sys/class/net/$iface/operstate")"
    fi

    # Use ethtool if available
    if command -v ethtool >/dev/null 2>&1; then
      # Get speed and duplex
      link_info=$(ethtool "$iface" 2>/dev/null | grep -E "Speed:|Duplex:|Link detected:")
      if [[ -n "$link_info" ]]; then
        while IFS= read -r line; do
          echo -e " $line"
        done <<<"$link_info"
      fi

      # Firmware strings often contain spaces ("0x8000... 1.2.3"); keep the
      # whole value rather than only its first word.
      fw_ver=$(ethtool -i "$iface" 2>/dev/null | sed -n 's/^firmware-version:[[:space:]]*//p')
      if [[ -n "$fw_ver" ]]; then
        echo -e " Firmware: $fw_ver"
      fi
    fi
  done < <(get_physical_interfaces)
}

# List interface names from /sys/class/net, one per line, skipping loopback
# and names matching VIRTUAL_IFACE_PATTERN.
get_physical_interfaces() {
  local path iface

  for path in /sys/class/net/*; do
    # Skip if glob didn't match anything
    [[ -e "$path" ]] || continue

    iface=$(basename "$path")

    [[ "$iface" == "lo" ]] && continue
    [[ "$iface" =~ $VIRTUAL_IFACE_PATTERN ]] && continue

    echo "$iface"
  done
}

# Print the PCI devices matching STORAGE_CONTROLLER_PATTERN, followed by
# subsystem/link/driver details for each.
get_hba_info() {
  echo -e "\n${GREEN}=== HBA/Storage Controller Information ===${NC}"

  # Find RAID, SAS, SATA, SCSI, and storage controllers
  lspci -vmm 2>/dev/null | awk -v pattern="$STORAGE_CONTROLLER_PATTERN" '
    BEGIN { RS=""; FS="\n" }
    $0 ~ pattern {
      for (i=1; i<=NF; i++) {
        if ($i ~ /^Slot:/) slot = substr($i, 7)
        if ($i ~ /^Class:/) class = substr($i, 8)
        if ($i ~ /^Vendor:/) vendor = substr($i, 9)
        if ($i ~ /^Device:/) device = substr($i, 9)
        if ($i ~ /^Rev:/) rev = substr($i, 6)
      }
      printf "\n%s\n", slot
      printf " Class: %s\n", class
      printf " Vendor: %s\n", vendor
      printf " Device: %s\n", device
      if (rev) printf " Rev: %s\n", rev
      slot=""; class=""; vendor=""; device=""; rev=""
    }
  '

  # Show detailed info for storage controllers
  echo -e "\n${GREEN}=== Storage Controller Details ===${NC}"
  local ctrl
  while IFS= read -r ctrl; do
    [[ -z "$ctrl" ]] && continue
    echo -e "\n${GREEN}Controller $ctrl:${NC}"
    lspci -vvs "$ctrl" 2>/dev/null \
      | grep -E "^[[:space:]]+(Subsystem|LnkSta|Kernel driver)" \
      | head -5
  done < <(lspci | grep -iE "$STORAGE_CONTROLLER_PATTERN" | awk '{print $1}')
}

# Print load average, number of running services and the latest journal
# errors.
get_system_status() {
  echo -e "\n${GREEN}=== System Load ===${NC}"
  uptime

  echo -e "\n${GREEN}=== Service Status ===${NC}"
  # --no-legend drops the header/footer lines so only unit rows are counted.
  systemctl list-units --type=service --state=running --no-legend --no-pager | wc -l

  echo -e "\n${GREEN}=== Recent System Errors ===${NC}"
  journalctl -p err -n 5 --no-pager
}

###################
# DriveAtlas & Monitoring Functions
###################

# Download and run the DriveAtlas script.
# NOTE(review): this executes code fetched over plain HTTP from an internal
# host as root; consider pinning a checksum or serving it over TLS.
get_drive_atlas() {
  echo -e "\n${GREEN}=== Drive Atlas - Physical Bay Mapping ===${NC}"

  if ! command -v curl >/dev/null 2>&1; then
    log_message warn "curl not installed - cannot fetch DriveAtlas"
    return 0
  fi

  local url="http://10.10.10.63:3000/LotusGuild/driveAtlas/raw/branch/main/driveAtlas.sh"
  local script

  # Fetch first and check the result: feeding a failed download straight to
  # bash runs an empty script that exits 0, which would hide the failure.
  if ! script=$(curl -fsSL --max-time "$CMD_TIMEOUT" "$url" 2>/dev/null) \
    || [[ -z "$script" ]]; then
    log_message warn "DriveAtlas failed to execute or server unavailable"
    return 0
  fi

  if ! bash <(printf '%s\n' "$script") 2>/dev/null; then
    log_message warn "DriveAtlas failed to execute or server unavailable"
  fi
}

# Print Ceph health, OSD tree and usage; every call is bounded by CMD_TIMEOUT.
get_ceph_health() {
  echo -e "\n${GREEN}=== Ceph Cluster Health ===${NC}"

  if ! command -v ceph >/dev/null 2>&1; then
    log_message info "Ceph tools not installed on this node"
    return 0
  fi

  echo -e "${GREEN}Health Status:${NC}"
  timeout "$CMD_TIMEOUT" ceph health detail 2>/dev/null \
    || log_message warn "Cannot connect to Ceph cluster or timed out"

  echo -e "\n${GREEN}=== Ceph OSD Tree ===${NC}"
  timeout "$CMD_TIMEOUT" ceph osd tree 2>/dev/null \
    || log_message warn "Ceph OSD tree timed out"

  echo -e "\n${GREEN}=== Ceph Pool Usage ===${NC}"
  timeout "$CMD_TIMEOUT" ceph df 2>/dev/null \
    || log_message warn "Ceph df timed out"

  echo -e "\n${GREEN}=== Ceph OSD Usage ===${NC}"
  timeout "$CMD_TIMEOUT" ceph osd df 2>/dev/null \
    || log_message warn "Ceph OSD df timed out"
}

# Report whether the node_exporter unit is running and port 9100 is listening.
get_node_exporter_status() {
  echo -e "\n${GREEN}=== Node Exporter Status ===${NC}"

  if systemctl is-active --quiet node_exporter 2>/dev/null; then
    echo -e "${GREEN}Service:${NC} Running"

    local ip
    ip=$(hostname -I | awk '{print $1}')
    echo -e "${GREEN}Metrics URL:${NC} http://${ip}:9100/metrics"

    if ss -tlnp 2>/dev/null | grep -q ':9100'; then
      echo -e "${GREEN}Port 9100:${NC} Listening"
    else
      log_message warn "Port 9100 not listening"
    fi
  elif systemctl list-unit-files 2>/dev/null | grep -q node_exporter; then
    log_message warn "Node Exporter is installed but not running"
    echo -e "Start with: systemctl start node_exporter"
  else
    log_message info "Node Exporter not installed"
  fi
}

# Report the state of hwmon.timer and the last log lines of hwmon.service.
get_hwmon_status() {
  echo -e "\n${GREEN}=== hwmon Daemon Status ===${NC}"

  if systemctl is-active --quiet hwmon.timer 2>/dev/null; then
    echo -e "${GREEN}Timer:${NC} Active"
    systemctl list-timers hwmon.timer --no-pager 2>/dev/null
    echo -e "\n${GREEN}Last Run:${NC}"
    journalctl -u hwmon.service -n 3 --no-pager 2>/dev/null || true
  elif systemctl list-unit-files 2>/dev/null | grep -qF hwmon.timer; then
    # -F: match the unit name literally (the dot is not a wildcard).
    log_message warn "hwmon timer is installed but not active"
    echo -e "Enable with: systemctl enable --now hwmon.timer"
  else
    log_message info "hwmon daemon not installed"
  fi
}

# Valid check names for selective mode
readonly VALID_CHECKS="cpu ram memory storage disk network hardware temps services ceph vms containers"

# Run only the named checks.
# Arguments: $1 - comma-separated check names (see VALID_CHECKS)
# Exits 1 when the list is empty or contains an unknown name.
run_selective_checks() {
  local checks="$1"
  local check
  local -a check_array

  if [[ -z "$checks" ]]; then
    log_message error "No checks specified. Use --checks=cpu,ram,disk"
    echo "Valid checks: $VALID_CHECKS"
    exit 1
  fi

  # Validate every name before running anything.
  IFS=',' read -ra check_array <<< "$checks"
  for check in "${check_array[@]}"; do
    if [[ ! " $VALID_CHECKS " =~ " $check " ]]; then
      log_message error "Unknown check: $check"
      echo "Valid checks: $VALID_CHECKS"
      exit 1
    fi
  done

  log_message info "Running selective checks: $checks"
  echo ""

  for check in "${check_array[@]}"; do
    case "$check" in
      cpu)
        log_message info "Checking CPU..."
        get_cpu_info
        ;;
      ram)
        log_message info "Checking RAM..."
        get_ram_info
        ;;
      memory)
        log_message info "Checking memory details..."
        get_memory_details
        ;;
      storage)
        log_message info "Checking storage..."
        get_storage_info
        ;;
      disk)
        log_message info "Checking disk health..."
        get_disk_health
        ;;
      network)
        log_message info "Checking network..."
        get_network_info
        get_detailed_network
        get_nic_details
        ;;
      hardware)
        log_message info "Checking hardware..."
        get_hardware_info
        get_motherboard_info
        get_hba_info
        ;;
      temps)
        log_message info "Checking temperatures..."
        get_temp_info
        ;;
      services)
        log_message info "Checking services..."
        check_services
        ;;
      ceph)
        log_message info "Checking Ceph..."
        get_ceph_health
        ;;
      vms)
        log_message info "Checking VMs..."
        list_vms
        ;;
      containers)
        log_message info "Checking containers..."
        list_containers
        ;;
    esac
  done

  echo ""
  log_message info "Selective checks complete"
}

# Abbreviated run: services, temperatures, SMART summary, node exporter and
# a one-line Ceph status.
quick_health_check() {
  echo -e "\n${GREEN}=== Quick Health Check ===${NC}"
  echo -e "Running quick health assessment...\n"

  # Services
  check_services

  # Temperatures
  get_temp_info

  # Disk health (quick)
  echo -e "\n${GREEN}=== Disk Health Summary ===${NC}"
  if command -v smartctl >/dev/null 2>&1; then
    local disk health
    while IFS= read -r disk; do
      [[ -z "$disk" ]] && continue
      health=$(timeout "$CMD_TIMEOUT" smartctl -H "/dev/$disk" 2>/dev/null \
        | grep -i "health" | awk -F: '{print $2}' | xargs)
      if [[ -n "$health" ]]; then
        echo -e "/dev/$disk: $health"
      else
        echo -e "/dev/$disk: ${YELLOW}check timed out or unavailable${NC}"
      fi
    done < <(lsblk -d -n -o name | grep -E "$DISK_DEVICE_PATTERN")
  fi

  # Node Exporter
  get_node_exporter_status

  # Ceph quick status; bounded like every other ceph call so an unreachable
  # cluster cannot hang the "quick" check.
  if command -v ceph >/dev/null 2>&1; then
    echo -e "\n${GREEN}=== Ceph Quick Status ===${NC}"
    timeout "$CMD_TIMEOUT" ceph health 2>/dev/null || true
  fi
}

###################
# Proxmox Specific Functions
###################

# Print the systemd state of the critical Proxmox services; skipped with a
# warning when not on Proxmox.
check_services() {
  if ! checkIfOnHypervisor; then
    log_message warn "Not on Proxmox - skipping Proxmox service checks"
    return 0
  fi

  echo -e "${GREEN}Checking critical services:${NC}"
  local services=("pvedaemon" "pveproxy" "pvecluster" "pve-cluster" "corosync")
  local service status

  for service in "${services[@]}"; do
    # is-active prints the state AND exits non-zero for stopped units, so a
    # "|| echo" fallback would append a second line; default only when
    # nothing was printed at all.
    status=$(systemctl is-active "$service" 2>/dev/null)
    [[ -n "$status" ]] || status="not-found"
    echo -e "${GREEN}$service:${NC} $status"
  done
}

# Warn when the installed pve-manager version is older than 6.0.
check_pve_version() {
  local min_version="6.0"
  local current_version

  current_version=$(pveversion | grep -oP 'pve-manager/\K[0-9]+\.[0-9]+' || echo "0.0")
  if (( $(echo "$current_version < $min_version" | bc -l) )); then
    log_message warn "Proxmox VE version $current_version may not support all features"
  fi
}

# Print "qm list" when on Proxmox.
list_vms() {
  if ! checkIfOnHypervisor; then
    log_message info "Not on Proxmox - skipping VM list"
    return 0
  fi

  if command -v qm >/dev/null 2>&1; then
    echo -e "${GREEN}Virtual Machine Status:${NC}"
    qm list
  else
    log_message warn "qm command not found"
  fi
}

# Print "pct list" when on Proxmox.
list_containers() {
  if ! checkIfOnHypervisor; then
    log_message info "Not on Proxmox - skipping container list"
    return 0
  fi

  if command -v pct >/dev/null 2>&1; then
    echo -e "\n${GREEN}=== LXC Container Status ===${NC}"
    pct list
  else
    log_message warn "pct command not found"
  fi
}

###################
# Command Line Interface Functions
###################

# Print usage and exit 0.
help() {
  local url="http://10.10.10.63:3000/LotusGuild/proxDoc/raw/branch/main/proxDoc.sh"

  cat <<EOF
ProxDoc - The Proxmox System Doctor v${VERSION}

Usage: curl -sL "${url}" | bash -s -- [OPTION]

A comprehensive diagnostic tool for Proxmox server health checks.

Treatment Options:
 --help Show this prescription guide
 --diags Perform full system examination
 --quick Quick health check (services, temps, disks)
 --drives Show physical drive bay mapping (DriveAtlas)
 --ceph Check Ceph cluster health
 --node-exporter Check Node Exporter status
 --hwmon Check hwmon daemon status
 --services Check vital Proxmox services
 --vm-list Check VM vitals
 --ct-list Check container vitals
 --backup Review backup health
 --checks=LIST Run only specific checks (comma-separated)

Valid checks for --checks option:
 cpu, ram, memory, storage, disk, network, hardware, temps,
 services, ceph, vms, containers

Examples:
 Run full diagnostics:
 curl -sL "${url}" | bash -s -- --diags

 Quick health check:
 curl -sL "${url}" | bash -s -- --quick

 Check only services and VMs:
 curl -sL "${url}" | bash -s -- --services
 curl -sL "${url}" | bash -s -- --vm-list

 View drive bay mapping:
 curl -sL "${url}" | bash -s -- --drives

 Check Ceph cluster health:
 curl -sL "${url}" | bash -s -- --ceph

 Run only CPU and RAM checks:
 curl -sL "${url}" | bash -s -- --checks=cpu,ram
EOF
  exit 0
}

###################
# Main Functions
###################

# Full examination: runs every check, skipping the Proxmox-only ones when
# pveversion is absent, then prints a warning/error summary.
runDiags() {
  log_message info "Beginning system examination..."

  # Check if running on Proxmox
  local is_proxmox=false
  if checkIfOnHypervisor; then
    is_proxmox=true
    log_message info "Detected Proxmox VE hypervisor"
  else
    log_message warn "Not running on Proxmox VE - some checks will be skipped"
  fi
  echo ""

  log_message info "Checking system information..."
  get_system_info

  log_message info "Checking CPU..."
  get_cpu_info

  log_message info "Checking RAM..."
  get_ram_info

  log_message info "Checking memory details..."
  get_memory_details

  log_message info "Checking storage..."
  get_storage_info

  log_message info "Checking drive atlas..."
  get_drive_atlas

  log_message info "Checking network..."
  get_network_info
  get_detailed_network
  get_nic_details

  log_message info "Checking hardware..."
  get_hardware_info
  get_motherboard_info
  get_hba_info

  log_message info "Checking temperatures..."
  get_temp_info

  log_message info "Checking system status..."
  get_system_status

  log_message info "Checking monitoring services..."
  get_node_exporter_status
  get_hwmon_status

  # Only run Proxmox-specific checks if on Proxmox
  if [[ "$is_proxmox" == true ]]; then
    log_message info "Checking Ceph cluster..."
    get_ceph_health

    log_message info "Checking VMs..."
    list_vms

    log_message info "Checking containers..."
    list_containers
  fi

  echo ""
  log_message info "Examination complete"

  # Print summary if there were issues
  if [[ $WARNINGS_OCCURRED -gt 0 || $ERRORS_OCCURRED -gt 0 ]]; then
    echo -e "\n${YELLOW}=== Summary ===${NC}"
    [[ $WARNINGS_OCCURRED -gt 0 ]] && echo -e "Warnings: $WARNINGS_OCCURRED"
    [[ $ERRORS_OCCURRED -gt 0 ]] && echo -e "Errors: $ERRORS_OCCURRED"
  fi
}

# Whitelist of valid command options
readonly VALID_OPTIONS="--help --diags --quick --drives --ceph --node-exporter --hwmon --services --vm-list --ct-list --backup --checks"

# Returns 0 when $1 is a well-formed, whitelisted option (optionally with a
# "=value" suffix), 1 otherwise.
validate_input() {
  local input="$1"

  # Shape check: "--", a lowercase letter, then only lowercase letters,
  # hyphens, equals signs and commas.
  if [[ ! "$input" =~ ^--[a-z][-a-z=,]*$ ]]; then
    return 1
  fi

  # Extract the option name (before any = sign)
  local opt_name="${input%%=*}"

  # Check against whitelist
  if [[ ! " $VALID_OPTIONS " =~ " $opt_name " ]]; then
    return 1
  fi

  return 0
}

# Validate the single command-line option and dispatch to its handler.
# Arguments: $1 - option, e.g. "--diags" or "--checks=cpu,ram"
checkForInput() {
  local input="$1"

  # Validate input against whitelist
  if ! validate_input "$input"; then
    echo -e "${RED}Invalid option: $input${NC}"
    echo -e "Use --help to see available options."
    exit 1
  fi

  # Extract option name and value for --checks=X pattern
  local opt_name="${input%%=*}"
  local opt_value="${input#*=}"
  # No "=" present: both expansions return the whole input, so clear value.
  [[ "$opt_name" == "$opt_value" ]] && opt_value=""

  case "$opt_name" in
    --help) help ;;
    --diags)
      check_requirements
      runDiags
      ;;
    --quick) quick_health_check ;;
    --drives) get_drive_atlas ;;
    --ceph) get_ceph_health ;;
    --node-exporter) get_node_exporter_status ;;
    --hwmon) get_hwmon_status ;;
    --services) check_services ;;
    --vm-list) list_vms ;;
    --ct-list) list_containers ;;
    --backup)
      echo -e "${GREEN}Backup Status:${NC}"
      pvesm status 2>/dev/null || log_message warn "pvesm not available"
      ;;
    --checks) run_selective_checks "$opt_value" ;;
  esac
}

###################
# Script Execution
###################
argOne=$1

# Show header
print_header

# Usage text needs no privileges, so show it before the root check.
if [[ -z "$argOne" || "$argOne" == "--help" ]]; then
  help
fi

# Check root
if [[ $EUID -ne 0 ]]; then
  handle_error "This script must be run as root"
fi

# Set trap for cleanup and interrupts
trap cleanup EXIT
trap 'echo -e "\n${RED}Script interrupted by user.${NC}"; exit 130' INT
trap 'echo -e "\n${RED}Script terminated.${NC}"; exit 143' TERM

checkForInput "$argOne"