#!/bin/bash
#
# ProxDoc - The Proxmox System Doctor.
# Diagnostic helper for Proxmox hosts; see help() for the supported options.

readonly VERSION="1.1.0"

# Spinner frames.
# NOTE(review): not referenced by any function visible in this file
# (show_progress keeps its own frame string) - confirm before removing.
readonly SPINNER="/-\|"

###################
# Color Definitions
###################
# ANSI escape sequences kept as literal backslash text; they only take effect
# when printed through `echo -e` or `printf '%b'`.
readonly NC="\033[00m"
readonly GREEN="\033[01;32m"
readonly RED="\033[01;31m"
readonly YELLOW="\033[01;33m"


###################
# Utility Functions
###################

# Print the start-up banner: ASCII art, tool name, version, start time and
# the host being examined.
# Globals:  VERSION (read)
# Outputs:  banner text on stdout
# NOTE(review): the art's spacing is reproduced as found in this copy of the
# file; verify the alignment against the intended logo.
print_header() {
    # Unquoted delimiter so ${VERSION} expands. The blank first line is part
    # of the banner (the text starts with a newline).
    cat <<EOF

____ ____
/ __ \_________ _ __/ __ \____ _____
/ /_/ / ___/ __ \| |/_/ / / / __ \/ ___/
/ ____/ / / /_/ /> </ /_/ / /_/ / /__
/_/ /_/ \____/_/|_/_____/\____/\___/

The Proxmox System Doctor
Version ${VERSION}
======================================
EOF
    printf 'Started at: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')"
    printf 'Examining patient: %s\n' "$(hostname)"
    printf '%s\n' "======================================"
}

# Report a fatal error in red and terminate the script.
# Globals:    RED, NC (read)
# Arguments:  $1 - error description
# Outputs:    "Error: <description>" on stderr
# Returns:    does not return; exits the shell with status 1
handle_error() {
    # Diagnostics go to stderr so they are not mixed into captured or piped
    # stdout. %s keeps backslashes in the message literal; only the color
    # codes are escape-expanded (%b).
    printf '%b%s%b\n' "${RED}" "Error: ${1-}" "${NC}" >&2
    exit 1
}

# Print a message with a colored severity tag.
# Globals:    GREEN, YELLOW, RED, NC (read)
# Arguments:  $1 - level: info | warn | error
#             $2 - message text (escape sequences are expanded by `echo -e`)
# Outputs:    "[LEVEL] message" on stdout; for any other level the bare
#             message, so nothing is silently dropped
log_message() {
    local level=${1-}
    local message=${2-}

    case "$level" in
        info)  echo -e "${GREEN}[INFO]${NC} $message" ;;
        warn)  echo -e "${YELLOW}[WARN]${NC} $message" ;;
        error) echo -e "${RED}[ERROR]${NC} $message" ;;
        # Unknown level: still show the text rather than discarding it.
        *)     echo -e "$message" ;;
    esac
}

# Draw a text spinner for as long as the given process is alive.
# Arguments:  $1 - PID to watch (the caller's background job)
# Outputs:    spinner frames on stdout, erased again with backspaces
show_progress() {
    local pid=$1
    local delay=0.1
    local frames='|/-\'
    local rest

    # kill -0 delivers no signal; it only reports whether the PID can be
    # signalled. It is a builtin, so no process is forked per frame.
    while kill -0 "$pid" 2>/dev/null; do
        # %c prints only the first character of the frame string.
        printf ' [%c]  ' "$frames"
        # Rotate: move the first character to the end.
        rest=${frames#?}
        frames=${rest}${frames%"$rest"}
        sleep "$delay"
        # Step back over exactly the six columns just printed.
        printf '\b\b\b\b\b\b'
    done
    # Blank out the last frame and return the cursor to where it started.
    printf '      \b\b\b\b\b\b'
}

# Verify that every external tool the full diagnostic run relies on is on PATH.
# Calls handle_error (which terminates the script) for the first missing tool.
# Outputs:  an info line via log_message
check_requirements() {
    local -a tools=("dmidecode" "lscpu" "ip" "smartctl" "sensors" "lspci")
    local tool   # keep the loop variable out of the global namespace

    log_message info "Checking medical equipment..."
    for tool in "${tools[@]}"; do
        command -v "$tool" >/dev/null 2>&1 \
            || handle_error "Required instrument '$tool' is missing"
    done
}

# Tell whether the `pveversion` command is available on this host.
# Returns:  0 when `pveversion` is found, 1 otherwise
checkIfOnHypervisor() {
    if command -v pveversion >/dev/null 2>&1; then
        return 0
    fi
    return 1
}


###################
# System Information Functions
###################

# Print general host facts: run timestamp, node name, kernel release and the
# output of `pveversion`.
# Globals:  GREEN, NC (read)
# Outputs:  report section on stdout; "Not available" when pveversion fails
get_system_info() {
    printf '%b\n' "\n${GREEN}=== System Information ===${NC}"
    printf '%b\n' "\n${GREEN}=== Diagnostic Run: $(date '+%Y-%m-%d %H:%M:%S') ===${NC}"
    printf '%b\n' "${GREEN}Hostname:$(uname -n)${NC}"
    printf '%b\n' "${GREEN}Kernel:$(uname -r)${NC}"
    printf '%b\n' "\n${GREEN}=== Proxmox Version ===${NC}"
    if ! pveversion; then
        echo "Not available"
    fi
}

# Print the output of `sensors`, or a warning when the command is not present.
# Globals:  GREEN, NC (read)
# Outputs:  section header plus `sensors` output (or a warning) on stdout
get_temp_info() {
    printf '%b\n' "\n${GREEN}=== Temperature Information ===${NC}"

    if ! command -v sensors >/dev/null 2>&1; then
        log_message warn "sensors command not found. Install lm-sensors package for temperature monitoring"
        return
    fi
    sensors
}

# Run `smartctl -H` on every whole disk whose kernel name starts with "sd"
# or "nvme".
# Globals:  GREEN, NC (read)
# Outputs:  one "Checking /dev/<disk>:" block per disk on stdout, or a
#           warning when smartctl is not installed
get_disk_health() {
    local disk

    echo -e "\n${GREEN}=== Disk Health Status ===${NC}"

    if ! command -v smartctl >/dev/null 2>&1; then
        log_message warn "smartctl not found. Install smartmontools for disk health monitoring"
        return
    fi

    # -d: whole disks only, -n: no header line. Reading line by line avoids
    # word-splitting and globbing of the command output.
    while IFS= read -r disk; do
        echo -e "\nChecking /dev/$disk:"
        smartctl -H "/dev/$disk"
    done < <(lsblk -dn -o name | grep -E '^(sd|nvme)')
}

# Print the CPU model (from /proc/cpuinfo) plus the core count and clock
# figures reported by `lscpu`.
# Globals:  GREEN, RED, NC (read)
# Outputs:  three "CPU ...:" lines on stdout, preceded by an error line when
#           no model name could be read
get_cpu_info() {
    local cpu_info cpu_cores cpu_mhz lscpu_out

    cpu_info=$(grep -m 1 -w 'model name' /proc/cpuinfo 2>/dev/null | awk -F: '{print $2}' | xargs)
    # Test the value itself: the pipeline's status is that of `xargs`, which
    # succeeds even when grep found nothing.
    if [[ -z "$cpu_info" ]]; then
        echo -e "${RED}Failed to retrieve CPU model information.${NC}"
    fi

    # One lscpu run feeds both extractions.
    lscpu_out=$(lscpu)
    cpu_cores=$(awk '/^CPU\(s\):/ {print $2}' <<<"$lscpu_out")
    # The value is the last field both for "CPU MHz: N" and for
    # "CPU max MHz: N" / "CPU min MHz: N" style lines.
    cpu_mhz=$(awk '/MHz/ {print $NF}' <<<"$lscpu_out")

    echo -e "${GREEN}CPU Model:${NC} $cpu_info"
    echo -e "${GREEN}CPU Cores:${NC} $cpu_cores"
    echo -e "${GREEN}CPU MHz:${NC} $cpu_mhz"
}

# Print total, used and free memory as reported by `free -h`.
# Globals:  GREEN, NC (read)
# Outputs:  three "... RAM:" lines on stdout
get_ram_info() {
    local row_label="" ram_total="" ram_used="" ram_free="" remainder=""

    # One `free` snapshot so the three figures belong to the same instant.
    # Columns of the "Mem:" row: label, total, used, free, ...
    read -r row_label ram_total ram_used ram_free remainder < <(free -h | grep 'Mem:')

    echo -e "${GREEN}Total RAM:${NC} $ram_total"
    echo -e "${GREEN}Used RAM:${NC} $ram_used"
    echo -e "${GREEN}Free RAM:${NC} $ram_free"
}

# Print usage of all /dev-backed filesystems and, when `zpool` exists, the
# ZFS pool status.
# Globals:  GREEN, NC (read)
# Outputs:  filtered `df` table and optional `zpool status` on stdout
get_storage_info() {
    printf '%b\n' "${GREEN}Storage Information:${NC}"
    df -h --output=source,size,used,avail,pcent | grep '^/dev'

    # Nothing more to show on hosts without ZFS tooling.
    command -v zpool >/dev/null 2>&1 || return 0

    printf '%b\n' "\n${GREEN}=== ZFS Pool Status ===${NC}"
    zpool status
}

# Print the default gateway(s) and the host's IP addresses.
# Globals:  GREEN, NC (read)
# Outputs:  two labelled lines on stdout
get_network_info() {
    local default_gateway ip_addresses

    # Match only routes whose first field is "default", not any line that
    # merely contains the word.
    default_gateway=$(ip route | awk '$1 == "default" {print $3}')
    # xargs trims the trailing blank that `hostname -I` emits.
    ip_addresses=$(hostname -I | xargs)

    echo -e "${GREEN}Default Gateway:${NC} $default_gateway"
    echo -e "${GREEN}IP Addresses:${NC} $ip_addresses"
}

# Print `ip -s link` statistics for every interface except the loopback and
# veth* (container) interfaces, followed by socket/interface statistics.
# Globals:    GREEN, NC (read)
# Arguments:  $1 - optional directory listing the interfaces
#                  (default: /sys/class/net)
# Outputs:    report section on stdout
get_detailed_network() {
    local net_dir=${1:-/sys/class/net}
    local iface_path iface

    echo -e "\n${GREEN}=== Network Interface Statistics ===${NC}"
    # Show only physical interfaces and bridges, skip veth* (container interfaces)
    for iface_path in "$net_dir"/*; do
        [[ -e "$iface_path" ]] || continue   # unmatched glob stays literal
        iface=${iface_path##*/}
        # Exact match on "lo": a substring filter would also hide names such
        # as wlo1.
        case "$iface" in
            lo | veth*) continue ;;
        esac
        ip -s link show "$iface" 2>/dev/null
    done

    echo -e "\n${GREEN}=== Network Statistics ===${NC}"
    if command -v ss >/dev/null 2>&1; then
        ss -s
    elif command -v netstat >/dev/null 2>&1; then
        netstat -i
    else
        log_message warn "netstat/ss not found for network statistics"
    fi
}

# Print the BIOS version and the PCI device list without bridge and chipset
# plumbing entries.
# Globals:  GREEN, NC (read)
# Outputs:  report section on stdout
get_hardware_info() {
    local bios_version
    bios_version=$(dmidecode -s bios-version)

    printf '%b\n' "${GREEN}BIOS Version:${NC} ${bios_version}"
    printf '%b\n' "\n${GREEN}=== PCI Devices ===${NC}"
    # Show all interesting devices, exclude bridges and internal infrastructure
    lspci | grep -Ev "Host bridge|PCI bridge|ISA bridge|SMBus|IOMMU|Dummy"
}

# Print baseboard and system identification strings read via `dmidecode -s`.
# Globals:  GREEN, NC (read)
# Outputs:  one "Label: value" line per DMI string on stdout
get_motherboard_info() {
    # "Display label|dmidecode string keyword", printed in this order.
    local -a dmi_fields=(
        "Manufacturer|baseboard-manufacturer"
        "Product Name|baseboard-product-name"
        "Version|baseboard-version"
        "Serial Number|baseboard-serial-number"
        "System Manufacturer|system-manufacturer"
        "System Product|system-product-name"
        "System Serial|system-serial-number"
    )
    local entry

    printf '%b\n' "\n${GREEN}=== Motherboard Information ===${NC}"
    for entry in "${dmi_fields[@]}"; do
        printf '%b\n' "${GREEN}${entry%%|*}:${NC} $(dmidecode -s "${entry#*|}")"
    done
}

# Print one table row per populated DIMM (locator, size, type, speed,
# manufacturer) followed by a slot summary, all parsed from
# `dmidecode -t memory`.
# Globals:  GREEN, NC (read)
# Outputs:  DIMM table and "Memory Summary" on stdout
get_memory_details() {
    local dmi_memory total_slots populated max_capacity

    echo -e "\n${GREEN}=== Memory DIMM Information ===${NC}"

    # One dmidecode run feeds the table and all three summary figures.
    dmi_memory=$(dmidecode -t memory)

    # Collect the fields of each "Memory Device" record and print the row
    # only when the record ends (blank line or end of input). Printing at the
    # "Locator:" line would be too early: Type, Speed and Manufacturer of the
    # same record have not been read at that point.
    # Fields are matched on the exact first word, so "Configured Memory
    # Speed:", "Bank Locator:" and "Error Correction Type:" are not mistaken
    # for Speed/Locator/Type.
    awk '
        function field_value(   v) {
            v = $0
            sub(/^[^:]*:[ \t]*/, "", v)
            sub(/[ \t]+$/, "", v)
            return v
        }
        function emit_dimm() {
            if (in_device && size != "")
                printf "%-12s %-10s %-8s %-12s %-20s\n", loc, size, type, speed, mfr
            in_device = 0
            loc = ""; size = ""; type = ""; speed = ""; mfr = ""
        }
        /^Memory Device[ \t]*$/ { emit_dimm(); in_device = 1; next }
        /^[ \t]*$/              { emit_dimm(); next }
        !in_device              { next }
        $1 == "Size:" && !/No Module Installed|Not Installed/ { size = field_value() }
        $1 == "Locator:"                                       { loc = field_value() }
        $1 == "Type:" && !/Unknown/                            { type = field_value() }
        $1 == "Speed:" && !/Unknown/                           { speed = field_value() }
        $1 == "Manufacturer:" && !/Unknown/                    { mfr = field_value() }
        END { emit_dimm() }
    ' <<<"$dmi_memory"

    echo -e "\n${GREEN}Memory Summary:${NC}"
    # Slots: indented "Locator:" lines ("Bank Locator:" does not match).
    total_slots=$(grep -cE '^[[:space:]]+Locator:' <<<"$dmi_memory")
    # Populated: "Size:" lines carrying an actual size.
    populated=$(grep -E '^[[:space:]]+Size:' <<<"$dmi_memory" | grep -cvE 'No Module|Not Installed')
    max_capacity=$(awk '/Maximum Capacity/ { print $3 " " $4; exit }' <<<"$dmi_memory")

    echo -e " Total Slots: $total_slots"
    echo -e " Populated: $populated"
    echo -e " Max Capacity: $max_capacity"
}

# Print driver, MAC address, link state and (when `ethtool` is installed)
# speed/duplex/firmware for every interface except the loopback and veth*
# (container) interfaces.
# Globals:    GREEN, NC (read)
# Arguments:  $1 - optional sysfs-style directory with one entry per
#                  interface (default: /sys/class/net)
# Outputs:    one block per interface on stdout
get_nic_details() {
    local net_dir=${1:-/sys/class/net}
    local iface_path iface driver link_info line fw_ver
    local have_ethtool=0

    command -v ethtool >/dev/null 2>&1 && have_ethtool=1

    echo -e "\n${GREEN}=== Network Interface Details ===${NC}"
    # Show only physical interfaces and bridges, skip veth* (container interfaces)
    for iface_path in "$net_dir"/*; do
        [[ -e "$iface_path" ]] || continue   # unmatched glob stays literal
        iface=${iface_path##*/}
        # Exact match on "lo": a substring filter would also hide names such
        # as wlo1.
        case "$iface" in
            lo | veth*) continue ;;
        esac

        echo -e "\n${GREEN}Interface: $iface${NC}"

        # Get driver info: the symlink target's last path component.
        if [[ -L "$iface_path/device/driver" ]]; then
            driver=$(readlink "$iface_path/device/driver")
            echo -e " Driver: ${driver##*/}"
        fi

        # Get MAC address
        if [[ -f "$iface_path/address" ]]; then
            echo -e " MAC: $(<"$iface_path/address")"
        fi

        # Get link state
        if [[ -f "$iface_path/operstate" ]]; then
            echo -e " State: $(<"$iface_path/operstate")"
        fi

        # Everything below needs ethtool.
        (( have_ethtool )) || continue

        # Get speed and duplex
        link_info=$(ethtool "$iface" 2>/dev/null | grep -E "Speed:|Duplex:|Link detected:")
        if [[ -n "$link_info" ]]; then
            # `read` without IFS= strips ethtool's leading indentation.
            while read -r line; do
                echo -e " $line"
            done <<<"$link_info"
        fi

        # Get firmware version
        fw_ver=$(ethtool -i "$iface" 2>/dev/null | awk '/firmware-version/ {print $2}')
        if [[ -n "$fw_ver" ]]; then
            echo -e " Firmware: $fw_ver"
        fi
    done
}

# List storage controllers found by `lspci` (RAID, SAS, SATA, SCSI, mass
# storage, Fibre Channel, NVMe) and print subsystem, link status and kernel
# driver details for each.
# Globals:  GREEN, NC (read)
# Outputs:  controller summary and per-controller details on stdout
get_hba_info() {
    local ctrl

    echo -e "\n${GREEN}=== HBA/Storage Controller Information ===${NC}"

    # Find RAID, SAS, SATA, SCSI, and storage controllers.
    # `lspci -vmm` emits one blank-line-separated record per device with one
    # "Tag:<TAB>value" pair per line; RS="" puts awk in paragraph mode and
    # substr() skips the tag plus the tab.
    lspci -vmm 2>/dev/null | awk '
        BEGIN { RS=""; FS="\n" }
        /RAID|SAS|SATA|SCSI|Mass storage|Serial Attached|Fibre Channel|NVMe/ {
            slot=""; class=""; vendor=""; device=""; rev=""
            for (i=1; i<=NF; i++) {
                if ($i ~ /^Slot:/) slot = substr($i, 7)
                if ($i ~ /^Class:/) class = substr($i, 8)
                if ($i ~ /^Vendor:/) vendor = substr($i, 9)
                if ($i ~ /^Device:/) device = substr($i, 9)
                if ($i ~ /^Rev:/) rev = substr($i, 6)
            }
            printf "\n%s\n", slot
            printf " Class: %s\n", class
            printf " Vendor: %s\n", vendor
            printf " Device: %s\n", device
            if (rev) printf " Rev: %s\n", rev
        }
    '

    # Show detailed info for storage controllers
    echo -e "\n${GREEN}=== Storage Controller Details ===${NC}"
    # Read the slot addresses line by line instead of word-splitting a
    # command substitution.
    while read -r ctrl; do
        [[ -n "$ctrl" ]] || continue
        echo -e "\n${GREEN}Controller $ctrl:${NC}"
        lspci -vvs "$ctrl" 2>/dev/null \
            | grep -E "^[[:space:]]+(Subsystem|LnkSta|Kernel driver)" \
            | head -5
    done < <(lspci | grep -iE "RAID|SAS|SATA|SCSI|Mass storage|NVMe" | awk '{print $1}')
}

# Print the system load, the number of running services and the five most
# recent journal entries of priority "err".
# Globals:  GREEN, NC (read)
# Outputs:  report section on stdout
get_system_status() {
    local running_services

    echo -e "\n${GREEN}=== System Load ===${NC}"
    uptime

    echo -e "\n${GREEN}=== Service Status ===${NC}"
    # --no-legend removes the column header and the summary footer so that
    # only unit lines are counted; `grep -c .` counts non-empty lines and
    # prints 0 when there are none.
    running_services=$(systemctl list-units --type=service --state=running \
        --no-legend --no-pager 2>/dev/null | grep -c .)
    echo -e "${GREEN}Running services:${NC} ${running_services}"

    echo -e "\n${GREEN}=== Recent System Errors ===${NC}"
    journalctl -p err -n 5 --no-pager
}


###################
# DriveAtlas & Monitoring Functions
###################

# Download the DriveAtlas script and run it to show the physical bay mapping.
# Globals:  GREEN, NC (read)
# Outputs:  DriveAtlas output on stdout, or a warning when curl is missing,
#           the download fails, or the script exits non-zero
get_drive_atlas() {
    local -r atlas_url="http://10.10.10.63:3000/LotusGuild/driveAtlas/raw/branch/main/driveAtlas.sh"
    local atlas_script

    echo -e "\n${GREEN}=== Drive Atlas - Physical Bay Mapping ===${NC}"

    if ! command -v curl >/dev/null 2>&1; then
        log_message warn "curl not installed - cannot fetch DriveAtlas"
        return
    fi

    # SECURITY NOTE(review): this fetches a script over plain HTTP and
    # executes it; the script start-up code requires root, so it runs with
    # root privileges. Consider HTTPS and/or a checksum for the download.
    #
    # Download first, run second: with `bash <(curl ...)` a failed download
    # hands bash an empty script, which exits 0, so the failure is never
    # noticed. -f makes curl fail on HTTP errors instead of returning the
    # server's error page as if it were the script.
    if ! atlas_script=$(curl -fsL "$atlas_url" 2>/dev/null) || [[ -z "$atlas_script" ]]; then
        log_message warn "DriveAtlas failed to execute or server unavailable"
        return
    fi

    # Process substitution keeps the script's stdin attached to the caller's.
    if ! bash <(printf '%s\n' "$atlas_script") 2>/dev/null; then
        log_message warn "DriveAtlas failed to execute or server unavailable"
    fi
}

# Print Ceph health details, the OSD tree, pool usage and OSD usage when the
# `ceph` command is available.
# Globals:  GREEN, NC (read)
# Outputs:  report section on stdout, or an info line when ceph is absent
get_ceph_health() {
    # "Heading suffix|ceph sub-command" for the sections after the health
    # report; failures of these commands are ignored.
    local -a ceph_sections=(
        "OSD Tree|osd tree"
        "Pool Usage|df"
        "OSD Usage|osd df"
    )
    local -a ceph_args
    local section

    printf '%b\n' "\n${GREEN}=== Ceph Cluster Health ===${NC}"

    if ! command -v ceph >/dev/null 2>&1; then
        log_message info "Ceph tools not installed on this node"
        return
    fi

    printf '%b\n' "${GREEN}Health Status:${NC}"
    ceph health detail 2>/dev/null || log_message warn "Cannot connect to Ceph cluster"

    for section in "${ceph_sections[@]}"; do
        printf '%b\n' "\n${GREEN}=== Ceph ${section%%|*} ===${NC}"
        # Split the sub-command into separate arguments ("osd tree" -> 2).
        read -r -a ceph_args <<<"${section#*|}"
        ceph "${ceph_args[@]}" 2>/dev/null || true
    done
}

# Report whether the node_exporter service is running, its metrics URL and
# whether something listens on port 9100.
# Globals:  GREEN, NC (read)
# Outputs:  status lines on stdout
get_node_exporter_status() {
    local primary_ip

    printf '%b\n' "\n${GREEN}=== Node Exporter Status ===${NC}"

    # Not running: distinguish "installed but stopped" from "not installed".
    if ! systemctl is-active --quiet node_exporter 2>/dev/null; then
        if systemctl list-unit-files 2>/dev/null | grep -q node_exporter; then
            log_message warn "Node Exporter is installed but not running"
            printf '%b\n' "Start with: systemctl start node_exporter"
        else
            log_message info "Node Exporter not installed"
        fi
        return
    fi

    printf '%b\n' "${GREEN}Service:${NC} Running"
    # First address reported by `hostname -I`.
    primary_ip=$(hostname -I | awk '{print $1}')
    printf '%b\n' "${GREEN}Metrics URL:${NC} http://${primary_ip}:9100/metrics"

    if ss -tlnp 2>/dev/null | grep -q ':9100'; then
        printf '%b\n' "${GREEN}Port 9100:${NC} Listening"
    else
        log_message warn "Port 9100 not listening"
    fi
}

# Report whether hwmon.timer is active and, if so, its schedule and the last
# three journal lines of hwmon.service.
# Globals:  GREEN, NC (read)
# Outputs:  status lines on stdout
get_hwmon_status() {
    printf '%b\n' "\n${GREEN}=== hwmon Daemon Status ===${NC}"

    # Not active: distinguish "installed but inactive" from "not installed".
    if ! systemctl is-active --quiet hwmon.timer 2>/dev/null; then
        if systemctl list-unit-files 2>/dev/null | grep -q hwmon.timer; then
            log_message warn "hwmon timer is installed but not active"
            printf '%b\n' "Enable with: systemctl enable --now hwmon.timer"
        else
            log_message info "hwmon daemon not installed"
        fi
        return
    fi

    printf '%b\n' "${GREEN}Timer:${NC} Active"
    systemctl list-timers hwmon.timer --no-pager 2>/dev/null
    printf '%b\n' "\n${GREEN}Last Run:${NC}"
    journalctl -u hwmon.service -n 3 --no-pager 2>/dev/null || true
}

# Run a short health assessment: critical services, temperatures, a one-line
# SMART verdict per disk, Node Exporter status and (when `ceph` exists) the
# Ceph health summary.
# Globals:  GREEN, NC (read)
# Outputs:  report sections on stdout
quick_health_check() {
    local disk health

    echo -e "\n${GREEN}=== Quick Health Check ===${NC}"
    echo -e "Running quick health assessment...\n"

    # Services
    check_services

    # Temperatures
    get_temp_info

    # Disk health (quick)
    echo -e "\n${GREEN}=== Disk Health Summary ===${NC}"
    if command -v smartctl >/dev/null 2>&1; then
        # -d: whole disks only, -n: no header line. Reading line by line
        # avoids word-splitting of the command output.
        while IFS= read -r disk; do
            # Keep only the verdict after the colon of the "health" line.
            health=$(smartctl -H "/dev/$disk" 2>/dev/null \
                | grep -i "health" | awk -F: '{print $2}' | xargs)
            if [[ -n "$health" ]]; then
                echo -e "/dev/$disk: $health"
            fi
        done < <(lsblk -dn -o name | grep -E '^(sd|nvme)')
    fi

    # Node Exporter
    get_node_exporter_status

    # Ceph quick status
    if command -v ceph >/dev/null 2>&1; then
        echo -e "\n${GREEN}=== Ceph Quick Status ===${NC}"
        ceph health 2>/dev/null || true
    fi
}


###################
# Proxmox Specific Functions
###################

# Print the `systemctl is-active` state of each critical service.
# Globals:  GREEN, NC (read)
# Outputs:  one "<service>: <state>" line per service on stdout
check_services() {
    # NOTE(review): "pvecluster" does not look like a standard unit name (the
    # list also contains "pve-cluster"); it will probably always be reported
    # as inactive - confirm and drop it if so.
    local -a services=("pvedaemon" "pveproxy" "pvecluster" "pve-cluster" "corosync")
    local service status

    echo -e "${GREEN}Checking critical services:${NC}"
    for service in "${services[@]}"; do
        status=$(systemctl is-active "$service")
        # Show a placeholder rather than a dangling label when systemctl
        # printed nothing at all.
        echo -e "${GREEN}$service:${NC} ${status:-unknown}"
    done
}

# Warn when the installed Proxmox VE release is older than 6.0.
# The version is taken from the "pve-manager/<major>.<minor>" token in the
# output of `pveversion`; when it cannot be determined it is treated as 0.0.
# Outputs:  a warning via log_message for versions below the minimum
check_pve_version() {
    local -r min_major=6 min_minor=0
    local -r version_pattern='pve-manager/([0-9]+)\.([0-9]+)'
    local version_line major=0 minor=0
    local current_version="0.0"

    version_line=$(pveversion) || version_line=""
    if [[ "$version_line" =~ $version_pattern ]]; then
        major=${BASH_REMATCH[1]}
        minor=${BASH_REMATCH[2]}
        current_version="${major}.${minor}"
    fi

    # Compare major and minor as integers: a decimal comparison would rank
    # 6.10 below 6.2, and needs `bc`, which check_requirements does not
    # verify. 10# forces base 10 so a leading zero is not read as octal.
    if (( 10#$major < min_major || (10#$major == min_major && 10#$minor < min_minor) )); then
        log_message warn "Proxmox VE version $current_version may not support all features"
    fi
}

# Print the virtual machine table from `qm list`.
# Globals:  GREEN, NC (read)
# Outputs:  VM table on stdout, or an error line when `qm` is missing
# Returns:  status of `qm list`; 1 when `qm` is not installed
list_vms() {
    # Report and return instead of terminating the shell: runDiags calls
    # this inside its examination subshell, and exiting here would skip the
    # steps that follow (list_containers). The non-zero return still yields
    # a failing exit status for `--vm-list`.
    if ! command -v qm >/dev/null 2>&1; then
        log_message error "qm command not found"
        return 1
    fi

    echo -e "${GREEN}Virtual Machine Status:${NC}"
    qm list
}

# Print the LXC container table from `pct list`.
# Globals:  GREEN, NC (read)
# Outputs:  container table on stdout, or a warning when `pct` is missing
list_containers() {
    if ! command -v pct >/dev/null 2>&1; then
        log_message warn "pct command not found"
        return
    fi

    printf '%b\n' "\n${GREEN}=== LXC Container Status ===${NC}"
    pct list
}


###################
# Command Line Interface Functions
###################

# Print the usage text and terminate the script successfully.
# Globals:  VERSION (read)
# Outputs:  usage text on stdout
# Returns:  does not return; exits the shell with status 0
# NOTE(review): column spacing of the option list is reproduced as found in
# this copy of the file; verify the intended alignment.
help() {
    # Unquoted delimiter so ${VERSION} expands; everything else is literal.
    cat <<EOF
ProxDoc - The Proxmox System Doctor v${VERSION}

Usage: curl -sL "http://10.10.10.63:3000/LotusGuild/proxDoc/raw/branch/main/proxDoc.sh" | bash -s -- [OPTION]

A comprehensive diagnostic tool for Proxmox server health checks.

Treatment Options:
 --help Show this prescription guide
 --diags Perform full system examination
 --quick Quick health check (services, temps, disks)
 --drives Show physical drive bay mapping (DriveAtlas)
 --ceph Check Ceph cluster health
 --node-exporter Check Node Exporter status
 --hwmon Check hwmon daemon status
 --services Check vital Proxmox services
 --vm-list Check VM vitals
 --ct-list Check container vitals
 --backup Review backup health
EOF
    exit 0
}


###################
# Main Functions
###################

# Run the full examination: every report function in order inside a
# background subshell, with show_progress drawing a spinner until that
# subshell finishes.
# Outputs:  all report sections on stdout, framed by two info lines
runDiags() {
    # Report functions, executed top to bottom.
    local -a exam_steps=(
        get_system_info
        get_cpu_info
        get_ram_info
        get_memory_details
        get_storage_info
        get_drive_atlas
        get_network_info
        get_detailed_network
        get_nic_details
        get_hardware_info
        get_motherboard_info
        get_hba_info
        get_temp_info
        get_system_status
        get_node_exporter_status
        get_hwmon_status
        get_ceph_health
        list_vms
        list_containers
    )
    local exam_step exam_pid

    log_message info "Beginning system examination..."

    (
        for exam_step in "${exam_steps[@]}"; do
            "$exam_step"
        done
    ) &
    exam_pid=$!
    show_progress "$exam_pid"

    log_message info "Examination complete"
}

# Dispatch a command-line option to the function that implements it.
# Globals:    GREEN, RED, NC (read)
# Arguments:  $1 - option (see help for the list)
# Outputs:    output of the selected report; for an unknown option an error
#             on stderr followed by the usage text
# Returns:    status of the selected report; exits 1 for an unknown option
checkForInput() {
    case "${1-}" in
        --help)          help ;;
        --diags)         check_requirements; runDiags ;;
        --quick)         quick_health_check ;;
        --drives)        get_drive_atlas ;;
        --ceph)          get_ceph_health ;;
        --node-exporter) get_node_exporter_status ;;
        --hwmon)         get_hwmon_status ;;
        --services)      check_services ;;
        --vm-list)       list_vms ;;
        --ct-list)       list_containers ;;
        --backup)
            echo -e "${GREEN}Backup Status:${NC}"
            pvesm status 2>/dev/null || log_message warn "pvesm not available"
            ;;
        *)
            echo -e "${RED}Invalid option: ${1-}${NC}" >&2
            # help ends with `exit 0`; run it in a subshell so that a bad
            # option can still be reported as a failure to the caller.
            (help)
            exit 1
            ;;
    esac
}


###################
# Script Execution
###################

# Only the first command-line argument is evaluated.
argOne=${1-}

# Show header
print_header

# Showing the usage text needs no privileges, so handle it before the root
# check: otherwise an unprivileged user only ever sees "must be run as root".
# help terminates the script itself; the explicit exit documents that nothing
# below runs on this path.
if [[ -z "$argOne" || "$argOne" == "--help" ]]; then
    help
    exit 0
fi

# Check root
if [[ $EUID -ne 0 ]]; then
    handle_error "This script must be run as root"
fi

# Set trap for interrupts
trap 'echo -e "${RED}Script interrupted.${NC}"; exit 1' INT TERM

checkForInput "$argOne"