Update to v1.1.0: Add interactive menu, DriveAtlas, and monitoring integrations
- Add interactive numbered menu when run without arguments
- Add DriveAtlas integration (--drives) for physical drive bay mapping
- Add Ceph cluster health monitoring (--ceph)
- Add Node Exporter status check (--node-exporter)
- Add hwmon daemon status check (--hwmon)
- Add quick health check mode (--quick)
- Add container list option (--ct-list)
- Full diagnostics now includes all monitoring checks
- Update README with new features and changelog

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
354
proxDoc.sh
354
proxDoc.sh
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash

# Script version. Only the current release value is assigned; the stale
# 1.0.0 assignment that preceded it was dead code.
VERSION="1.1.0"
# Spinner frame characters (presumably consumed by show_progress - confirm).
SPINNER="/-\|"
|
||||
|
||||
###################
|
||||
@@ -63,7 +63,7 @@ show_progress() {
|
||||
|
||||
check_requirements() {
|
||||
log_message info "Checking medical equipment..."
|
||||
local tools=("dmidecode" "lscpu" "ip" "smartctl" "sensors" "netstat")
|
||||
local tools=("dmidecode" "lscpu" "ip" "smartctl" "sensors" "lspci")
|
||||
for tool in "${tools[@]}"; do
|
||||
if ! command -v "$tool" >/dev/null 2>&1; then
|
||||
handle_error "Required instrument '$tool' is missing"
|
||||
@@ -151,10 +151,16 @@ get_network_info() {
|
||||
}
|
||||
|
||||
# Print per-interface traffic counters followed by an interface statistics
# summary.
#
# Outputs: writes the report to stdout.
# The summary uses `netstat -i` when net-tools is installed, falls back to
# `ss -s`, and logs a warning through log_message when neither exists.
# Each section is printed exactly once, and netstat is never invoked
# without first checking that it is available.
get_detailed_network() {
    echo -e "\n${GREEN}=== Network Interface Statistics ===${NC}"
    ip -s link show

    echo -e "\n${GREEN}=== Network Statistics ===${NC}"
    if command -v netstat >/dev/null 2>&1; then
        netstat -i
    elif command -v ss >/dev/null 2>&1; then
        ss -s
    else
        log_message warn "netstat/ss not found for network statistics"
    fi
}
|
||||
|
||||
get_hardware_info() {
|
||||
@@ -163,6 +169,122 @@ get_hardware_info() {
|
||||
lspci | grep -i -E "vga|ethernet|raid"
|
||||
}
|
||||
|
||||
# Print baseboard and system identity strings obtained from dmidecode.
#
# Outputs: one "Label: value" line per DMI string keyword, written to stdout
# beneath a section header.
get_motherboard_info() {
    local -a labels=(
        "Manufacturer"
        "Product Name"
        "Version"
        "Serial Number"
        "System Manufacturer"
        "System Product"
        "System Serial"
    )
    local -a keywords=(
        "baseboard-manufacturer"
        "baseboard-product-name"
        "baseboard-version"
        "baseboard-serial-number"
        "system-manufacturer"
        "system-product-name"
        "system-serial-number"
    )
    local idx

    echo -e "\n${GREEN}=== Motherboard Information ===${NC}"
    # labels[] and keywords[] are parallel arrays: entry N of one belongs to
    # entry N of the other.
    for idx in "${!labels[@]}"; do
        echo -e "${GREEN}${labels[$idx]}:${NC} $(dmidecode -s "${keywords[$idx]}")"
    done
}
|
||||
|
||||
# Print one row per populated DIMM plus a slot summary, based on
# `dmidecode -t memory`.
#
# Outputs (stdout):
#   - a row "locator size type speed manufacturer" for every Memory Device
#     record whose Size is not "No Module Installed"
#   - total slot count, populated slot count and maximum capacity
#
# A row is emitted only once its whole record has been read, so every
# column describes the same DIMM. Field names are anchored to the start of
# the (indented) line so that look-alike fields such as
# "Configured Memory Speed:", "Volatile Size:" or "Bank Locator:" cannot
# overwrite the real Speed/Size/Locator values.
get_memory_details() {
    local mem_info
    # Query the DMI tables once and reuse the text for every report below.
    mem_info=$(dmidecode -t memory)

    echo -e "\n${GREEN}=== Memory DIMM Information ===${NC}"
    awk '
        # Return the text after the first "name:" of a field line, trimmed.
        function field_value(line) {
            sub(/^[^:]*:[ \t]*/, "", line)
            sub(/[ \t]+$/, "", line)
            return line
        }
        # Print the record collected so far (if populated) and reset state.
        function flush_device() {
            if (in_device && size != "")
                printf "%-12s %-10s %-8s %-12s %-20s\n", loc, size, type, speed, mfr
            in_device = 0
            loc = ""; size = ""; type = ""; speed = ""; mfr = ""
        }
        /^Memory Device$/ { flush_device(); in_device = 1; next }
        /^[ \t]*$/        { flush_device(); next }
        !in_device        { next }
        /^[ \t]+Size:/ {
            v = field_value($0)
            if (v !~ /^No Module/) size = v
            next
        }
        /^[ \t]+Locator:/ { loc = field_value($0); next }
        /^[ \t]+Type:/ {
            v = field_value($0)
            if (v !~ /Unknown/) type = v
            next
        }
        /^[ \t]+Speed:/ {
            v = field_value($0)
            if (v !~ /Unknown/) speed = v
            next
        }
        /^[ \t]+Manufacturer:/ {
            v = field_value($0)
            if (v !~ /Unknown/) mfr = v
            next
        }
        END { flush_device() }
    ' <<<"$mem_info"

    echo -e "\n${GREEN}Memory Summary:${NC}"
    # Anchored so "Memory Device Mapped Address" records are not counted as slots.
    echo -e " Total Slots: $(grep -c '^Memory Device$' <<<"$mem_info")"
    # Anchored so "Volatile Size:" / "Logical Size:" lines are not counted.
    echo -e " Populated: $(grep -E '^[[:space:]]+Size:' <<<"$mem_info" | grep -cv 'No Module')"
    echo -e " Max Capacity: $(awk '/Maximum Capacity/ { print $3" "$4; exit }' <<<"$mem_info")"
}
|
||||
|
||||
# Print driver, MAC address, link state and (when ethtool is installed)
# speed/duplex/link and firmware details for every non-loopback interface.
#
# Arguments: $1 - optional directory holding one entry per interface
#                 (defaults to /sys/class/net)
# Outputs:   writes the report to stdout.
#
# Interfaces are enumerated with a glob rather than by parsing `ls`. Only
# the interface named exactly "lo" is skipped, so names that merely contain
# "lo" (for example wlo1) are still reported. Plain files in the directory
# (for example bonding_masters) are ignored because they are not interfaces.
get_nic_details() {
    local net_dir="${1:-/sys/class/net}"
    local path iface driver_link link_info line fw_ver

    echo -e "\n${GREEN}=== Network Interface Details ===${NC}"
    for path in "$net_dir"/*; do
        # Also covers the unmatched-glob case, where the pattern stays literal.
        [[ -d "$path" ]] || continue
        iface=${path##*/}
        [[ "$iface" == "lo" ]] && continue

        echo -e "\n${GREEN}Interface: $iface${NC}"

        # Get driver info: the driver name is the last component of the
        # symlink target.
        if [[ -L "$path/device/driver" ]]; then
            driver_link=$(readlink "$path/device/driver")
            echo -e " Driver: ${driver_link##*/}"
        fi

        # Get MAC address
        if [[ -f "$path/address" ]]; then
            echo -e " MAC: $(cat "$path/address")"
        fi

        # Get link state
        if [[ -f "$path/operstate" ]]; then
            echo -e " State: $(cat "$path/operstate")"
        fi

        # Use ethtool if available
        if command -v ethtool >/dev/null 2>&1; then
            # Get speed and duplex
            link_info=$(ethtool "$iface" 2>/dev/null | grep -E "Speed:|Duplex:|Link detected:")
            if [[ -n "$link_info" ]]; then
                # read strips the leading tab ethtool puts on each line.
                while read -r line; do
                    echo -e " $line"
                done <<<"$link_info"
            fi

            # Get firmware version; keep the whole value, which may contain
            # spaces (e.g. "0x80000d67, 1.1824.0").
            fw_ver=$(ethtool -i "$iface" 2>/dev/null | sed -n 's/^firmware-version:[[:space:]]*//p')
            if [[ -n "$fw_ver" ]]; then
                echo -e " Firmware: $fw_ver"
            fi
        fi
    done
}
|
||||
|
||||
# List storage/HBA controllers found by lspci, then show subsystem, link
# status and kernel driver lines for each of them.
#
# Outputs: writes both sections to stdout.
#
# Controllers are selected by their PCI *class* field only, and the same
# selection feeds both sections. Matching the whole record or line
# (especially case-insensitively) also selects unrelated devices whose
# vendor or device text happens to contain a keyword - "Renesas" contains
# "sas" - and lets the two sections disagree about which controllers exist.
# NVMe controllers are matched through their class name,
# "Non-Volatile memory controller".
get_hba_info() {
    local class_re='RAID|SCSI|Mass storage|Serial Attached|Fibre Channel|Non-Volatile memory'
    local pci_info ctrl
    # One awk program, two output modes: mode=list prints the formatted
    # listing, mode=slots prints just the slot address of each match.
    local parser='
        BEGIN { RS = ""; FS = "\n" }
        {
            slot = ""; class = ""; vendor = ""; device = ""; rev = ""
            for (i = 1; i <= NF; i++) {
                if ($i ~ /^Slot:/)   slot   = substr($i, 7)
                if ($i ~ /^Class:/)  class  = substr($i, 8)
                if ($i ~ /^Vendor:/) vendor = substr($i, 9)
                if ($i ~ /^Device:/) device = substr($i, 9)
                if ($i ~ /^Rev:/)    rev    = substr($i, 6)
            }
            if (class !~ class_re) next
            if (mode == "slots") { print slot; next }
            printf "\n%s\n", slot
            printf " Class: %s\n", class
            printf " Vendor: %s\n", vendor
            printf " Device: %s\n", device
            if (rev) printf " Rev: %s\n", rev
        }'

    pci_info=$(lspci -vmm 2>/dev/null)

    echo -e "\n${GREEN}=== HBA/Storage Controller Information ===${NC}"
    awk -v class_re="$class_re" -v mode="list" "$parser" <<<"$pci_info"

    # Show detailed info for storage controllers
    echo -e "\n${GREEN}=== Storage Controller Details ===${NC}"
    while read -r ctrl; do
        [[ -n "$ctrl" ]] || continue
        echo -e "\n${GREEN}Controller $ctrl:${NC}"
        # </dev/null keeps lspci from consuming the slot list on the loop's stdin.
        lspci -vvs "$ctrl" 2>/dev/null </dev/null \
            | grep -E "^\s+(Subsystem|LnkSta|Kernel driver)" \
            | head -5
    done < <(awk -v class_re="$class_re" -v mode="slots" "$parser" <<<"$pci_info")
}
|
||||
|
||||
get_system_status() {
|
||||
echo -e "\n${GREEN}=== System Load ===${NC}"
|
||||
uptime
|
||||
@@ -174,6 +296,105 @@ get_system_status() {
|
||||
journalctl -p err -n 5 --no-pager
|
||||
}
|
||||
|
||||
###################
|
||||
# DriveAtlas & Monitoring Functions
|
||||
###################
|
||||
|
||||
# Download the DriveAtlas script and run it to show the physical drive bay
# mapping.
#
# Globals: DRIVE_ATLAS_URL (read, optional) - overrides the download URL
# Outputs: DriveAtlas output on stdout; a warning via log_message when curl
#          is missing, the download fails, or the script exits non-zero
# Returns: 0 always (DriveAtlas is a best-effort extra)
#
# The script is downloaded to a temporary file first so that curl's exit
# status can be checked. When curl feeds bash through process substitution
# its status is lost: an unreachable server produces an empty script that
# bash "runs" successfully, and no warning is ever shown. -f makes curl fail
# on HTTP errors instead of saving the error page, and the timeouts keep an
# unreachable server from stalling the whole run.
#
# NOTE(review): this executes remotely fetched code over plain HTTP with the
# privileges of the caller; consider HTTPS and/or checksum pinning.
get_drive_atlas() {
    local url="${DRIVE_ATLAS_URL:-http://10.10.10.63:3000/LotusGuild/driveAtlas/raw/branch/main/driveAtlas.sh}"
    local script

    echo -e "\n${GREEN}=== Drive Atlas - Physical Bay Mapping ===${NC}"
    if ! command -v curl >/dev/null 2>&1; then
        log_message warn "curl not installed - cannot fetch DriveAtlas"
        return 0
    fi

    if ! script=$(mktemp); then
        log_message warn "DriveAtlas failed to execute or server unavailable"
        return 0
    fi

    # stderr of the fetched script stays suppressed, as it always has been.
    if ! { curl -fsSL --connect-timeout 5 --max-time 60 -o "$script" "$url" 2>/dev/null \
            && [[ -s "$script" ]] \
            && bash "$script" 2>/dev/null; }; then
        log_message warn "DriveAtlas failed to execute or server unavailable"
    fi

    rm -f -- "$script"
    return 0
}
|
||||
|
||||
# Report Ceph cluster health, OSD tree, pool usage and OSD usage.
#
# Outputs: writes the report to stdout; logs an info message when the ceph
#          CLI is not installed and a warning when the cluster is unreachable
# Returns: 0 always
#
# If the initial health query fails, the remaining queries are skipped:
# they would fail the same way, and each one can block while it waits for
# the monitors. A connect timeout bounds that wait for every query.
get_ceph_health() {
    local -a ceph_cmd=(ceph --connect-timeout 10)

    echo -e "\n${GREEN}=== Ceph Cluster Health ===${NC}"
    if ! command -v ceph >/dev/null 2>&1; then
        log_message info "Ceph tools not installed on this node"
        return 0
    fi

    echo -e "${GREEN}Health Status:${NC}"
    if ! "${ceph_cmd[@]}" health detail 2>/dev/null; then
        log_message warn "Cannot connect to Ceph cluster"
        return 0
    fi

    # The sections below are best-effort: a failure of one must not abort
    # the others.
    echo -e "\n${GREEN}=== Ceph OSD Tree ===${NC}"
    "${ceph_cmd[@]}" osd tree 2>/dev/null || true

    echo -e "\n${GREEN}=== Ceph Pool Usage ===${NC}"
    "${ceph_cmd[@]}" df 2>/dev/null || true

    echo -e "\n${GREEN}=== Ceph OSD Usage ===${NC}"
    "${ceph_cmd[@]}" osd df 2>/dev/null || true
}
|
||||
|
||||
# Report whether the node_exporter systemd service is running and whether
# its metrics port is listening.
#
# Outputs: service state, metrics URL and port state on stdout; warnings and
#          info messages through log_message
#
# The port check requires ":9100" to be followed by whitespace, so other
# listeners whose port merely starts with 9100 (e.g. :91000) do not count.
# If `hostname -I` yields no address, the URL falls back to "localhost"
# instead of printing a URL with an empty host.
get_node_exporter_status() {
    local ip

    echo -e "\n${GREEN}=== Node Exporter Status ===${NC}"
    if systemctl is-active --quiet node_exporter 2>/dev/null; then
        echo -e "${GREEN}Service:${NC} Running"
        ip=$(hostname -I 2>/dev/null | awk '{print $1}')
        echo -e "${GREEN}Metrics URL:${NC} http://${ip:-localhost}:9100/metrics"
        if ss -tln 2>/dev/null | grep -qE ':9100[[:space:]]'; then
            echo -e "${GREEN}Port 9100:${NC} Listening"
        else
            log_message warn "Port 9100 not listening"
        fi
    elif systemctl list-unit-files 2>/dev/null | grep -q node_exporter; then
        log_message warn "Node Exporter is installed but not running"
        echo -e "Start with: systemctl start node_exporter"
    else
        log_message info "Node Exporter not installed"
    fi
}
|
||||
|
||||
# Report the state of the hwmon systemd timer and its most recent runs.
#
# Outputs: timer schedule and the last three journal lines of hwmon.service
#          when the timer is active; otherwise a warning (unit present but
#          inactive) or an info message (unit absent) through log_message.
get_hwmon_status() {
    echo -e "\n${GREEN}=== hwmon Daemon Status ===${NC}"

    # Handle the "timer not active" cases first, then fall through to the
    # active-timer report.
    if ! systemctl is-active --quiet hwmon.timer 2>/dev/null; then
        if systemctl list-unit-files 2>/dev/null | grep -q hwmon.timer; then
            log_message warn "hwmon timer is installed but not active"
            echo -e "Enable with: systemctl enable --now hwmon.timer"
        else
            log_message info "hwmon daemon not installed"
        fi
        return
    fi

    echo -e "${GREEN}Timer:${NC} Active"
    systemctl list-timers hwmon.timer --no-pager 2>/dev/null
    echo -e "\n${GREEN}Last Run:${NC}"
    journalctl -u hwmon.service -n 3 --no-pager 2>/dev/null || true
}
|
||||
|
||||
# Run a condensed health assessment: services, temperatures, a one-line
# SMART verdict per disk, Node Exporter status and, when the ceph CLI is
# installed, the Ceph health summary.
#
# Outputs: writes the report to stdout.
#
# Loop variables are local so they do not leak into the caller, and disk
# names are read without word-splitting a command substitution. Disks for
# which smartctl reports no health line are omitted from the summary.
quick_health_check() {
    local -a disks=()
    local disk health

    echo -e "\n${GREEN}=== Quick Health Check ===${NC}"
    echo -e "Running quick health assessment...\n"

    # Services
    check_services

    # Temperatures
    get_temp_info

    # Disk health (quick)
    echo -e "\n${GREEN}=== Disk Health Summary ===${NC}"
    if command -v smartctl >/dev/null 2>&1; then
        mapfile -t disks < <(lsblk -dn -o NAME 2>/dev/null)
        for disk in "${disks[@]}"; do
            # Only SATA/SAS (sd*) and NVMe devices are assessed.
            [[ "$disk" =~ ^(sd|nvme) ]] || continue
            # Take the text after the first ":" of every line mentioning
            # "health", trimmed and joined by single spaces.
            health=$(smartctl -H "/dev/$disk" 2>/dev/null | awk -F: '
                tolower($0) ~ /health/ {
                    v = $2
                    gsub(/^[ \t]+|[ \t]+$/, "", v)
                    if (v != "") out = (out == "" ? v : out " " v)
                }
                END { print out }')
            if [[ -n "$health" ]]; then
                echo -e "/dev/$disk: $health"
            fi
        done
    fi

    # Node Exporter
    get_node_exporter_status

    # Ceph quick status
    if command -v ceph >/dev/null 2>&1; then
        echo -e "\n${GREEN}=== Ceph Quick Status ===${NC}"
        ceph health 2>/dev/null || true
    fi
}
|
||||
|
||||
###################
|
||||
# Proxmox Specific Functions
|
||||
###################
|
||||
@@ -221,13 +442,21 @@ help() {
|
||||
echo "A comprehensive diagnostic tool for Proxmox server health checks."
|
||||
echo ""
|
||||
echo "Treatment Options:"
|
||||
echo " --help Show this prescription guide"
|
||||
echo " --diags Perform full system examination"
|
||||
echo " --connect Make a house call to a remote Proxmox host"
|
||||
echo " --services Check vital services"
|
||||
echo " --vm-list Check VM vitals"
|
||||
echo " --backup Review backup health"
|
||||
echo " --save Save examination results to medical record"
|
||||
echo " --help Show this prescription guide"
|
||||
echo " --diags Perform full system examination"
|
||||
echo " --quick Quick health check (services, temps, disks)"
|
||||
echo " --drives Show physical drive bay mapping (DriveAtlas)"
|
||||
echo " --ceph Check Ceph cluster health"
|
||||
echo " --node-exporter Check Node Exporter status"
|
||||
echo " --hwmon Check hwmon daemon status"
|
||||
echo " --services Check vital Proxmox services"
|
||||
echo " --vm-list Check VM vitals"
|
||||
echo " --ct-list Check container vitals"
|
||||
echo " --backup Review backup health"
|
||||
echo " --connect Make a house call to a remote Proxmox host"
|
||||
echo " --save Save examination results to medical record"
|
||||
echo ""
|
||||
echo "Interactive mode: Run without arguments for menu"
|
||||
exit 0
|
||||
}
|
||||
|
||||
@@ -242,6 +471,44 @@ connectToHost() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Show the interactive menu, read one choice and run the matching action.
#
# Outputs: menu and action output on stdout
# Returns: the status of the selected action; 1 when input ends before a
#          choice was entered. Choosing 0 exits the script with status 0.
#
# An invalid choice redisplays the menu through a loop. Re-invoking the
# function instead recurses without bound once stdin reaches end-of-file,
# because every failed read yields an empty, invalid choice.
# Option 12 runs check_requirements before the diagnostics, as the --save
# command-line option does.
show_menu() {
    local choice

    while true; do
        echo ""
        echo -e "${GREEN}Select a diagnostic option:${NC}"
        echo ""
        echo " 1) Full System Diagnostics"
        echo " 2) Quick Health Check"
        echo " 3) Drive Atlas (Physical Bay Mapping)"
        echo " 4) Check Proxmox Services"
        echo " 5) VM Status"
        echo " 6) Container Status"
        echo " 7) Ceph Cluster Health"
        echo " 8) Node Exporter Status"
        echo " 9) hwmon Daemon Status"
        echo " 10) Backup Status"
        echo " 11) Connect to Remote Host"
        echo " 12) Save Full Report to File"
        echo " 0) Exit"
        echo ""

        choice=""
        # read fails at end-of-file; still honour a final line that lacks a
        # trailing newline, otherwise stop instead of looping forever.
        if ! read -rp "Enter choice [0-12]: " choice && [[ -z "$choice" ]]; then
            echo ""
            return 1
        fi

        case "$choice" in
            1) check_requirements; runDiags ;;
            2) quick_health_check ;;
            3) get_drive_atlas ;;
            4) check_services ;;
            5) list_vms ;;
            6) list_containers ;;
            7) get_ceph_health ;;
            8) get_node_exporter_status ;;
            9) get_hwmon_status ;;
            10) echo -e "${GREEN}Backup Status:${NC}"; pvesm status 2>/dev/null || log_message warn "pvesm not available" ;;
            11) connectToHost ;;
            12) exec 1> >(tee "proxmox_diag_$(date '+%Y%m%d_%H%M%S').log"); check_requirements; runDiags ;;
            0) echo "Goodbye!"; exit 0 ;;
            *) echo -e "${RED}Invalid option${NC}"; continue ;;
        esac
        # A valid action ran: hand its status back to the caller.
        return
    done
}
|
||||
|
||||
###################
|
||||
# Main Functions
|
||||
###################
|
||||
@@ -251,13 +518,22 @@ runDiags() {
|
||||
get_system_info
|
||||
get_cpu_info
|
||||
get_ram_info
|
||||
get_memory_details
|
||||
get_storage_info
|
||||
get_disk_health
|
||||
get_drive_atlas
|
||||
get_network_info
|
||||
get_detailed_network
|
||||
get_nic_details
|
||||
get_hardware_info
|
||||
get_motherboard_info
|
||||
get_hba_info
|
||||
get_temp_info
|
||||
get_system_status
|
||||
get_node_exporter_status
|
||||
get_hwmon_status
|
||||
get_ceph_health
|
||||
list_vms
|
||||
list_containers
|
||||
) & show_progress $!
|
||||
log_message info "Examination complete"
|
||||
@@ -266,44 +542,42 @@ runDiags() {
|
||||
|
||||
# Dispatch a single command-line option to its action.
#
# Arguments: $1 - the option string (e.g. --diags, --quick, --save)
# Outputs:   whatever the selected action prints; an "Invalid option"
#            message followed by the help text for anything unrecognised
#
# Each option appears exactly once and the catch-all arm is last. A
# catch-all placed ahead of other arms makes every arm after it
# unreachable, because case runs only the first matching pattern.
checkForInput() {
    case "$1" in
        --help) help ;;
        --diags) check_requirements; runDiags ;;
        --quick) quick_health_check ;;
        --drives) get_drive_atlas ;;
        --ceph) get_ceph_health ;;
        --node-exporter) get_node_exporter_status ;;
        --hwmon) get_hwmon_status ;;
        --connect) connectToHost ;;
        --services) check_services ;;
        --vm-list) list_vms ;;
        --ct-list) list_containers ;;
        --backup) echo -e "${GREEN}Backup Status:${NC}"; pvesm status 2>/dev/null || log_message warn "pvesm not available" ;;
        # Tee all further stdout into a timestamped log before running.
        --save) exec 1> >(tee "proxmox_diag_$(date '+%Y%m%d_%H%M%S').log"); check_requirements; runDiags ;;
        *) echo -e "${RED}Invalid option: $1${NC}"; help ;;
    esac
}
|
||||
|
||||
# Print the header, install the interrupt trap, require root, then run the
# diagnostics locally when checkIfOnHypervisor succeeds or connect to a
# remote host otherwise.
#
# NOTE(review): the script-execution section at the bottom of the file
# performs the same header/root/trap setup itself - confirm whether this
# function is still needed.
main() {
    print_header

    trap 'echo -e "${RED}Script interrupted.${NC}"; exit 1' INT TERM

    if [[ $EUID -ne 0 ]]; then
        handle_error "This script must be run as root"
    fi

    # Not on a hypervisor: hand over to the remote-connection flow.
    if ! checkIfOnHypervisor; then
        connectToHost
        return
    fi

    runDiags
}
|
||||
|
||||
###################
|
||||
# Script Execution
|
||||
###################
|
||||
argOne=$1

# Show header
print_header

# Check root
if [[ $EUID -ne 0 ]]; then
    handle_error "This script must be run as root"
fi

# Set trap for interrupts
trap 'echo -e "${RED}Script interrupted.${NC}"; exit 1' INT TERM

# With an argument, dispatch it once; without one, show the interactive
# menu. Nothing else runs afterwards, so a single-purpose option such as
# --services is not followed by a full diagnostics run.
if [[ -n $argOne ]]; then
    checkForInput "$argOne"
else
    show_menu
fi
|
||||
Reference in New Issue
Block a user