Compare commits
12 Commits
e1dac4c08c
...
6125fb9d6b
| Author | SHA1 | Date | |
|---|---|---|---|
| 6125fb9d6b | |||
| 86be5fd1c1 | |||
| a491ae4592 | |||
| 7514e2ba7c | |||
| f7ed682bdb | |||
| 148a7ac644 | |||
| 67d4b76324 | |||
| 6633a0a9a1 | |||
| eff8eb3a3c | |||
| 07989c8788 | |||
| c8fadf924b | |||
| c25e3ccc76 |
330
proxDoc.sh
330
proxDoc.sh
@@ -2,6 +2,37 @@
|
|||||||
|
|
||||||
VERSION="1.1.0"
|
VERSION="1.1.0"
|
||||||
|
|
||||||
|
###################
# Timeout Configuration
###################
# Timeout in seconds applied to external commands (used with timeout(1)).
# Defaults to 30. May be overridden with the PROXDOC_CMD_TIMEOUT environment
# variable; any value that is not a positive integer falls back to 30.
CMD_TIMEOUT="${PROXDOC_CMD_TIMEOUT:-30}"
[[ "$CMD_TIMEOUT" =~ ^[1-9][0-9]*$ ]] || CMD_TIMEOUT=30
readonly CMD_TIMEOUT

###################
# Logging Configuration
###################
# Optional log file - set via environment variable PROXDOC_LOGFILE.
# Empty means "no log file".
LOGFILE="${PROXDOC_LOGFILE:-}"

###################
# Cached Data
###################
# Disk list cache - populated on first use
DISK_LIST=""
# Unit files cache - populated on first use
UNIT_FILES=""

###################
# Pattern Constants
###################
# Virtual/firewall interface patterns to skip (extended regex, anchored at start)
readonly VIRTUAL_IFACE_PATTERN="^(veth|fwbr|fwln|fwpr|tap)"
# Storage controller patterns for HBA detection (extended regex alternation)
readonly STORAGE_CONTROLLER_PATTERN="RAID|SAS|SATA|SCSI|Mass storage|Serial Attached|Fibre Channel|NVMe"
# Disk device patterns (extended regex matched against lsblk device names)
readonly DISK_DEVICE_PATTERN="^sd|^nvme"
# PCI devices to exclude from hardware info (extended regex alternation)
readonly EXCLUDED_PCI_PATTERN="Host bridge|PCI bridge|ISA bridge|SMBus|IOMMU|Dummy|USB controller|Audio device|Encryption controller|Multimedia controller"
|
||||||
|
|
||||||
###################
|
###################
|
||||||
# Color Definitions
|
# Color Definitions
|
||||||
###################
|
###################
|
||||||
@@ -31,18 +62,42 @@ print_header() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Error handling flags
|
||||||
|
ERRORS_OCCURRED=0
|
||||||
|
WARNINGS_OCCURRED=0
|
||||||
|
|
||||||
|
#######################################
# EXIT-trap handler: reports a non-zero exit status.
# The status is read from $? on entry, so nothing may run before the
# capture below.
# Globals:   RED, NC (read; treated as empty when unset)
# Outputs:   one notice line on stderr when the status is non-zero
# Returns:   0
#######################################
cleanup() {
    local exit_code=$?   # capture first: any other command would reset $?

    if (( exit_code != 0 )); then
        # Diagnostics belong on stderr, matching handle_error.
        echo -e "\n${RED:-}Script terminated with exit code: $exit_code${NC:-}" >&2
    fi
    # Add any cleanup tasks here (temp files, etc.)
}
|
||||||
|
|
||||||
handle_error() {
|
handle_error() {
|
||||||
echo -e "${RED}Error: $1${NC}"
|
local message="$1"
|
||||||
exit 1
|
local fatal="${2:-true}" # Default to fatal error
|
||||||
|
echo -e "${RED}Error: $message${NC}" >&2
|
||||||
|
ERRORS_OCCURRED=$((ERRORS_OCCURRED + 1))
|
||||||
|
if [[ "$fatal" == "true" ]]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
log_message() {
|
log_message() {
|
||||||
local level=$1
|
local level="$1"
|
||||||
local message=$2
|
local message="$2"
|
||||||
case $level in
|
case "$level" in
|
||||||
info) echo -e "${GREEN}[INFO]${NC} $message" ;;
|
info) echo -e "${GREEN}[INFO]${NC} $message" ;;
|
||||||
warn) echo -e "${YELLOW}[WARN]${NC} $message" ;;
|
warn)
|
||||||
error) echo -e "${RED}[ERROR]${NC} $message" ;;
|
echo -e "${YELLOW}[WARN]${NC} $message"
|
||||||
|
WARNINGS_OCCURRED=$((WARNINGS_OCCURRED + 1))
|
||||||
|
;;
|
||||||
|
error)
|
||||||
|
echo -e "${RED}[ERROR]${NC} $message" >&2
|
||||||
|
ERRORS_OCCURRED=$((ERRORS_OCCURRED + 1))
|
||||||
|
;;
|
||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,6 +122,46 @@ checkIfOnHypervisor() {
|
|||||||
command -v pveversion >/dev/null 2>&1
|
command -v pveversion >/dev/null 2>&1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Print the names of disk devices matching DISK_DEVICE_PATTERN, one per line.
# The lsblk result is cached in DISK_LIST. A separate DISK_LIST_LOADED flag
# records that the lookup ran, so an empty result is cached too instead of
# re-running lsblk on every call.
# NOTE: when the first call happens inside $(...) the cache is filled in
# that subshell only; call `get_disk_list >/dev/null` once in the main
# shell to prime it.
# Globals:   DISK_DEVICE_PATTERN (read), DISK_LIST, DISK_LIST_LOADED (written)
# Outputs:   device names on stdout (a single empty line when none match)
# Returns:   0
#######################################
get_disk_list() {
    # A pre-seeded, non-empty DISK_LIST is honoured even without the flag.
    if [[ -z "${DISK_LIST_LOADED:-}" && -z "${DISK_LIST:-}" ]]; then
        # -n drops the NAME header; `|| true` keeps a no-match grep from
        # being treated as a failure under `set -e` / pipefail.
        DISK_LIST=$(lsblk -d -n -o name 2>/dev/null \
            | grep -E -- "$DISK_DEVICE_PATTERN") || true
        DISK_LIST_LOADED=1
    fi
    printf '%s\n' "$DISK_LIST"
}
|
||||||
|
|
||||||
|
#######################################
# Print the output of `systemctl list-unit-files`, cached in UNIT_FILES.
# UNIT_FILES_LOADED records that the lookup ran, so an empty or failed
# result is not retried on every call.
# NOTE: when the first call happens inside $(...) or a pipeline the cache
# is filled in that subshell only; call `get_unit_files >/dev/null` once in
# the main shell to prime it.
# Globals:   UNIT_FILES, UNIT_FILES_LOADED (written)
# Outputs:   the unit-file listing on stdout
# Returns:   0
#######################################
get_unit_files() {
    # A pre-seeded, non-empty UNIT_FILES is honoured even without the flag.
    if [[ -z "${UNIT_FILES_LOADED:-}" && -z "${UNIT_FILES:-}" ]]; then
        # Best effort: a missing or failing systemctl yields an empty cache.
        UNIT_FILES=$(systemctl list-unit-files 2>/dev/null) || true
        UNIT_FILES_LOADED=1
    fi
    printf '%s\n' "$UNIT_FILES"
}
|
||||||
|
|
||||||
|
#######################################
# Check whether a unit name appears in the cached unit-file listing.
# The name is matched as a literal substring, so `node_exporter` matches
# `node_exporter.service` and the dot in `hwmon.timer` is not a wildcard.
# Globals:   UNIT_FILES (read; filled via get_unit_files)
# Arguments: $1 - unit name, or a fragment of one
# Returns:   0 if found, 1 if not found or the name is empty
#######################################
unit_file_exists() {
    local unit_name="$1"

    # An empty pattern would match every line; treat it as "not found".
    [[ -n "$unit_name" ]] || return 1

    # Prime the cache in the current shell. Piping get_unit_files would run
    # it in a subshell and discard the cache, and `producer | grep -q` can
    # report failure under pipefail when grep exits early.
    get_unit_files >/dev/null

    grep -qF -- "$unit_name" <<< "$UNIT_FILES"
}
|
||||||
|
|
||||||
|
#######################################
# Wait for a background process, showing a spinner while it runs.
# Liveness is polled with `kill -0` rather than ps. The spinner is drawn
# only when stdout is a terminal, so pipes and log files stay free of
# carriage returns and spinner characters.
# Usage:     wait_for_process $pid [delay]
# Arguments: $1 - PID of a child of the current shell
#            $2 - polling interval in seconds (default 0.1)
# Returns:   2 when no PID is given; otherwise the status of `wait`, i.e.
#            the process's exit status (127 if it is not a child of this
#            shell)
#######################################
wait_for_process() {
    local pid="$1"
    local delay="${2:-0.1}"
    local spinner='|/-\'
    local i=0
    local show_spinner=false

    if [[ -z "$pid" ]]; then
        echo "wait_for_process: missing PID" >&2
        return 2
    fi

    if [[ -t 1 ]]; then
        show_spinner=true
    fi

    while kill -0 "$pid" 2>/dev/null; do
        if [[ "$show_spinner" == true ]]; then
            printf '\r%s ' "${spinner:i:1}"
            i=$(( (i + 1) % ${#spinner} ))
        fi
        sleep "$delay"
    done

    if [[ "$show_spinner" == true ]]; then
        printf '\r  \r'  # Clear spinner
    fi

    # Reap the child; the status of wait is the function's return value.
    wait "$pid"
}
|
||||||
|
|
||||||
###################
|
###################
|
||||||
# System Information Functions
|
# System Information Functions
|
||||||
###################
|
###################
|
||||||
@@ -102,35 +197,40 @@ get_temp_info() {
|
|||||||
get_disk_health() {
|
get_disk_health() {
|
||||||
echo -e "\n${GREEN}=== Disk Health Status ===${NC}"
|
echo -e "\n${GREEN}=== Disk Health Status ===${NC}"
|
||||||
if command -v smartctl >/dev/null 2>&1; then
|
if command -v smartctl >/dev/null 2>&1; then
|
||||||
for disk in $(lsblk -d -o name | grep -E '^sd|^nvme'); do
|
while IFS= read -r disk; do
|
||||||
|
[[ -z "$disk" ]] && continue
|
||||||
echo -e "\nChecking /dev/$disk:"
|
echo -e "\nChecking /dev/$disk:"
|
||||||
smartctl -H /dev/$disk
|
if ! timeout $CMD_TIMEOUT smartctl -H "/dev/$disk"; then
|
||||||
done
|
log_message warn "smartctl timed out or failed for /dev/$disk"
|
||||||
|
fi
|
||||||
|
done <<< "$(get_disk_list)"
|
||||||
else
|
else
|
||||||
log_message warn "smartctl not found. Install smartmontools for disk health monitoring"
|
log_message warn "smartctl not found. Install smartmontools for disk health monitoring"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
get_cpu_info() {
|
get_cpu_info() {
|
||||||
cpu_info=$(grep -m 1 -w 'model name' /proc/cpuinfo | awk -F: '{print $2}' | xargs) || {
|
local cpu_info cpu_cores cpu_mhz
|
||||||
echo -e "${RED}Failed to retrieve CPU model information.${NC}"
|
|
||||||
}
|
|
||||||
cpu_cores=$(lscpu | grep '^CPU(s):' | awk '{print $2}')
|
|
||||||
cpu_mhz=$(lscpu | grep 'MHz' | awk '{print $4}')
|
|
||||||
|
|
||||||
echo -e "${GREEN}CPU Model:${NC} $cpu_info"
|
cpu_info=$(grep -m 1 -w 'model name' /proc/cpuinfo 2>/dev/null | awk -F: '{print $2}' | xargs)
|
||||||
echo -e "${GREEN}CPU Cores:${NC} $cpu_cores"
|
cpu_cores=$(lscpu 2>/dev/null | grep '^CPU(s):' | awk '{print $2}')
|
||||||
echo -e "${GREEN}CPU MHz:${NC} $cpu_mhz"
|
cpu_mhz=$(lscpu 2>/dev/null | grep 'MHz' | awk '{print $4}')
|
||||||
|
|
||||||
|
echo -e "${GREEN}CPU Model:${NC} ${cpu_info:-Unknown}"
|
||||||
|
echo -e "${GREEN}CPU Cores:${NC} ${cpu_cores:-Unknown}"
|
||||||
|
echo -e "${GREEN}CPU MHz:${NC} ${cpu_mhz:-Unknown}"
|
||||||
}
|
}
|
||||||
|
|
||||||
get_ram_info() {
|
get_ram_info() {
|
||||||
ram_total=$(free -h | grep 'Mem:' | awk '{print $2}')
|
local ram_total ram_used ram_free
|
||||||
ram_used=$(free -h | grep 'Mem:' | awk '{print $3}')
|
|
||||||
ram_free=$(free -h | grep 'Mem:' | awk '{print $4}')
|
|
||||||
|
|
||||||
echo -e "${GREEN}Total RAM:${NC} $ram_total"
|
ram_total=$(free -h 2>/dev/null | grep 'Mem:' | awk '{print $2}')
|
||||||
echo -e "${GREEN}Used RAM:${NC} $ram_used"
|
ram_used=$(free -h 2>/dev/null | grep 'Mem:' | awk '{print $3}')
|
||||||
echo -e "${GREEN}Free RAM:${NC} $ram_free"
|
ram_free=$(free -h 2>/dev/null | grep 'Mem:' | awk '{print $4}')
|
||||||
|
|
||||||
|
echo -e "${GREEN}Total RAM:${NC} ${ram_total:-Unknown}"
|
||||||
|
echo -e "${GREEN}Used RAM:${NC} ${ram_used:-Unknown}"
|
||||||
|
echo -e "${GREEN}Free RAM:${NC} ${ram_free:-Unknown}"
|
||||||
}
|
}
|
||||||
|
|
||||||
get_storage_info() {
|
get_storage_info() {
|
||||||
@@ -144,10 +244,13 @@ get_storage_info() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
get_network_info() {
|
get_network_info() {
|
||||||
default_gateway=$(ip route | grep default | awk '{print $3}')
|
local default_gateway ip_addresses
|
||||||
ip_addresses=$(hostname -I | xargs)
|
|
||||||
echo -e "${GREEN}Default Gateway:${NC} $default_gateway"
|
default_gateway=$(ip route 2>/dev/null | grep default | awk '{print $3}')
|
||||||
echo -e "${GREEN}IP Addresses:${NC} $ip_addresses"
|
ip_addresses=$(hostname -I 2>/dev/null | xargs)
|
||||||
|
|
||||||
|
echo -e "${GREEN}Default Gateway:${NC} ${default_gateway:-Not configured}"
|
||||||
|
echo -e "${GREEN}IP Addresses:${NC} ${ip_addresses:-None detected}"
|
||||||
}
|
}
|
||||||
|
|
||||||
get_detailed_network() {
|
get_detailed_network() {
|
||||||
@@ -173,7 +276,7 @@ get_hardware_info() {
|
|||||||
echo -e "${GREEN}BIOS Version:${NC} $(dmidecode -s bios-version)"
|
echo -e "${GREEN}BIOS Version:${NC} $(dmidecode -s bios-version)"
|
||||||
echo -e "\n${GREEN}=== PCI Devices ===${NC}"
|
echo -e "\n${GREEN}=== PCI Devices ===${NC}"
|
||||||
# Show interesting devices, exclude bridges, infrastructure, and integrated motherboard devices
|
# Show interesting devices, exclude bridges, infrastructure, and integrated motherboard devices
|
||||||
lspci | grep -v -E "Host bridge|PCI bridge|ISA bridge|SMBus|IOMMU|Dummy|USB controller|Audio device|Encryption controller|Multimedia controller"
|
lspci | grep -v -E "$EXCLUDED_PCI_PATTERN"
|
||||||
}
|
}
|
||||||
|
|
||||||
get_motherboard_info() {
|
get_motherboard_info() {
|
||||||
@@ -350,7 +453,7 @@ get_physical_interfaces() {
|
|||||||
[[ "$iface" == "lo" ]] && continue
|
[[ "$iface" == "lo" ]] && continue
|
||||||
|
|
||||||
# Skip virtual/firewall interfaces
|
# Skip virtual/firewall interfaces
|
||||||
[[ "$iface" =~ ^(veth|fwbr|fwln|fwpr|tap) ]] && continue
|
[[ "$iface" =~ $VIRTUAL_IFACE_PATTERN ]] && continue
|
||||||
|
|
||||||
# This is a physical interface
|
# This is a physical interface
|
||||||
echo "$iface"
|
echo "$iface"
|
||||||
@@ -361,9 +464,9 @@ get_hba_info() {
|
|||||||
echo -e "\n${GREEN}=== HBA/Storage Controller Information ===${NC}"
|
echo -e "\n${GREEN}=== HBA/Storage Controller Information ===${NC}"
|
||||||
|
|
||||||
# Find RAID, SAS, SATA, SCSI, and storage controllers
|
# Find RAID, SAS, SATA, SCSI, and storage controllers
|
||||||
lspci -vmm 2>/dev/null | awk '
|
lspci -vmm 2>/dev/null | awk -v pattern="$STORAGE_CONTROLLER_PATTERN" '
|
||||||
BEGIN { RS=""; FS="\n" }
|
BEGIN { RS=""; FS="\n" }
|
||||||
/RAID|SAS|SATA|SCSI|Mass storage|Serial Attached|Fibre Channel|NVMe/ {
|
$0 ~ pattern {
|
||||||
for (i=1; i<=NF; i++) {
|
for (i=1; i<=NF; i++) {
|
||||||
if ($i ~ /^Slot:/) slot = substr($i, 7)
|
if ($i ~ /^Slot:/) slot = substr($i, 7)
|
||||||
if ($i ~ /^Class:/) class = substr($i, 8)
|
if ($i ~ /^Class:/) class = substr($i, 8)
|
||||||
@@ -382,7 +485,7 @@ get_hba_info() {
|
|||||||
|
|
||||||
# Show detailed info for storage controllers
|
# Show detailed info for storage controllers
|
||||||
echo -e "\n${GREEN}=== Storage Controller Details ===${NC}"
|
echo -e "\n${GREEN}=== Storage Controller Details ===${NC}"
|
||||||
for ctrl in $(lspci | grep -iE "RAID|SAS|SATA|SCSI|Mass storage|NVMe" | awk '{print $1}'); do
|
for ctrl in $(lspci | grep -iE "$STORAGE_CONTROLLER_PATTERN" | awk '{print $1}'); do
|
||||||
echo -e "\n${GREEN}Controller $ctrl:${NC}"
|
echo -e "\n${GREEN}Controller $ctrl:${NC}"
|
||||||
lspci -vvs "$ctrl" 2>/dev/null | grep -E "^\s+(Subsystem|LnkSta|Kernel driver)" | head -5
|
lspci -vvs "$ctrl" 2>/dev/null | grep -E "^\s+(Subsystem|LnkSta|Kernel driver)" | head -5
|
||||||
done
|
done
|
||||||
@@ -418,16 +521,16 @@ get_ceph_health() {
|
|||||||
echo -e "\n${GREEN}=== Ceph Cluster Health ===${NC}"
|
echo -e "\n${GREEN}=== Ceph Cluster Health ===${NC}"
|
||||||
if command -v ceph >/dev/null 2>&1; then
|
if command -v ceph >/dev/null 2>&1; then
|
||||||
echo -e "${GREEN}Health Status:${NC}"
|
echo -e "${GREEN}Health Status:${NC}"
|
||||||
ceph health detail 2>/dev/null || log_message warn "Cannot connect to Ceph cluster"
|
timeout $CMD_TIMEOUT ceph health detail 2>/dev/null || log_message warn "Cannot connect to Ceph cluster or timed out"
|
||||||
|
|
||||||
echo -e "\n${GREEN}=== Ceph OSD Tree ===${NC}"
|
echo -e "\n${GREEN}=== Ceph OSD Tree ===${NC}"
|
||||||
ceph osd tree 2>/dev/null || true
|
timeout $CMD_TIMEOUT ceph osd tree 2>/dev/null || log_message warn "Ceph OSD tree timed out"
|
||||||
|
|
||||||
echo -e "\n${GREEN}=== Ceph Pool Usage ===${NC}"
|
echo -e "\n${GREEN}=== Ceph Pool Usage ===${NC}"
|
||||||
ceph df 2>/dev/null || true
|
timeout $CMD_TIMEOUT ceph df 2>/dev/null || log_message warn "Ceph df timed out"
|
||||||
|
|
||||||
echo -e "\n${GREEN}=== Ceph OSD Usage ===${NC}"
|
echo -e "\n${GREEN}=== Ceph OSD Usage ===${NC}"
|
||||||
ceph osd df 2>/dev/null || true
|
timeout $CMD_TIMEOUT ceph osd df 2>/dev/null || log_message warn "Ceph OSD df timed out"
|
||||||
else
|
else
|
||||||
log_message info "Ceph tools not installed on this node"
|
log_message info "Ceph tools not installed on this node"
|
||||||
fi
|
fi
|
||||||
@@ -444,7 +547,7 @@ get_node_exporter_status() {
|
|||||||
else
|
else
|
||||||
log_message warn "Port 9100 not listening"
|
log_message warn "Port 9100 not listening"
|
||||||
fi
|
fi
|
||||||
elif systemctl list-unit-files 2>/dev/null | grep -q node_exporter; then
|
elif unit_file_exists node_exporter; then
|
||||||
log_message warn "Node Exporter is installed but not running"
|
log_message warn "Node Exporter is installed but not running"
|
||||||
echo -e "Start with: systemctl start node_exporter"
|
echo -e "Start with: systemctl start node_exporter"
|
||||||
else
|
else
|
||||||
@@ -459,7 +562,7 @@ get_hwmon_status() {
|
|||||||
systemctl list-timers hwmon.timer --no-pager 2>/dev/null
|
systemctl list-timers hwmon.timer --no-pager 2>/dev/null
|
||||||
echo -e "\n${GREEN}Last Run:${NC}"
|
echo -e "\n${GREEN}Last Run:${NC}"
|
||||||
journalctl -u hwmon.service -n 3 --no-pager 2>/dev/null || true
|
journalctl -u hwmon.service -n 3 --no-pager 2>/dev/null || true
|
||||||
elif systemctl list-unit-files 2>/dev/null | grep -q hwmon.timer; then
|
elif unit_file_exists hwmon.timer; then
|
||||||
log_message warn "hwmon timer is installed but not active"
|
log_message warn "hwmon timer is installed but not active"
|
||||||
echo -e "Enable with: systemctl enable --now hwmon.timer"
|
echo -e "Enable with: systemctl enable --now hwmon.timer"
|
||||||
else
|
else
|
||||||
@@ -467,6 +570,51 @@ get_hwmon_status() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Valid check names for selective mode
|
||||||
|
readonly VALID_CHECKS="cpu ram memory storage disk network hardware temps services ceph vms containers"
|
||||||
|
|
||||||
|
#######################################
# Run only the checks named in a comma-separated list.
# Every name is validated by exact comparison against the words of
# VALID_CHECKS before anything runs; the first unknown name aborts.
# Globals:   VALID_CHECKS (read)
# Arguments: $1 - comma-separated check names, e.g. "cpu,ram,disk"
# Outputs:   progress messages and each check's own output
# Returns:   0 on completion; exits the script with status 1 when the list
#            is empty or contains an unknown name
#######################################
run_selective_checks() {
    local checks="$1"
    local -a check_array=()
    local -a valid_array=()
    local check known is_valid

    if [[ -z "$checks" ]]; then
        log_message error "No checks specified. Use --checks=cpu,ram,disk"
        echo "Valid checks: $VALID_CHECKS"
        exit 1
    fi

    IFS=',' read -ra check_array <<< "$checks"
    IFS=' ' read -ra valid_array <<< "$VALID_CHECKS"

    # Validate check names. Exact word comparison is required: a substring
    # test against the joined list would accept e.g. "cpu ram" as one name.
    for check in "${check_array[@]}"; do
        is_valid=false
        for known in "${valid_array[@]}"; do
            if [[ "$check" == "$known" ]]; then
                is_valid=true
                break
            fi
        done
        if [[ "$is_valid" != true ]]; then
            log_message error "Unknown check: $check"
            echo "Valid checks: $VALID_CHECKS"
            exit 1
        fi
    done

    log_message info "Running selective checks: $checks"
    echo ""

    for check in "${check_array[@]}"; do
        case "$check" in
            cpu)
                log_message info "Checking CPU..."
                get_cpu_info
                ;;
            ram)
                log_message info "Checking RAM..."
                get_ram_info
                ;;
            memory)
                log_message info "Checking memory details..."
                get_memory_details
                ;;
            storage)
                log_message info "Checking storage..."
                get_storage_info
                ;;
            disk)
                log_message info "Checking disk health..."
                get_disk_health
                ;;
            network)
                log_message info "Checking network..."
                get_network_info
                get_detailed_network
                get_nic_details
                ;;
            hardware)
                log_message info "Checking hardware..."
                get_hardware_info
                get_motherboard_info
                get_hba_info
                ;;
            temps)
                log_message info "Checking temperatures..."
                get_temp_info
                ;;
            services)
                log_message info "Checking services..."
                check_services
                ;;
            ceph)
                log_message info "Checking Ceph..."
                get_ceph_health
                ;;
            vms)
                log_message info "Checking VMs..."
                list_vms
                ;;
            containers)
                log_message info "Checking containers..."
                list_containers
                ;;
        esac
    done

    echo ""
    log_message info "Selective checks complete"
}
|
||||||
|
|
||||||
quick_health_check() {
|
quick_health_check() {
|
||||||
echo -e "\n${GREEN}=== Quick Health Check ===${NC}"
|
echo -e "\n${GREEN}=== Quick Health Check ===${NC}"
|
||||||
echo -e "Running quick health assessment...\n"
|
echo -e "Running quick health assessment...\n"
|
||||||
@@ -480,12 +628,15 @@ quick_health_check() {
|
|||||||
# Disk health (quick)
|
# Disk health (quick)
|
||||||
echo -e "\n${GREEN}=== Disk Health Summary ===${NC}"
|
echo -e "\n${GREEN}=== Disk Health Summary ===${NC}"
|
||||||
if command -v smartctl >/dev/null 2>&1; then
|
if command -v smartctl >/dev/null 2>&1; then
|
||||||
for disk in $(lsblk -d -o name | grep -E '^sd|^nvme'); do
|
while IFS= read -r disk; do
|
||||||
health=$(smartctl -H /dev/$disk 2>/dev/null | grep -i "health" | awk -F: '{print $2}' | xargs)
|
[[ -z "$disk" ]] && continue
|
||||||
|
health=$(timeout $CMD_TIMEOUT smartctl -H "/dev/$disk" 2>/dev/null | grep -i "health" | awk -F: '{print $2}' | xargs)
|
||||||
if [[ -n "$health" ]]; then
|
if [[ -n "$health" ]]; then
|
||||||
echo -e "/dev/$disk: $health"
|
echo -e "/dev/$disk: $health"
|
||||||
|
else
|
||||||
|
echo -e "/dev/$disk: ${YELLOW}check timed out or unavailable${NC}"
|
||||||
fi
|
fi
|
||||||
done
|
done <<< "$(get_disk_list)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Node Exporter
|
# Node Exporter
|
||||||
@@ -574,6 +725,31 @@ help() {
|
|||||||
echo " --vm-list Check VM vitals"
|
echo " --vm-list Check VM vitals"
|
||||||
echo " --ct-list Check container vitals"
|
echo " --ct-list Check container vitals"
|
||||||
echo " --backup Review backup health"
|
echo " --backup Review backup health"
|
||||||
|
echo " --checks=LIST Run only specific checks (comma-separated)"
|
||||||
|
echo ""
|
||||||
|
echo "Valid checks for --checks option:"
|
||||||
|
echo " cpu, ram, memory, storage, disk, network, hardware, temps,"
|
||||||
|
echo " services, ceph, vms, containers"
|
||||||
|
echo ""
|
||||||
|
echo "Examples:"
|
||||||
|
echo " Run full diagnostics:"
|
||||||
|
echo " curl -sL \"http://10.10.10.63:3000/LotusGuild/proxDoc/raw/branch/main/proxDoc.sh\" | bash -s -- --diags"
|
||||||
|
echo ""
|
||||||
|
echo " Quick health check:"
|
||||||
|
echo " curl -sL \"http://10.10.10.63:3000/LotusGuild/proxDoc/raw/branch/main/proxDoc.sh\" | bash -s -- --quick"
|
||||||
|
echo ""
|
||||||
|
echo " Check only services and VMs:"
|
||||||
|
echo " curl -sL \"http://10.10.10.63:3000/LotusGuild/proxDoc/raw/branch/main/proxDoc.sh\" | bash -s -- --services"
|
||||||
|
echo " curl -sL \"http://10.10.10.63:3000/LotusGuild/proxDoc/raw/branch/main/proxDoc.sh\" | bash -s -- --vm-list"
|
||||||
|
echo ""
|
||||||
|
echo " View drive bay mapping:"
|
||||||
|
echo " curl -sL \"http://10.10.10.63:3000/LotusGuild/proxDoc/raw/branch/main/proxDoc.sh\" | bash -s -- --drives"
|
||||||
|
echo ""
|
||||||
|
echo " Check Ceph cluster health:"
|
||||||
|
echo " curl -sL \"http://10.10.10.63:3000/LotusGuild/proxDoc/raw/branch/main/proxDoc.sh\" | bash -s -- --ceph"
|
||||||
|
echo ""
|
||||||
|
echo " Run only CPU and RAM checks:"
|
||||||
|
echo " curl -sL \"http://10.10.10.63:3000/LotusGuild/proxDoc/raw/branch/main/proxDoc.sh\" | bash -s -- --checks=cpu,ram"
|
||||||
exit 0
|
exit 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -583,7 +759,28 @@ help() {
|
|||||||
###################
|
###################
|
||||||
# Main Functions
|
# Main Functions
|
||||||
###################
|
###################
|
||||||
|
|
||||||
|
#######################################
# Mirror all further stdout/stderr of the script into LOGFILE.
# Does nothing when LOGFILE is empty or logging is already active. When the
# log directory cannot be created or the file cannot be opened for append,
# a warning is logged, LOGFILE is cleared and the script continues without
# a log file.
# Globals:   LOGFILE (read; cleared on failure),
#            PROXDOC_LOGGING_ACTIVE (written)
# Outputs:   an info or warning message via log_message
# Returns:   0
#######################################
setup_logging() {
    [[ -n "$LOGFILE" ]] || return 0
    # A second call would stack another tee and duplicate every line.
    [[ -z "${PROXDOC_LOGGING_ACTIVE:-}" ]] || return 0

    # Create log directory if needed
    local log_dir
    log_dir=$(dirname -- "$LOGFILE")
    if [[ ! -d "$log_dir" ]] && ! mkdir -p -- "$log_dir" 2>/dev/null; then
        log_message warn "Cannot create log directory: $log_dir"
        LOGFILE=""
        return 0
    fi

    # Verify the file can be opened for append before redirecting. The probe
    # runs in a subshell so a failed redirection cannot abort the script.
    if ! ( : >> "$LOGFILE" ) 2>/dev/null; then
        log_message warn "Cannot write to log file: $LOGFILE"
        LOGFILE=""
        return 0
    fi

    log_message info "Logging output to: $LOGFILE"
    # Redirect stdout and stderr of the current shell through tee. tee runs
    # as a background process substitution, so its last lines may be written
    # slightly after the script itself exits.
    exec > >(tee -a "$LOGFILE") 2>&1
    PROXDOC_LOGGING_ACTIVE=1
}
|
||||||
|
|
||||||
runDiags() {
|
runDiags() {
|
||||||
|
setup_logging
|
||||||
log_message info "Beginning system examination..."
|
log_message info "Beginning system examination..."
|
||||||
|
|
||||||
# Check if running on Proxmox
|
# Check if running on Proxmox
|
||||||
@@ -649,10 +846,49 @@ runDiags() {
|
|||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
log_message info "Examination complete"
|
log_message info "Examination complete"
|
||||||
|
|
||||||
|
# Print summary if there were issues
|
||||||
|
if [[ $WARNINGS_OCCURRED -gt 0 || $ERRORS_OCCURRED -gt 0 ]]; then
|
||||||
|
echo -e "\n${YELLOW}=== Summary ===${NC}"
|
||||||
|
[[ $WARNINGS_OCCURRED -gt 0 ]] && echo -e "Warnings: $WARNINGS_OCCURRED"
|
||||||
|
[[ $ERRORS_OCCURRED -gt 0 ]] && echo -e "Errors: $ERRORS_OCCURRED"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Whitelist of valid command options
|
||||||
|
readonly VALID_OPTIONS="--help --diags --quick --drives --ceph --node-exporter --hwmon --services --vm-list --ct-list --backup --checks"
|
||||||
|
|
||||||
|
#######################################
# Validate a command-line option against the whitelist.
# The argument must start with "--" followed by a lowercase letter and may
# then contain only lowercase letters, hyphens, equals signs and commas.
# The part before the first "=" must equal one word of VALID_OPTIONS.
# Globals:   VALID_OPTIONS (read)
# Arguments: $1 - raw option, e.g. "--diags" or "--checks=cpu,ram"
# Returns:   0 if the option is allowed, 1 otherwise
#######################################
validate_input() {
    local input="$1"
    local -r option_pattern='^--[a-z][-a-z=,]*$'
    local opt_name allowed
    local -a allowed_options=()

    # Reject anything outside the permitted character set up front.
    [[ "$input" =~ $option_pattern ]] || return 1

    # Extract the option name (before any = sign)
    opt_name="${input%%=*}"

    # Check against whitelist by exact word comparison.
    IFS=' ' read -ra allowed_options <<< "$VALID_OPTIONS"
    for allowed in "${allowed_options[@]}"; do
        if [[ "$opt_name" == "$allowed" ]]; then
            return 0
        fi
    done
    return 1
}
|
||||||
|
|
||||||
checkForInput() {
|
checkForInput() {
|
||||||
case $1 in
|
local input="$1"
|
||||||
|
|
||||||
|
# Validate input against whitelist
|
||||||
|
if ! validate_input "$input"; then
|
||||||
|
echo -e "${RED}Invalid option: $input${NC}"
|
||||||
|
echo -e "Use --help to see available options."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract option name and value for --checks=X pattern
|
||||||
|
local opt_name="${input%%=*}"
|
||||||
|
local opt_value="${input#*=}"
|
||||||
|
[[ "$opt_name" == "$opt_value" ]] && opt_value=""
|
||||||
|
|
||||||
|
case "$opt_name" in
|
||||||
--help) help ;;
|
--help) help ;;
|
||||||
--diags) check_requirements; runDiags ;;
|
--diags) check_requirements; runDiags ;;
|
||||||
--quick) quick_health_check ;;
|
--quick) quick_health_check ;;
|
||||||
@@ -664,7 +900,7 @@ checkForInput() {
|
|||||||
--vm-list) list_vms ;;
|
--vm-list) list_vms ;;
|
||||||
--ct-list) list_containers ;;
|
--ct-list) list_containers ;;
|
||||||
--backup) echo -e "${GREEN}Backup Status:${NC}"; pvesm status 2>/dev/null || log_message warn "pvesm not available" ;;
|
--backup) echo -e "${GREEN}Backup Status:${NC}"; pvesm status 2>/dev/null || log_message warn "pvesm not available" ;;
|
||||||
*) echo -e "${RED}Invalid option: $1${NC}"; help ;;
|
--checks) run_selective_checks "$opt_value" ;;
|
||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -682,8 +918,10 @@ if [[ $EUID -ne 0 ]]; then
|
|||||||
handle_error "This script must be run as root"
|
handle_error "This script must be run as root"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Set trap for interrupts
|
# Set trap for cleanup and interrupts
|
||||||
trap 'echo -e "${RED}Script interrupted.${NC}"; exit 1' INT TERM
|
trap cleanup EXIT
|
||||||
|
trap 'echo -e "\n${RED}Script interrupted by user.${NC}"; exit 130' INT
|
||||||
|
trap 'echo -e "\n${RED}Script terminated.${NC}"; exit 143' TERM
|
||||||
|
|
||||||
if [[ -n $argOne ]]; then
|
if [[ -n $argOne ]]; then
|
||||||
checkForInput "$argOne"
|
checkForInput "$argOne"
|
||||||
|
|||||||
Reference in New Issue
Block a user