Add timeout protection to external commands
Add a configurable CMD_TIMEOUT constant and apply timeouts to the smartctl and ceph commands, which may hang on unresponsive disks or during network issues. This prevents the script from blocking indefinitely.

#14

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
21
proxDoc.sh
21
proxDoc.sh
@@ -2,6 +2,11 @@
|
|||||||
|
|
||||||
VERSION="1.1.0"
|
VERSION="1.1.0"
|
||||||
|
|
||||||
|
###################
|
||||||
|
# Timeout Configuration
|
||||||
|
###################
|
||||||
|
readonly CMD_TIMEOUT=30 # Default timeout in seconds for external commands
|
||||||
|
|
||||||
###################
|
###################
|
||||||
# Color Definitions
|
# Color Definitions
|
||||||
###################
|
###################
|
||||||
@@ -105,7 +110,9 @@ get_disk_health() {
|
|||||||
while IFS= read -r disk; do
|
while IFS= read -r disk; do
|
||||||
[[ -z "$disk" ]] && continue
|
[[ -z "$disk" ]] && continue
|
||||||
echo -e "\nChecking /dev/$disk:"
|
echo -e "\nChecking /dev/$disk:"
|
||||||
smartctl -H "/dev/$disk"
|
if ! timeout $CMD_TIMEOUT smartctl -H "/dev/$disk"; then
|
||||||
|
log_message warn "smartctl timed out or failed for /dev/$disk"
|
||||||
|
fi
|
||||||
done < <(lsblk -d -o name | grep -E '^sd|^nvme')
|
done < <(lsblk -d -o name | grep -E '^sd|^nvme')
|
||||||
else
|
else
|
||||||
log_message warn "smartctl not found. Install smartmontools for disk health monitoring"
|
log_message warn "smartctl not found. Install smartmontools for disk health monitoring"
|
||||||
@@ -419,16 +426,16 @@ get_ceph_health() {
|
|||||||
echo -e "\n${GREEN}=== Ceph Cluster Health ===${NC}"
|
echo -e "\n${GREEN}=== Ceph Cluster Health ===${NC}"
|
||||||
if command -v ceph >/dev/null 2>&1; then
|
if command -v ceph >/dev/null 2>&1; then
|
||||||
echo -e "${GREEN}Health Status:${NC}"
|
echo -e "${GREEN}Health Status:${NC}"
|
||||||
ceph health detail 2>/dev/null || log_message warn "Cannot connect to Ceph cluster"
|
timeout $CMD_TIMEOUT ceph health detail 2>/dev/null || log_message warn "Cannot connect to Ceph cluster or timed out"
|
||||||
|
|
||||||
echo -e "\n${GREEN}=== Ceph OSD Tree ===${NC}"
|
echo -e "\n${GREEN}=== Ceph OSD Tree ===${NC}"
|
||||||
ceph osd tree 2>/dev/null || true
|
timeout $CMD_TIMEOUT ceph osd tree 2>/dev/null || log_message warn "Ceph OSD tree timed out"
|
||||||
|
|
||||||
echo -e "\n${GREEN}=== Ceph Pool Usage ===${NC}"
|
echo -e "\n${GREEN}=== Ceph Pool Usage ===${NC}"
|
||||||
ceph df 2>/dev/null || true
|
timeout $CMD_TIMEOUT ceph df 2>/dev/null || log_message warn "Ceph df timed out"
|
||||||
|
|
||||||
echo -e "\n${GREEN}=== Ceph OSD Usage ===${NC}"
|
echo -e "\n${GREEN}=== Ceph OSD Usage ===${NC}"
|
||||||
ceph osd df 2>/dev/null || true
|
timeout $CMD_TIMEOUT ceph osd df 2>/dev/null || log_message warn "Ceph OSD df timed out"
|
||||||
else
|
else
|
||||||
log_message info "Ceph tools not installed on this node"
|
log_message info "Ceph tools not installed on this node"
|
||||||
fi
|
fi
|
||||||
@@ -492,9 +499,11 @@ quick_health_check() {
|
|||||||
if command -v smartctl >/dev/null 2>&1; then
|
if command -v smartctl >/dev/null 2>&1; then
|
||||||
while IFS= read -r disk; do
|
while IFS= read -r disk; do
|
||||||
[[ -z "$disk" ]] && continue
|
[[ -z "$disk" ]] && continue
|
||||||
health=$(smartctl -H "/dev/$disk" 2>/dev/null | grep -i "health" | awk -F: '{print $2}' | xargs)
|
health=$(timeout $CMD_TIMEOUT smartctl -H "/dev/$disk" 2>/dev/null | grep -i "health" | awk -F: '{print $2}' | xargs)
|
||||||
if [[ -n "$health" ]]; then
|
if [[ -n "$health" ]]; then
|
||||||
echo -e "/dev/$disk: $health"
|
echo -e "/dev/$disk: $health"
|
||||||
|
else
|
||||||
|
echo -e "/dev/$disk: ${YELLOW}check timed out or unavailable${NC}"
|
||||||
fi
|
fi
|
||||||
done < <(lsblk -d -o name | grep -E '^sd|^nvme')
|
done < <(lsblk -d -o name | grep -E '^sd|^nvme')
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user