Improve Ceph OSD parsing reliability with caching
Replace fragile per-device ceph-volume parsing (grep -B 20) with a single upfront query that builds lookup tables.

New build_ceph_cache function:
- Parses ceph-volume lvm list output using proper block detection
- Extracts OSD IDs by matching "====== osd.X =======" headers
- Maps block devices to their corresponding OSDs
- Queries ceph osd tree once for all status info
- Creates CEPH_DEVICE_TO_OSD, CEPH_OSD_STATUS, CEPH_OSD_IN arrays

This is both more reliable and more efficient.

Fixes: #9

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -336,6 +336,67 @@ build_drive_map() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#------------------------------------------------------------------------------
# build_ceph_cache
#
# Queries Ceph once and builds lookup tables for OSD information.
# This is much more efficient than querying ceph-volume per device.
#
# Sets global associative arrays (each is reset to empty on every call):
#   CEPH_DEVICE_TO_OSD - Maps device names, without the /dev/ prefix, to OSD
#                        IDs (e.g., sda -> osd.5). Keys come from both the
#                        "devices" line (physical disks backing the OSD) and
#                        the "block device" line (usually the LV path).
#   CEPH_OSD_STATUS    - Maps OSD numbers to up/down status
#   CEPH_OSD_IN        - Maps OSD numbers to in/out status
#
# Arguments: none
# Returns:   0 always. If ceph-volume is not available all arrays stay empty;
#            if only ceph is missing the status arrays stay empty.
#------------------------------------------------------------------------------
build_ceph_cache() {
    declare -g -A CEPH_DEVICE_TO_OSD=()
    declare -g -A CEPH_OSD_STATUS=()
    declare -g -A CEPH_OSD_IN=()

    # Skip if ceph-volume is not available
    if ! command -v ceph-volume &>/dev/null; then
        return 0
    fi

    # Regexes are kept in variables so bash does not re-interpret the
    # brackets and backslashes when they are used unquoted with =~.
    local re_header='======[[:space:]]+osd\.([0-9]+)[[:space:]]+======='
    local re_section='^[[:space:]]*\[([[:alnum:]_-]+)\]'
    local re_block='block[[:space:]]device[[:space:]]+/dev/([^[:space:]]+)'
    local re_devices='^[[:space:]]*devices[[:space:]]+([^[:space:]]+)'
    local re_tree='^[[:space:]]*([0-9]+)[[:space:]]+.*osd\.([0-9]+)[[:space:]]+(up|down)[[:space:]]+([0-9.]+)'

    local line dev
    local current_osd=""
    local section=""
    local -a devs=()

    # Parse ceph-volume lvm list output.
    # Format: blocks starting with "====== osd.X =======", each containing one
    # or more "[block]" / "[db]" / "[wal]" sections of "key   value" lines.
    while IFS= read -r line; do
        if [[ "$line" =~ $re_header ]]; then
            # New OSD block: "====== osd.5 ======="
            current_osd="osd.${BASH_REMATCH[1]}"
            section=""
        elif [[ -z "$current_osd" ]]; then
            # Ignore anything printed before the first OSD header
            continue
        elif [[ "$line" =~ $re_section ]]; then
            # Section marker: "  [block]   /dev/ceph-.../osd-block-..."
            section="${BASH_REMATCH[1]}"
        elif [[ "$line" =~ $re_block ]]; then
            # "block device  /dev/<path>" - usually an LV path, occasionally a
            # raw device; kept so existing lookups by this key still work.
            dev="${BASH_REMATCH[1]}"
            CEPH_DEVICE_TO_OSD[$dev]="$current_osd"
        elif [[ "$line" =~ $re_devices ]]; then
            # "devices  /dev/sda[,/dev/sdb]" - the physical disks behind the
            # LV. DB/WAL/journal disks may be shared by several OSDs, so only
            # data-bearing sections are attributed to this OSD.
            case "$section" in
                db|wal|journal) continue ;;
            esac
            IFS=',' read -r -a devs <<< "${BASH_REMATCH[1]}"
            for dev in "${devs[@]}"; do
                dev="${dev#/dev/}"
                if [[ -n "$dev" ]]; then
                    CEPH_DEVICE_TO_OSD[$dev]="$current_osd"
                fi
            done
        fi
    done < <(ceph-volume lvm list 2>/dev/null)

    # Skip if ceph command is not available
    if ! command -v ceph &>/dev/null; then
        return 0
    fi

    local osd_num status reweight

    # Parse ceph osd tree for status
    # Format: ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT
    while IFS= read -r line; do
        # Match OSD lines: " 5 hdd 3.63660 osd.5 up 1.00000"
        if [[ "$line" =~ $re_tree ]]; then
            osd_num="${BASH_REMATCH[1]}"
            status="${BASH_REMATCH[3]}"
            reweight="${BASH_REMATCH[4]}"

            CEPH_OSD_STATUS[$osd_num]="$status"

            # Determine in/out based on reweight (> 0 means "in"). reweight
            # consists only of digits and dots, so it is positive exactly when
            # it contains a non-zero digit; this avoids forking awk per OSD.
            if [[ "$reweight" =~ [1-9] ]]; then
                CEPH_OSD_IN[$osd_num]="in"
            else
                CEPH_OSD_IN[$osd_num]="out"
            fi
        fi
    done < <(ceph osd tree 2>/dev/null)

    return 0
}
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# get_drive_smart_info
|
# get_drive_smart_info
|
||||||
#
|
#
|
||||||
@@ -447,7 +508,10 @@ esac
|
|||||||
# Drive Details Section
|
# Drive Details Section
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
|
|
||||||
echo -e "\n=== Drive Details with SMART Status (by Bay Position) ==="
|
# Build Ceph OSD cache (single query instead of per-device)
|
||||||
|
build_ceph_cache
|
||||||
|
|
||||||
|
printf "\n=== Drive Details with SMART Status (by Bay Position) ===\n"
|
||||||
printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s\n" "BAY" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL" "SERIAL" "CEPH OSD" "STATUS" "USAGE"
|
printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s\n" "BAY" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL" "SERIAL" "CEPH OSD" "STATUS" "USAGE"
|
||||||
echo "----------------------------------------------------------------------------------------------------------------------------------------------------"
|
echo "----------------------------------------------------------------------------------------------------------------------------------------------------"
|
||||||
|
|
||||||
@@ -470,31 +534,15 @@ for bay in $all_bays; do
|
|||||||
smart_info=$(get_drive_smart_info "$device")
|
smart_info=$(get_drive_smart_info "$device")
|
||||||
IFS='|' read -r type temp health model serial <<< "$smart_info"
|
IFS='|' read -r type temp health model serial <<< "$smart_info"
|
||||||
|
|
||||||
# Check for Ceph OSD
|
# Check for Ceph OSD using cached data
|
||||||
osd_id=$(ceph-volume lvm list 2>/dev/null | grep -B 20 "/dev/$device" | grep "osd id" | awk '{print "osd."$3}' | head -1)
|
osd_id="${CEPH_DEVICE_TO_OSD[$device]:-}"
|
||||||
|
|
||||||
# Get Ceph status if OSD exists
|
|
||||||
ceph_status="-"
|
ceph_status="-"
|
||||||
|
|
||||||
if [[ -n "$osd_id" ]]; then
|
if [[ -n "$osd_id" ]]; then
|
||||||
# Get in/out and up/down status from ceph osd tree
|
# Get status from cached OSD tree data
|
||||||
osd_num=$(echo "$osd_id" | sed 's/osd\.//')
|
osd_num="${osd_id#osd.}"
|
||||||
# Parse ceph osd tree output - column 5 is STATUS (up/down), column 6 is REWEIGHT (1.0 = in, 0 = out)
|
up_status="${CEPH_OSD_STATUS[$osd_num]:-unknown}"
|
||||||
tree_line=$(ceph osd tree 2>/dev/null | grep -E "^\s*${osd_num}\s+" | grep "osd.${osd_num}")
|
in_status="${CEPH_OSD_IN[$osd_num]:-out}"
|
||||||
up_status=$(echo "$tree_line" | awk '{print $5}')
|
|
||||||
reweight=$(echo "$tree_line" | awk '{print $6}')
|
|
||||||
|
|
||||||
# Default to unknown if we can't parse
|
|
||||||
[[ -z "$up_status" ]] && up_status="unknown"
|
|
||||||
[[ -z "$reweight" ]] && reweight="0"
|
|
||||||
|
|
||||||
# Determine in/out based on reweight (1.0 = in, 0 = out)
|
|
||||||
# Use awk for floating point comparison (more portable than bc)
|
|
||||||
if awk "BEGIN {exit !($reweight > 0)}"; then
|
|
||||||
in_status="in"
|
|
||||||
else
|
|
||||||
in_status="out"
|
|
||||||
fi
|
|
||||||
|
|
||||||
ceph_status="${up_status}/${in_status}"
|
ceph_status="${up_status}/${in_status}"
|
||||||
else
|
else
|
||||||
osd_id="-"
|
osd_id="-"
|
||||||
|
|||||||
Reference in New Issue
Block a user