Improve Ceph OSD parsing reliability with caching

Replace fragile per-device ceph-volume parsing (grep -B 20) with a
single upfront query that builds lookup tables.

New build_ceph_cache function:
- Parses ceph-volume lvm list output using proper block detection
- Extracts OSD IDs by matching "====== osd.X =======" headers
- Maps block devices to their corresponding OSDs
- Queries ceph osd tree once for all status info
- Creates CEPH_DEVICE_TO_OSD, CEPH_OSD_STATUS, CEPH_OSD_IN arrays

This is both more reliable and more efficient.

Fixes: #9

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-05 11:29:04 -05:00
parent 6b4a985b95
commit 9d39332df3

View File

@@ -336,6 +336,67 @@ build_drive_map() {
fi fi
} }
#------------------------------------------------------------------------------
# build_ceph_cache
#
# Queries Ceph once and builds lookup tables for OSD information.
# This is much more efficient than querying ceph-volume per device.
#
# Sets global associative arrays (always reset to empty first):
#   CEPH_DEVICE_TO_OSD - Maps device names to OSD IDs (e.g., sda -> osd.5)
#   CEPH_OSD_STATUS    - Maps OSD numbers to up/down status
#   CEPH_OSD_IN        - Maps OSD numbers to in/out status
#
# Device names are taken from two places in `ceph-volume lvm list`:
#   - the "block device" line (path with the leading /dev/ removed), and
#   - the "devices" line, which lists the underlying physical disks as a
#     comma-separated list. For LVM-backed OSDs the "block device" line is
#     a logical volume path, so the "devices" line is what makes a lookup
#     by plain disk name (sda) succeed.
# "devices" lines inside [db], [wal] or [journal] sections are ignored so
# that a disk shared by several OSDs is not attributed to just one of them.
#
# Arguments: none
# Returns:   0 always; the arrays are left empty/partial when ceph-volume
#            or ceph is not installed or produces no parsable output.
#------------------------------------------------------------------------------
build_ceph_cache() {
    declare -g -A CEPH_DEVICE_TO_OSD=()
    declare -g -A CEPH_OSD_STATUS=()
    declare -g -A CEPH_OSD_IN=()

    # Skip if ceph-volume is not available
    if ! command -v ceph-volume &>/dev/null; then
        return 0
    fi

    # Regexes are kept in variables so [[ =~ ]] treats them as patterns
    # without any quoting surprises.
    local osd_header_re='======[[:space:]]+osd\.([0-9]+)[[:space:]]+======='
    local section_re='^[[:space:]]*\[([a-z]+)\]'
    local block_dev_re='block[[:space:]]device[[:space:]]+/dev/([^[:space:]]+)'
    local devices_re='^[[:space:]]*devices[[:space:]]+([^[:space:]]+)'
    local tree_re='^[[:space:]]*([0-9]+)[[:space:]]+.*osd\.([0-9]+)[[:space:]]+(up|down)[[:space:]]+([0-9.]+)'

    local line current_osd="" current_section="" dev_path dev_name
    local -a dev_paths=()

    # Parse ceph-volume lvm list output
    # Format: blocks starting with "====== osd.X =======" followed by one or
    # more "[section]" groups of key/value lines.
    # stderr is discarded on purpose: a failing ceph-volume simply yields an
    # empty cache (best effort).
    while IFS= read -r line || [[ -n "$line" ]]; do
        if [[ "$line" =~ $osd_header_re ]]; then
            # OSD header: "====== osd.5 ======="
            current_osd="osd.${BASH_REMATCH[1]}"
            current_section=""
        elif [[ -z "$current_osd" ]]; then
            # Nothing before the first OSD header can be attributed.
            continue
        elif [[ "$line" =~ $section_re ]]; then
            # Section header: "  [block]   /dev/..." / "  [db]   /dev/..."
            current_section="${BASH_REMATCH[1]}"
        elif [[ "$line" =~ $block_dev_re ]]; then
            # "      block device   /dev/sda" (or an LV path for LVM OSDs)
            CEPH_DEVICE_TO_OSD["${BASH_REMATCH[1]}"]="$current_osd"
        elif [[ "$line" =~ $devices_re ]]; then
            # "      devices   /dev/sda[,/dev/sdb...]" - physical disks
            case "$current_section" in
                db | wal | journal) continue ;;
            esac
            IFS=',' read -r -a dev_paths <<< "${BASH_REMATCH[1]}"
            for dev_path in "${dev_paths[@]}"; do
                [[ "$dev_path" == /dev/* ]] || continue
                dev_name="${dev_path#/dev/}"
                # An empty subscript is an error for associative arrays.
                [[ -n "$dev_name" ]] || continue
                CEPH_DEVICE_TO_OSD["$dev_name"]="$current_osd"
            done
        fi
    done < <(ceph-volume lvm list 2>/dev/null)

    # Skip if ceph command is not available
    if ! command -v ceph &>/dev/null; then
        return 0
    fi

    # Parse ceph osd tree for status
    # Format: ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT
    local osd_num status reweight
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Match OSD lines: " 5 hdd 3.63660 osd.5 up 1.00000"
        if [[ "$line" =~ $tree_re ]]; then
            osd_num="${BASH_REMATCH[1]}"
            status="${BASH_REMATCH[3]}"
            reweight="${BASH_REMATCH[4]}"
            CEPH_OSD_STATUS["$osd_num"]="$status"
            # Determine in/out based on reweight (> 0 means "in").
            # reweight only contains digits and dots, so it is greater than
            # zero exactly when it contains a non-zero digit; this avoids
            # forking awk once per OSD.
            if [[ "$reweight" =~ [1-9] ]]; then
                CEPH_OSD_IN["$osd_num"]="in"
            else
                CEPH_OSD_IN["$osd_num"]="out"
            fi
        fi
    done < <(ceph osd tree 2>/dev/null)

    return 0
}
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# get_drive_smart_info # get_drive_smart_info
# #
@@ -447,7 +508,10 @@ esac
# Drive Details Section # Drive Details Section
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
echo -e "\n=== Drive Details with SMART Status (by Bay Position) ===" # Build Ceph OSD cache (single query instead of per-device)
build_ceph_cache
printf "\n=== Drive Details with SMART Status (by Bay Position) ===\n"
printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s\n" "BAY" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL" "SERIAL" "CEPH OSD" "STATUS" "USAGE" printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s\n" "BAY" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL" "SERIAL" "CEPH OSD" "STATUS" "USAGE"
echo "----------------------------------------------------------------------------------------------------------------------------------------------------" echo "----------------------------------------------------------------------------------------------------------------------------------------------------"
@@ -470,31 +534,15 @@ for bay in $all_bays; do
smart_info=$(get_drive_smart_info "$device") smart_info=$(get_drive_smart_info "$device")
IFS='|' read -r type temp health model serial <<< "$smart_info" IFS='|' read -r type temp health model serial <<< "$smart_info"
# Check for Ceph OSD # Check for Ceph OSD using cached data
osd_id=$(ceph-volume lvm list 2>/dev/null | grep -B 20 "/dev/$device" | grep "osd id" | awk '{print "osd."$3}' | head -1) osd_id="${CEPH_DEVICE_TO_OSD[$device]:-}"
# Get Ceph status if OSD exists
ceph_status="-" ceph_status="-"
if [[ -n "$osd_id" ]]; then if [[ -n "$osd_id" ]]; then
# Get in/out and up/down status from ceph osd tree # Get status from cached OSD tree data
osd_num=$(echo "$osd_id" | sed 's/osd\.//') osd_num="${osd_id#osd.}"
# Parse ceph osd tree output - column 5 is STATUS (up/down), column 6 is REWEIGHT (1.0 = in, 0 = out) up_status="${CEPH_OSD_STATUS[$osd_num]:-unknown}"
tree_line=$(ceph osd tree 2>/dev/null | grep -E "^\s*${osd_num}\s+" | grep "osd.${osd_num}") in_status="${CEPH_OSD_IN[$osd_num]:-out}"
up_status=$(echo "$tree_line" | awk '{print $5}')
reweight=$(echo "$tree_line" | awk '{print $6}')
# Default to unknown if we can't parse
[[ -z "$up_status" ]] && up_status="unknown"
[[ -z "$reweight" ]] && reweight="0"
# Determine in/out based on reweight (1.0 = in, 0 = out)
# Use awk for floating point comparison (more portable than bc)
if awk "BEGIN {exit !($reweight > 0)}"; then
in_status="in"
else
in_status="out"
fi
ceph_status="${up_status}/${in_status}" ceph_status="${up_status}/${in_status}"
else else
osd_id="-" osd_id="-"