Improve Ceph OSD parsing reliability with caching
Replace fragile per-device ceph-volume parsing (grep -B 20) with a single upfront query that builds lookup tables.

New build_ceph_cache function:
- Parses ceph-volume lvm list output using proper block detection
- Extracts OSD IDs by matching "====== osd.X =======" headers
- Maps block devices to their corresponding OSDs
- Queries ceph osd tree once for all status info
- Creates CEPH_DEVICE_TO_OSD, CEPH_OSD_STATUS, CEPH_OSD_IN arrays

This is both more reliable and more efficient.

Fixes: #9

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -336,6 +336,67 @@ build_drive_map() {
|
||||
fi
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
# build_ceph_cache
#
# Queries Ceph once and builds lookup tables for OSD information.
# This is much more efficient than querying ceph-volume per device.
#
# Sets global associative arrays (always reset to empty first):
#   CEPH_DEVICE_TO_OSD - Maps device names to OSD IDs (e.g., sda -> osd.5).
#                        Keys have the leading "/dev/" stripped. Both the
#                        path on the "block device" line and every entry of
#                        the "devices" line are recorded.
#   CEPH_OSD_STATUS    - Maps OSD numbers to up/down status
#   CEPH_OSD_IN        - Maps OSD numbers to in/out status
#
# Notes:
#   - For LVM-backed OSDs the "block device" line of `ceph-volume lvm list`
#     holds the logical-volume path, while the physical disks are listed on
#     the "devices" line (comma-separated when there are several). Both are
#     parsed so that lookups by plain disk name succeed.
#   - "devices" lines inside [db], [wal] and [journal] sections are skipped:
#     such a device may be shared by several OSDs, so mapping it to a single
#     OSD would be misleading.
#   - Missing tools or failing commands are not errors; the arrays are simply
#     left empty (or partially filled). Always returns 0.
#------------------------------------------------------------------------------
build_ceph_cache() {
    declare -g -A CEPH_DEVICE_TO_OSD=()
    declare -g -A CEPH_OSD_STATUS=()
    declare -g -A CEPH_OSD_IN=()

    local line dev osd_num status reweight
    local current_osd="" section=""
    local -a dev_list=()

    # Regexes are kept in variables so the shell parser never has to
    # interpret the bracket expressions inside [[ ... =~ ... ]].
    local header_re='======[[:space:]]+osd\.([0-9]+)[[:space:]]+======='
    local section_re='^[[:space:]]*\[([a-z]+)\]'
    local block_re='block[[:space:]]device[[:space:]]+/dev/([^[:space:]]+)'
    local devices_re='^[[:space:]]*devices[[:space:]]+(/dev/[^[:space:]]+)'
    local tree_re='^[[:space:]]*([0-9]+)[[:space:]]+.*osd\.([0-9]+)[[:space:]]+(up|down)[[:space:]]+([0-9.]+)'

    # Skip if ceph-volume is not available
    if ! command -v ceph-volume &>/dev/null; then
        return 0
    fi

    # Parse ceph-volume lvm list output.
    # Format: blocks starting with "====== osd.X =======", each containing one
    # or more "[section]" sub-blocks with "key   value" lines.
    # stderr is discarded on purpose: this is a best-effort lookup.
    while IFS= read -r line; do
        if [[ "$line" =~ $header_re ]]; then
            # OSD header: "====== osd.5 ======="
            current_osd="osd.${BASH_REMATCH[1]}"
            section=""
        elif [[ -z "$current_osd" ]]; then
            # Ignore anything that appears before the first OSD header.
            continue
        elif [[ "$line" =~ $section_re ]]; then
            # Section header: "  [block]   /dev/..."
            section="${BASH_REMATCH[1]}"
        elif [[ "$line" =~ $block_re ]]; then
            # "block device   /dev/<path>" (LV path, or a raw device)
            dev="${BASH_REMATCH[1]}"
            CEPH_DEVICE_TO_OSD[$dev]="$current_osd"
        elif [[ "$line" =~ $devices_re ]]; then
            # "devices   /dev/sda[,/dev/sdb...]" - the underlying disks.
            if [[ "$section" == "db" || "$section" == "wal" || "$section" == "journal" ]]; then
                continue
            fi
            IFS=',' read -r -a dev_list <<< "${BASH_REMATCH[1]}"
            for dev in "${dev_list[@]}"; do
                dev="${dev#/dev/}"
                if [[ -n "$dev" ]]; then
                    CEPH_DEVICE_TO_OSD[$dev]="$current_osd"
                fi
            done
        fi
    done < <(ceph-volume lvm list 2>/dev/null)

    # Skip if ceph command is not available
    if ! command -v ceph &>/dev/null; then
        return 0
    fi

    # Parse ceph osd tree for status
    # Format: ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT
    # Example OSD line: " 5 hdd 3.63660 osd.5 up 1.00000"
    # Bucket lines (negative IDs, no up/down column) do not match and are skipped.
    while IFS= read -r line; do
        [[ "$line" =~ $tree_re ]] || continue
        osd_num="${BASH_REMATCH[1]}"
        status="${BASH_REMATCH[3]}"
        reweight="${BASH_REMATCH[4]}"

        CEPH_OSD_STATUS[$osd_num]="$status"

        # Determine in/out based on reweight (> 0 means "in").
        # reweight consists only of digits and dots, so it is greater than
        # zero exactly when it contains a non-zero digit; no awk fork needed.
        if [[ "$reweight" =~ [1-9] ]]; then
            CEPH_OSD_IN[$osd_num]="in"
        else
            CEPH_OSD_IN[$osd_num]="out"
        fi
    done < <(ceph osd tree 2>/dev/null)

    return 0
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# get_drive_smart_info
|
||||
#
|
||||
@@ -447,7 +508,10 @@ esac
|
||||
# Drive Details Section
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
echo -e "\n=== Drive Details with SMART Status (by Bay Position) ==="
|
||||
# Build Ceph OSD cache (single query instead of per-device)
|
||||
build_ceph_cache
|
||||
|
||||
printf "\n=== Drive Details with SMART Status (by Bay Position) ===\n"
|
||||
printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s\n" "BAY" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL" "SERIAL" "CEPH OSD" "STATUS" "USAGE"
|
||||
echo "----------------------------------------------------------------------------------------------------------------------------------------------------"
|
||||
|
||||
@@ -470,31 +534,15 @@ for bay in $all_bays; do
|
||||
smart_info=$(get_drive_smart_info "$device")
|
||||
IFS='|' read -r type temp health model serial <<< "$smart_info"
|
||||
|
||||
# Check for Ceph OSD
|
||||
osd_id=$(ceph-volume lvm list 2>/dev/null | grep -B 20 "/dev/$device" | grep "osd id" | awk '{print "osd."$3}' | head -1)
|
||||
|
||||
# Get Ceph status if OSD exists
|
||||
# Check for Ceph OSD using cached data
|
||||
osd_id="${CEPH_DEVICE_TO_OSD[$device]:-}"
|
||||
ceph_status="-"
|
||||
|
||||
if [[ -n "$osd_id" ]]; then
|
||||
# Get in/out and up/down status from ceph osd tree
|
||||
osd_num=$(echo "$osd_id" | sed 's/osd\.//')
|
||||
# Parse ceph osd tree output - column 5 is STATUS (up/down), column 6 is REWEIGHT (1.0 = in, 0 = out)
|
||||
tree_line=$(ceph osd tree 2>/dev/null | grep -E "^\s*${osd_num}\s+" | grep "osd.${osd_num}")
|
||||
up_status=$(echo "$tree_line" | awk '{print $5}')
|
||||
reweight=$(echo "$tree_line" | awk '{print $6}')
|
||||
|
||||
# Default to unknown if we can't parse
|
||||
[[ -z "$up_status" ]] && up_status="unknown"
|
||||
[[ -z "$reweight" ]] && reweight="0"
|
||||
|
||||
# Determine in/out based on reweight (1.0 = in, 0 = out)
|
||||
# Use awk for floating point comparison (more portable than bc)
|
||||
if awk "BEGIN {exit !($reweight > 0)}"; then
|
||||
in_status="in"
|
||||
else
|
||||
in_status="out"
|
||||
fi
|
||||
|
||||
# Get status from cached OSD tree data
|
||||
osd_num="${osd_id#osd.}"
|
||||
up_status="${CEPH_OSD_STATUS[$osd_num]:-unknown}"
|
||||
in_status="${CEPH_OSD_IN[$osd_num]:-out}"
|
||||
ceph_status="${up_status}/${in_status}"
|
||||
else
|
||||
osd_id="-"
|
||||
|
||||
Reference in New Issue
Block a user