# Source-page residue (latest commit message and file stats), kept as comments:
#
#   Fixed parsing of ceph osd tree output:
#     - Column 5 is STATUS (up/down), not column 6
#     - Column 6 is REWEIGHT (1.0 = in, 0 = out)
#     - Now correctly shows up/in for active OSDs
#
#   🤖 Generated with [Claude Code](https://claude.com/claude-code)
#   Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
#
#   310 lines · 14 KiB · Bash
#!/bin/bash

#==============================================================================
# Drive Atlas - Server Drive Mapping Tool
# Maps physical drive bays to logical device names using PCI paths
#==============================================================================

#------------------------------------------------------------------------------
# Chassis Type Definitions
# These define the physical layout and display formatting for each chassis type
#------------------------------------------------------------------------------

#######################################
# Render the front view of a 10-bay hot-swap chassis.
# Globals:   DRIVE_MAP (read; populated by the build_drive_map call below)
# Arguments: $1 - hostname shown in the title row
# Outputs:   box-drawing diagram on stdout; every row has the same width
#######################################
generate_10bay_layout() {
    local hostname=$1
    build_drive_map "$hostname"

    # Geometry in display columns. A bay cell is "│" + cell_inner + "│" followed
    # by one space, and the row of cells gets one space of margin on each side.
    # The frame is derived from that so the bay rows can never overflow it.
    local -r bays=10 cell_inner=10
    local -r name_width=$(( cell_inner - 3 ))          # room left after "NN:"
    local -r inner=$(( bays * (cell_inner + 3) + 2 ))  # columns between the outer │ │
    local -r text_width=$(( inner - 2 ))

    # Build the horizontal rules by padding with spaces and swapping in "─";
    # this avoids asking printf to measure multi-byte characters.
    local frame_rule cell_rule
    printf -v frame_rule '%*s' "$inner" ''
    frame_rule=${frame_rule// /─}
    printf -v cell_rule '%*s' "$cell_inner" ''
    cell_rule=${cell_rule// /─}

    # Assemble the three bay rows in a single pass.
    local bay label tops="" cells="" bottoms=""
    for (( bay = 1; bay <= bays; bay++ )); do
        # Precision truncates long device names so a cell never grows.
        printf -v label '%-2d:%-*.*s' "$bay" "$name_width" "$name_width" \
            "${DRIVE_MAP[$bay]:-EMPTY}"
        tops+="┌${cell_rule}┐ "
        cells+="│${label}│ "
        bottoms+="└${cell_rule}┘ "
    done

    printf '┌%s┐\n' "$frame_rule"
    printf '│ %-*s │\n' "$text_width" "$hostname - 10-Bay Hot-swap Chassis"
    printf '│%*s│\n' "$inner" ''

    # M.2 NVMe slot if present
    if [[ -n "${DRIVE_MAP[m2-1]}" ]]; then
        printf '│ %-*s │\n' "$text_width" "M.2 NVMe: ${DRIVE_MAP[m2-1]}"
        printf '│%*s│\n' "$inner" ''
    fi

    printf '│ %-*s │\n' "$text_width" "Front Hot-swap Bays:"
    printf '│%*s│\n' "$inner" ''

    printf '│ %s │\n' "$tops"
    printf '│ %s │\n' "$cells"
    printf '│ %s │\n' "$bottoms"

    printf '└%s┘\n' "$frame_rule"
}

# Centre ASCII text $1 in a field of $2 columns (extra space goes on the right).
_large1_center() {
    local text=$1 width=$2
    local left=$(( (width - ${#text}) / 2 ))
    local right=$(( width - ${#text} - left ))
    printf '%*s%s%*s' "$left" '' "$text" "$right" ''
}

#######################################
# Render the static diagram of the "large1" chassis: a motherboard panel with
# two M-slots above a 3x3 grid of numbered bays.
# Globals:   DRIVE_MAP (refreshed via build_drive_map; not drawn in the art)
# Arguments: none
# Outputs:   28 rows of box-drawing art on stdout, each 63 columns wide
#######################################
generate_large1_layout() {
    build_drive_map

    # Geometry in display columns; all rows are inner + 2 columns wide.
    local -r inner=61 bay_inner=17 board_inner=46 board_left=6
    local -r board_right=$(( inner - board_inner - 2 - board_left ))

    # Rules are made by padding with spaces and substituting "─", so no
    # multi-byte character is ever passed through a printf width.
    local frame_rule bay_rule board_rule lpad rpad bay_gap
    printf -v frame_rule '%*s' "$inner" '';       frame_rule=${frame_rule// /─}
    printf -v bay_rule   '%*s' "$bay_inner" '';   bay_rule=${bay_rule// /─}
    printf -v board_rule '%*s' "$board_inner" ''; board_rule=${board_rule// /─}
    printf -v lpad    '%*s' "$board_left" ''
    printf -v rpad    '%*s' "$board_right" ''
    printf -v bay_gap '%*s' "$bay_inner" ''

    # Title
    printf '┌%s┐\n' "$frame_rule"
    printf '│%s│\n' "$(_large1_center 'large1' "$inner")"
    printf '│%s│\n' "$(_large1_center 'Unique 3x5 Grid Chassis' "$inner")"
    printf '│%*s│\n' "$inner" ''

    # Motherboard panel with the two M-slots
    local slot_art
    printf '│%s┌%s┐%s│\n' "$lpad" "$board_rule" "$rpad"
    printf '│%s│%s│%s│\n' "$lpad" "$(_large1_center 'Motherboard' "$board_inner")" "$rpad"
    printf '│%s│%*s│%s│\n' "$lpad" "$board_inner" '' "$rpad"
    for slot_art in '┌──┐┌──┐' '│M1││M2│' '└──┘└──┘'; do
        # 2 columns of indent + 8 columns of art, padded out to the panel width
        printf '│%s│  %s%*s│%s│\n' "$lpad" "$slot_art" "$(( board_inner - 10 ))" '' "$rpad"
    done
    printf '│%s└%s┘%s│\n' "$lpad" "$board_rule" "$rpad"
    printf '│%*s│\n' "$inner" ''

    # 3x3 bay grid, numbered row by row
    local row col tops="" blanks="" bottoms="" labels
    for col in 1 2 3; do
        tops+=" ┌${bay_rule}┐"
        blanks+=" │${bay_gap}│"
        bottoms+=" └${bay_rule}┘"
    done
    for row in 0 1 2; do
        labels=""
        for col in 1 2 3; do
            labels+=" │$(_large1_center "$(( row * 3 + col ))" "$bay_inner")│"
        done
        printf '│%s │\n' "$tops" "$blanks" "$labels" "$blanks" "$bottoms"
    done

    printf '└%s┘\n' "$frame_rule"
}

#------------------------------------------------------------------------------
# Server-Specific Drive Mappings
# Maps PCI paths to physical bay numbers for each server
# Format: "pci-path bay-number"
#------------------------------------------------------------------------------

# Per-host table of "<by-path symlink name> <bay>" lines, one mapping per line.
# The bay is either a front-bay number or a named slot such as "m2-1".
# Hosts with an empty string have no mappings recorded yet.
declare -A SERVER_MAPPINGS=(
    # compute-storage-01 (formerly medium2)
    # Motherboard: B650D4U3-2Q/BCM with AMD SATA controller
    # HBA: LSI SAS3008 at 01:00.0 (mini-SAS HD ports)
    # Cable mapping from user notes:
    # - Mobo SATA: top-right=bay1, bottom-right=bay2, bottom-left=bay3, top-left=bay4
    # - HBA bottom mini-SAS: bays 5,6,7,8
    # - HBA top mini-SAS: bays 9,10
    ["compute-storage-01"]="
        pci-0000:0d:00.0-ata-2 1
        pci-0000:0d:00.0-ata-1 2
        pci-0000:0d:00.0-ata-3 3
        pci-0000:0d:00.0-ata-4 4
        pci-0000:01:00.0-sas-phy6-lun-0 5
        pci-0000:01:00.0-sas-phy7-lun-0 6
        pci-0000:01:00.0-sas-phy5-lun-0 7
        pci-0000:01:00.0-sas-phy2-lun-0 8
        pci-0000:01:00.0-sas-phy4-lun-0 9
        pci-0000:01:00.0-sas-phy3-lun-0 10
        pci-0000:0e:00.0-nvme-1 m2-1
    "

    # storage-01
    # Different motherboard, no HBA currently
    # TODO: Map actual PCI paths after running diagnose-drives.sh
    ["storage-01"]="
    "

    # large1
    # Unique chassis - 1/1 configuration
    # TODO: Map actual PCI paths after running diagnose-drives.sh
    ["large1"]="
    "
)

# Hostname -> chassis layout identifier. The identifier selects which layout
# the main display logic draws; hosts not listed here fall through to its
# "unknown" branch.
declare -A CHASSIS_TYPES=(
    ["compute-storage-01"]="10bay"
    ["compute-storage-gpu-01"]="10bay"
    ["storage-01"]="10bay"
    ["large1"]="large1"
    ["micro1"]="micro"
    ["monitor-02"]="micro"
)

#------------------------------------------------------------------------------
# Core Functions
#------------------------------------------------------------------------------

#######################################
# Resolve a host's bay mappings into the global DRIVE_MAP (bay -> device name).
# Globals:   SERVER_MAPPINGS (read), DRIVE_MAP (reset, then written)
# Arguments: $1 - host to look up (optional; defaults to the output of hostname)
#            $2 - directory holding the by-path symlinks
#                 (optional; defaults to /dev/disk/by-path)
# Returns:   0; mappings whose symlink does not exist are simply left out
#######################################
build_drive_map() {
    local host=${1:-}
    local by_path_dir=${2:-/dev/disk/by-path}
    local mapping path slot target

    [[ -n "$host" ]] || host=$(hostname)

    # Always start from an empty table so a rebuild never keeps stale bays.
    declare -g -A DRIVE_MAP=()

    [[ -n "$host" ]] || return 0
    mapping=${SERVER_MAPPINGS[$host]:-}
    [[ -n "$mapping" ]] || return 0

    while read -r path slot _; do
        # Skip blank/incomplete lines and '#' comment lines in the mapping text.
        [[ -z "$path" || -z "$slot" || "$path" == \#* ]] && continue
        [[ -L "$by_path_dir/$path" ]] || continue

        target=$(readlink -f -- "$by_path_dir/$path") || continue
        DRIVE_MAP[$slot]=${target##*/}   # keep only the device name, e.g. "sda"
    done <<< "$mapping"

    return 0
}

#######################################
# Summarise SMART data for one drive.
# Arguments: $1 - device name without the /dev/ prefix (e.g. sda, nvme0n1)
# Outputs:   "TYPE|TEMP|HEALTH|MODEL|SERIAL" on stdout, where
#              TYPE   is SSD or HDD
#              TEMP   is "<n>°C", or "-" when no temperature was found
#              HEALTH is ✓ when smartctl reports a passing status, else ✗
# Notes:     smartctl is run through sudo; if it fails the fields fall back to
#            their placeholders instead of aborting the caller.
#######################################
get_drive_smart_info() {
    local device=$1
    local smart_info temp type health model serial
    local temp_label="-"

    smart_info=$(sudo smartctl -A -i -H "/dev/$device" 2>/dev/null)

    # Temperature appears in different shapes depending on the transport:
    # a "Temperature:" line, a "Current Drive Temperature:" line, or an
    # attribute-table row whose 10th column is the raw value.
    temp=$(awk '
        /^Temperature:/               { print $2;  exit }
        /^Current Drive Temperature:/ { print $4;  exit }
        /Temperature/ && NF >= 10     { print $10; exit }
    ' <<< "$smart_info")
    [[ "$temp" =~ ^[0-9]+$ ]] && temp_label="${temp}°C"

    # NVMe output has no "Rotation Rate" line, so go by the device name there.
    if grep -q 'Rotation Rate.*Solid State' <<< "$smart_info" || [[ "$device" == nvme* ]]; then
        type="SSD"
    else
        type="HDD"
    fi

    if grep -Eq 'SMART overall-health.*PASSED|SMART Health Status:[[:space:]]*OK' <<< "$smart_info"; then
        health="✓"
    else
        health="✗"
    fi

    # Done in awk rather than xargs: xargs aborts on names containing quotes.
    model=$(awk '
        /Device Model|Model Number|^Product:/ {
            sub(/^[^:]*:[ \t]*/, "")
            gsub(/[ \t]+/, " ")
            sub(/ $/, "")
            print
            exit
        }
    ' <<< "$smart_info")
    serial=$(awk '/Serial [Nn]umber/ { print $3; exit }' <<< "$smart_info")

    printf '%s|%s|%s|%s|%s\n' "$type" "$temp_label" "$health" "$model" "$serial"
}

#------------------------------------------------------------------------------
# Main Display Logic
#------------------------------------------------------------------------------

# Identify this machine and pick the layout registered for it; hosts without
# an entry in CHASSIS_TYPES are reported as "unknown".
HOSTNAME=$(hostname)
CHASSIS_TYPE=${CHASSIS_TYPES[$HOSTNAME]:-"unknown"}

# Display chassis layout
case "$CHASSIS_TYPE" in
    "10bay")
        generate_10bay_layout "$HOSTNAME"
        ;;
    "large1")
        generate_large1_layout
        ;;
    "micro")
        echo "Micro server layout not yet implemented"
        ;;
    *)
        # Notice box: 57 columns between the side borders. Text rows are padded
        # with printf so the right border lines up for typical hostnames.
        printf -v unknown_rule '%*s' 57 ''
        unknown_rule=${unknown_rule// /─}
        printf '┌%s┐\n' "$unknown_rule"
        printf '│ %-55s │\n' \
            "Unknown server: $HOSTNAME" \
            "No chassis mapping defined yet" \
            "Run diagnose-drives.sh to gather PCI path information"
        printf '└%s┘\n' "$unknown_rule"
        ;;
esac

#------------------------------------------------------------------------------
# Drive Details Section
#------------------------------------------------------------------------------

#######################################
# Find the Ceph OSD that lives on a block device.
# Arguments: $1 - device name without /dev/ (e.g. sda)
#            $2 - text produced by `ceph-volume lvm list`
# Outputs:   "osd.<id>" on stdout, or nothing when no OSD uses the device
# Notes:     The OSD is the most recent "====== osd.N ======" header or
#            "osd id" line seen before the device. The device must match as a
#            whole word (optionally with a partition suffix), so sda never
#            matches sdaa.
#######################################
find_osd_for_device() {
    awk -v dev="/dev/$1" '
        BEGIN { re = "^" dev "(p?[0-9]+)?$" }
        /^=+ osd\.[0-9]+ =+/       { current = $2 }
        $1 == "osd" && $2 == "id"  { current = "osd." $3 }
        {
            for (i = 1; i <= NF; i++) {
                n = split($i, parts, ",")   # "devices" may be comma-joined
                for (j = 1; j <= n; j++) {
                    if (parts[j] ~ re) {
                        if (current != "") print current
                        exit
                    }
                }
            }
        }
    ' <<< "$2"
}

#######################################
# Report an OSD's state as "<up|down>/<in|out>".
# Arguments: $1 - numeric OSD id
#            $2 - text produced by `ceph osd tree`
# Outputs:   status string; "unknown/out" when the OSD is not listed
# Notes:     STATUS and REWEIGHT are read relative to the "osd.N" name column,
#            so rows with an empty CLASS column are still parsed correctly.
#            A REWEIGHT above 0 counts as "in".
#######################################
osd_tree_status() {
    awk -v name="osd.$1" '
        {
            for (i = 1; i <= NF; i++) {
                if ($i == name) {
                    up = $(i + 1)
                    if (up == "") up = "unknown"
                    printf "%s/%s\n", up, ($(i + 2) + 0 > 0 ? "in" : "out")
                    found = 1
                    exit
                }
            }
        }
        END { if (!found) print "unknown/out" }
    ' <<< "$2"
}

#######################################
# Describe how a drive is used, based on the mount table.
# Arguments: $1 - device name without /dev/ (e.g. sda)
#            $2 - text produced by `mount`
# Outputs:   "BOOT" if the drive or one of its partitions is mounted at /,
#            otherwise the first mount point found, otherwise "-"
#######################################
device_usage() {
    awk -v dev="/dev/$1" '
        BEGIN { re = "^" dev "(p?[0-9]+)?$" }
        $1 ~ re {
            if ($3 == "/") root = 1
            else if (first == "") first = $3
        }
        END {
            if (root) print "BOOT"
            else if (first != "") print first
            else print "-"
        }
    ' <<< "$2"
}

echo -e "\n=== Drive Details with SMART Status (by Bay Position) ==="
printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s\n" "BAY" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL" "SERIAL" "CEPH OSD" "STATUS" "USAGE"
echo "----------------------------------------------------------------------------------------------------------------------------------------------------"

# Build reverse map: device -> bay
declare -A DEVICE_TO_BAY
for bay in "${!DRIVE_MAP[@]}"; do
    device="${DRIVE_MAP[$bay]}"
    if [[ -n "$device" && "$device" != "EMPTY" ]]; then
        DEVICE_TO_BAY[$device]=$bay
    fi
done

# Numbered bays only, in bay order (named slots such as m2-1 are not listed here)
mapfile -t sorted_bays < <(printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^[0-9]+$' | sort -n)

# Query the slow, host-wide sources once instead of once per drive.
ceph_volume_listing=""
ceph_osd_tree=""
ceph_tree_loaded=0
mount_table=""
if (( ${#sorted_bays[@]} > 0 )); then
    ceph_volume_listing=$(ceph-volume lvm list 2>/dev/null)
    mount_table=$(mount 2>/dev/null)
fi

for bay in "${sorted_bays[@]}"; do
    device="${DRIVE_MAP[$bay]}"
    [[ -n "$device" && "$device" != "EMPTY" && -b "/dev/$device" ]] || continue

    size=$(lsblk -d -n -o SIZE "/dev/$device" 2>/dev/null)
    smart_info=$(get_drive_smart_info "$device")
    IFS='|' read -r type temp health model serial <<< "$smart_info"

    # Check for Ceph OSD
    osd_id=$(find_osd_for_device "$device" "$ceph_volume_listing")

    # Get Ceph status if OSD exists
    ceph_status="-"
    if [[ -n "$osd_id" ]]; then
        # The tree is only fetched once the first OSD-backed drive shows up.
        if (( ! ceph_tree_loaded )); then
            ceph_osd_tree=$(ceph osd tree 2>/dev/null)
            ceph_tree_loaded=1
        fi
        ceph_status=$(osd_tree_status "${osd_id#osd.}" "$ceph_osd_tree")
    else
        osd_id="-"
    fi

    # Check if boot drive
    usage=$(device_usage "$device" "$mount_table")

    printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s\n" "$bay" "/dev/$device" "$size" "$type" "$temp" "$health" "$model" "$serial" "$osd_id" "$ceph_status" "$usage"
done

# NVMe drives

#######################################
# Turn `nvme list` output (stdin) into rows for the NVMe table.
# Outputs:   one "DEVICE SIZE TYPE MODEL SERIAL" row per /dev/... line
# Notes:     Columns are sliced at the offsets of the header titles, so model
#            names containing spaces stay intact. SIZE is the capacity after
#            the "/" in the Usage column. If no header line is present, the
#            rows fall back to plain whitespace-separated fields.
#######################################
format_nvme_rows() {
    awk '
        function trim(s) { sub(/^[ \t]+/, "", s); sub(/[ \t]+$/, "", s); return s }
        /^Node/ {
            sn_at    = index($0, "SN")
            model_at = index($0, "Model")
            ns_at    = index($0, "Namespace")
            usage_at = index($0, "Usage")
            fmt_at   = index($0, "Format")
            have_cols = (sn_at && model_at && ns_at && usage_at && fmt_at)
            next
        }
        !/^\/dev/ { next }
        have_cols {
            serial = trim(substr($0, sn_at, model_at - sn_at))
            name   = trim(substr($0, model_at, ns_at - model_at))
            total  = substr($0, usage_at, fmt_at - usage_at)
            sub(/^.*\//, "", total)          # drop the "used /" part
            total = trim(total)
            gsub(/[ \t]+/, " ", total)
            printf "%-15s %-10s %-10s %-40s %-20s\n", $1, total, "NVMe", name, serial
            next
        }
        { printf "%-15s %-10s %-10s %-40s %-20s\n", $1, $6, "NVMe", $3, $2 }
    '
}

if command -v nvme >/dev/null 2>&1; then
    nvme_listing=$(sudo nvme list 2>/dev/null)
    nvme_drives=$(grep "^/dev" <<< "$nvme_listing")
    if [[ -n "$nvme_drives" ]]; then
        echo -e "\n=== NVMe Drives ==="
        printf "%-15s %-10s %-10s %-40s %-20s\n" "DEVICE" "SIZE" "TYPE" "MODEL" "SERIAL"
        echo "----------------------------------------------------------------------------------------------------------------------------"
        format_nvme_rows <<< "$nvme_listing"
    fi
fi

#------------------------------------------------------------------------------
# Optional sections
#------------------------------------------------------------------------------

# Ceph RBD Devices

# Keep only the lsblk rows (stdin) whose NAME column is an rbd device, in
# version order. Matching on the first column means a non-RBD disk mounted at
# a path containing "rbd" is no longer listed.
select_rbd_rows() {
    awk '$1 ~ /rbd[0-9]/' | sort -V
}

rbd_output=$(lsblk -o NAME,SIZE,TYPE,MOUNTPOINT 2>/dev/null | select_rbd_rows)
if [[ -n "$rbd_output" ]]; then
    echo -e "\n=== Ceph RBD Devices ==="
    printf "%-15s %-10s %-10s %-20s\n" "DEVICE" "SIZE" "TYPE" "MOUNTPOINT"
    echo "------------------------------------------------------------"
    echo "$rbd_output"
fi

# Show mapping diagnostic info if DEBUG is set

# Print every DRIVE_MAP entry as "Bay <slot>: <device>", ordered by bay number.
# The sort key is the slot field: a plain `sort -n` sees the leading "Bay" and
# does not sort numerically. Named slots (e.g. m2-1) count as 0 and come first.
print_drive_map_debug() {
    local key
    for key in "${!DRIVE_MAP[@]}"; do
        printf 'Bay %s: %s\n' "$key" "${DRIVE_MAP[$key]}"
    done | LC_ALL=C sort -k2,2n -k2,2
}

if [[ -n "${DEBUG:-}" ]]; then
    echo -e "\n=== DEBUG: Drive Mappings ==="
    print_drive_map_debug
fi