#!/bin/bash
#==============================================================================
# Drive Atlas - Server Drive Mapping Tool
# Maps physical drive bays to logical device names using PCI paths
#
# Usage: run without arguments on the server to inspect.
# Environment:
#   DEBUG                    - when non-empty, also dump the raw bay mapping
#   DRIVE_ATLAS_BY_PATH_DIR  - directory holding the by-path symlinks
#                              (default: /dev/disk/by-path)
#
# NOTE: `set -e` is deliberately not enabled. Most probes below (grep, lsblk,
# smartctl, ceph) are best-effort and are expected to fail on hosts that lack
# the hardware or the tool; a failure must only blank a column, not abort.
#==============================================================================

#------------------------------------------------------------------------------
# Server-Specific Drive Mappings
# Maps PCI paths to physical bay numbers for each server
# Format: "pci-path bay-number"
#------------------------------------------------------------------------------

declare -A SERVER_MAPPINGS=(
  # compute-storage-01 (formerly medium2)
  # Motherboard: B650D4U3-2Q/BCM with AMD SATA controller
  # HBA: LSI SAS3008 at 01:00.0 (mini-SAS HD ports)
  # Cable mapping from user notes:
  # - Mobo SATA: top-right=bay1, bottom-right=bay2, bottom-left=bay3, top-left=bay4
  # - HBA bottom mini-SAS: bays 5,6,7,8
  # - HBA top mini-SAS: bays 9,10
  ["compute-storage-01"]="
    pci-0000:0d:00.0-ata-2 1
    pci-0000:0d:00.0-ata-1 2
    pci-0000:0d:00.0-ata-3 3
    pci-0000:0d:00.0-ata-4 4
    pci-0000:01:00.0-sas-phy6-lun-0 5
    pci-0000:01:00.0-sas-phy7-lun-0 6
    pci-0000:01:00.0-sas-phy5-lun-0 7
    pci-0000:01:00.0-sas-phy2-lun-0 8
    pci-0000:01:00.0-sas-phy4-lun-0 9
    pci-0000:01:00.0-sas-phy3-lun-0 10
    pci-0000:0e:00.0-nvme-1 m2-1
  "

  # storage-01
  # Different motherboard, no HBA currently
  # TODO: Map actual PCI paths after running diagnose-drives.sh
  ["storage-01"]=""

  # large1
  # Unique chassis - 1/1 configuration
  # TODO: Map actual PCI paths after running diagnose-drives.sh
  ["large1"]=""
)

# Hostname -> chassis layout used to draw the bay diagram.
declare -A CHASSIS_TYPES=(
  ["compute-storage-01"]="10bay"
  ["compute-storage-gpu-01"]="10bay"
  ["storage-01"]="10bay"
  ["large1"]="large1"
  ["micro1"]="micro"
  ["monitor-02"]="micro"
)

# Bay/slot name -> kernel device name (e.g. 3 -> sdc). Filled by build_drive_map.
declare -A DRIVE_MAP=()

#------------------------------------------------------------------------------
# Formatting helpers
#------------------------------------------------------------------------------

# repeat_str STR COUNT
# Prints STR repeated COUNT times, without a trailing newline.
repeat_str() {
  local str=$1 count=$2 out='' i
  for (( i = 0; i < count; i++ )); do
    out+=$str
  done
  printf '%s' "$out"
}

# center_text TEXT WIDTH
# Prints TEXT centred in WIDTH columns (the odd spare column goes to the
# right). TEXT longer than WIDTH is truncated so callers' boxes stay intact.
center_text() {
  local text=$1 width=$2
  text=${text:0:width}
  local spare=$(( width - ${#text} ))
  local left=$(( spare / 2 ))
  printf '%*s%s%*s' "$left" '' "$text" "$(( spare - left ))" ''
}

# pad_right TEXT WIDTH
# Left-justifies TEXT in WIDTH columns using the character count (${#TEXT})
# rather than printf's own width handling, so values containing multi-byte
# characters ("✓", "°C") do not shift the columns that follow them.
pad_right() {
  local text=$1 width=$2
  local len=${#text}
  printf '%s' "$text"
  if (( len < width )); then
    printf '%*s' "$(( width - len ))" ''
  fi
}

# frame_text_row TEXT WIDTH
# Prints one framed row holding plain TEXT left-justified in WIDTH columns.
frame_text_row() {
  printf '│ %-*s │\n' "$2" "$1"
}

# frame_row CONTENT CONTENT_COLS INNER_COLS
# Prints CONTENT centred between the outer frame borders. CONTENT_COLS is
# passed explicitly because CONTENT contains box-drawing characters.
frame_row() {
  local content=$1 cols=$2 inner=$3
  local left=$(( (inner - cols) / 2 ))
  printf '│%*s%s%*s│\n' "$left" '' "$content" "$(( inner - cols - left ))" ''
}

#------------------------------------------------------------------------------
# Core Functions
#------------------------------------------------------------------------------

# build_drive_map [HOST]
# Rebuilds the global DRIVE_MAP (slot -> device name) for HOST, defaulting to
# the output of `hostname`. Every "pci-path slot" line of SERVER_MAPPINGS[HOST]
# whose by-path symlink exists is resolved to its device basename; lines whose
# symlink is absent are skipped, leaving that slot unmapped.
build_drive_map() {
  local host=${1:-}
  local by_path_dir=${DRIVE_ATLAS_BY_PATH_DIR:-/dev/disk/by-path}
  local mapping path slot link target

  declare -g -A DRIVE_MAP=()

  if [[ -z $host ]]; then
    host=$(hostname 2>/dev/null) || host=''
  fi
  # An empty key is not a valid associative-array subscript.
  [[ -n $host ]] || return 0

  mapping=${SERVER_MAPPINGS[$host]:-}
  [[ -n $mapping ]] || return 0

  while read -r path slot; do
    [[ -n $path && -n $slot ]] || continue
    link="$by_path_dir/$path"
    [[ -L $link ]] || continue
    target=$(readlink -f -- "$link") || continue
    DRIVE_MAP[$slot]=${target##*/}
  done <<< "$mapping"
}

# get_drive_smart_info DEVICE
# Queries `sudo smartctl -A -i -H /dev/DEVICE` and prints one line:
#   TYPE|TEMP|HEALTH|MODEL|SERIAL
# TYPE is SSD when the rotation rate reads "Solid State", otherwise HDD.
# TEMP is "<n>°C", or "-" when no temperature could be found.
# HEALTH is ✓ when the overall health check passed, otherwise ✗.
get_drive_smart_info() {
  local device=$1
  local smart_info temp type health model serial

  # smartctl's exit status is a bit mask that can be non-zero even though a
  # usable report was printed, so only the captured text is inspected.
  smart_info=$(sudo smartctl -A -i -H "/dev/$device" 2>/dev/null) || true

  # ATA attribute rows carry the raw value in column 10; SCSI and NVMe
  # reports use a "label: value unit" line instead.
  temp=$(awk '
    /Temperature/ && $10 ~ /^[0-9]+$/ { print $10; exit }
    /^Current Drive Temperature:/     { print $4; exit }
    /^Temperature:/                   { print $2; exit }
  ' <<< "$smart_info")

  if grep -q 'Rotation Rate.*Solid State' <<< "$smart_info"; then
    type="SSD"
  else
    type="HDD"
  fi

  if grep -Eq 'SMART overall-health.*PASSED|SMART Health Status:[[:space:]]*OK' <<< "$smart_info"; then
    health="✓"
  else
    health="✗"
  fi

  model=$(awk '
    /^(Device Model|Model Number):/ {
      sub(/^[^:]*:[ \t]*/, "")
      sub(/[ \t]+$/, "")
      print
      exit
    }
  ' <<< "$smart_info")
  serial=$(awk '/^Serial [Nn]umber:/ { print $3; exit }' <<< "$smart_info")

  if [[ -n $temp ]]; then
    temp="${temp}°C"
  else
    temp="-"
  fi

  printf '%s|%s|%s|%s|%s\n' "$type" "$temp" "$health" "$model" "$serial"
}

# lsblk_field COLUMN NODE
# Prints a single lsblk column for device node NODE with surrounding
# whitespace removed; prints nothing when lsblk fails.
lsblk_field() {
  local column=$1 node=$2 value
  value=$(lsblk -d -n -o "$column" "$node" 2>/dev/null) || true
  value=${value#"${value%%[![:space:]]*}"}
  value=${value%"${value##*[![:space:]]}"}
  printf '%s' "$value"
}

# lookup_osd_id DEVICE LISTING
# Scans LISTING (text of `ceph-volume lvm list`) and prints "osd.N" for the
# first OSD section that names /dev/DEVICE or one of its partitions. Prints
# nothing when the device backs no OSD. The device must match as a whole
# word, so sda is never confused with sdaa.
lookup_osd_id() {
  local device=$1 listing=$2
  [[ -n $listing ]] || return 0
  awk -v dev="/dev/$device" '
    /^=+ osd\.[0-9]+ =+/      { current = $2; next }
    $1 == "osd" && $2 == "id" { current = "osd." $3; next }
    {
      for (i = 1; i <= NF; i++) {
        field = $i
        sub(/,$/, "", field)
        if (index(field, dev) == 1 &&
            substr(field, length(dev) + 1) ~ /^(p?[0-9]+)?$/ &&
            current != "") {
          print current
          exit
        }
      }
    }
  ' <<< "$listing"
}

# lookup_osd_status OSD_NUM TREE
# Scans TREE (text of `ceph osd tree`) for the row of osd.OSD_NUM and prints
# "<up|down|unknown>/<in|out>". The status and reweight columns are located
# relative to the osd.N name field, so a blank CLASS column does not shift
# them. A reweight greater than zero means "in". Prints "unknown/out" when
# the row is missing.
lookup_osd_status() {
  local osd_num=$1 tree=$2
  awk -v id="$osd_num" -v name="osd.${osd_num}" '
    $1 == id {
      for (i = 2; i <= NF; i++) {
        if ($i == name) {
          status = $(i + 1)
          reweight = $(i + 2)
          exit
        }
      }
    }
    END {
      if (status == "") status = "unknown"
      printf "%s/%s\n", status, ((reweight + 0 > 0) ? "in" : "out")
    }
  ' <<< "$tree"
}

# get_mount_usage DEVICE
# Prints "BOOT" when /dev/DEVICE or one of its partitions is mounted at /,
# otherwise the first mount point found for it, otherwise "-".
get_mount_usage() {
  local device=$1
  mount 2>/dev/null | awk -v dev="/dev/$device" '
    index($1, dev) == 1 && substr($1, length(dev) + 1) ~ /^(p?[0-9]+)?$/ {
      if ($3 == "/") root = 1
      if (first == "") first = $3
    }
    END {
      if (root) print "BOOT"
      else if (first != "") print first
      else print "-"
    }
  '
}

#------------------------------------------------------------------------------
# Chassis Type Definitions
# These define the physical layout and display formatting for each chassis type
#------------------------------------------------------------------------------

# generate_10bay_layout HOSTNAME
# Draws the 10-bay hot-swap chassis for the local host, titled with HOSTNAME.
# Each bay cell shows "<bay>:<device>", or EMPTY when the bay is unmapped. The
# frame width is derived from the cell geometry so the borders always line up.
generate_10bay_layout() {
  local title_host=$1
  local -r bay_count=10 label_width=2 name_width=7
  # Cell interior is "<label>:<name>"; each cell adds two borders and one gap.
  local -r cell_inner=$(( label_width + 1 + name_width ))
  local -r content_width=$(( bay_count * (cell_inner + 3) - 1 ))
  local frame_border cell_border
  local top='' middle='' bottom='' cell bay

  frame_border=$(repeat_str '─' "$(( content_width + 2 ))")
  cell_border=$(repeat_str '─' "$cell_inner")

  build_drive_map

  # Main chassis section
  printf '┌%s┐\n' "$frame_border"
  frame_text_row "$title_host - 10-Bay Hot-swap Chassis" "$content_width"
  frame_text_row '' "$content_width"

  # M.2 NVMe slot if present
  if [[ -n ${DRIVE_MAP[m2-1]:-} ]]; then
    frame_text_row "M.2 NVMe: ${DRIVE_MAP[m2-1]}" "$content_width"
    frame_text_row '' "$content_width"
  fi

  frame_text_row "Front Hot-swap Bays:" "$content_width"
  frame_text_row '' "$content_width"

  for (( bay = 1; bay <= bay_count; bay++ )); do
    top+="┌${cell_border}┐ "
    printf -v cell '│%-*d:%-*s│ ' \
      "$label_width" "$bay" "$name_width" "${DRIVE_MAP[$bay]:-EMPTY}"
    middle+=$cell
    bottom+="└${cell_border}┘ "
  done

  # Drop the trailing gap so each row is exactly content_width columns wide.
  printf '│ %s │\n' "${top% }" "${middle% }" "${bottom% }"
  printf '└%s┘\n' "$frame_border"
}

# generate_large1_layout
# Draws the large1 chassis: a motherboard box with two M.2 slots above a 3x3
# grid of numbered bays. The line under each bay number shows the mapped
# device name and stays blank while the bay is unmapped.
generate_large1_layout() {
  local -r inner=61 mobo_inner=46 bay_inner=17 columns=3 rows=3
  local -r mobo_cols=$(( mobo_inner + 2 ))
  local -r grid_cols=$(( columns * (bay_inner + 2) + columns - 1 ))
  local outer_border mobo_border bay_border slot_pad
  local row col bay line top blank numbers devices bottom

  outer_border=$(repeat_str '─' "$inner")
  mobo_border=$(repeat_str '─' "$mobo_inner")
  bay_border=$(repeat_str '─' "$bay_inner")
  # The two M.2 slot boxes take 10 columns including their left indent.
  printf -v slot_pad '%*s' "$(( mobo_inner - 10 ))" ''

  build_drive_map

  printf '┌%s┐\n' "$outer_border"
  frame_row "large1" 6 "$inner"
  frame_row "Unique 3x5 Grid Chassis" 23 "$inner"
  frame_row "" 0 "$inner"

  frame_row "┌${mobo_border}┐" "$mobo_cols" "$inner"
  frame_row "│$(center_text 'Motherboard' "$mobo_inner")│" "$mobo_cols" "$inner"
  frame_row "│$(center_text '' "$mobo_inner")│" "$mobo_cols" "$inner"
  frame_row "│  ┌──┐┌──┐${slot_pad}│" "$mobo_cols" "$inner"
  frame_row "│  │M1││M2│${slot_pad}│" "$mobo_cols" "$inner"
  frame_row "│  └──┘└──┘${slot_pad}│" "$mobo_cols" "$inner"
  frame_row "└${mobo_border}┘" "$mobo_cols" "$inner"
  frame_row "" 0 "$inner"

  for (( row = 0; row < rows; row++ )); do
    top='' blank='' numbers='' devices='' bottom=''
    for (( col = 0; col < columns; col++ )); do
      bay=$(( row * columns + col + 1 ))
      top+="┌${bay_border}┐ "
      blank+="│$(center_text '' "$bay_inner")│ "
      numbers+="│$(center_text "$bay" "$bay_inner")│ "
      devices+="│$(center_text "${DRIVE_MAP[$bay]:-}" "$bay_inner")│ "
      bottom+="└${bay_border}┘ "
    done
    for line in "${top% }" "${blank% }" "${numbers% }" "${devices% }" "${bottom% }"; do
      frame_row "$line" "$grid_cols" "$inner"
    done
  done

  printf '└%s┘\n' "$outer_border"
}

#------------------------------------------------------------------------------
# Drive Details Section
#------------------------------------------------------------------------------

# print_drive_details
# Prints one table row per numbered bay in DRIVE_MAP whose device exists as a
# block device: size, SMART summary, Ceph OSD id and state, and mount usage.
# Non-numeric slots (such as m2-1) are not listed here.
print_drive_details() {
  local bay device size smart_info type temp health model serial
  local osd_id ceph_status usage
  local lvm_listing='' lvm_loaded=0 osd_tree='' tree_loaded=0
  local -a bays=()

  printf '\n=== Drive Details with SMART Status (by Bay Position) ===\n'
  printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s\n" \
    "BAY" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL" "SERIAL" "CEPH OSD" "STATUS" "USAGE"
  echo "----------------------------------------------------------------------------------------------------------------------------------------------------"

  # Build reverse map: device -> bay (not read anywhere else in this script)
  declare -g -A DEVICE_TO_BAY=()
  for bay in "${!DRIVE_MAP[@]}"; do
    device=${DRIVE_MAP[$bay]}
    if [[ -n $device && $device != "EMPTY" ]]; then
      DEVICE_TO_BAY[$device]=$bay
    fi
  done

  # Sort drives by bay position
  mapfile -t bays < <(printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^[0-9]+$' | sort -n)

  for bay in "${bays[@]}"; do
    device=${DRIVE_MAP[$bay]}
    [[ -n $device && $device != "EMPTY" && -b "/dev/$device" ]] || continue

    size=$(lsblk_field SIZE "/dev/$device")
    smart_info=$(get_drive_smart_info "$device")
    IFS='|' read -r type temp health model serial <<< "$smart_info"

    # The Ceph listings are the same for every drive: fetch each one once,
    # and only when a drive actually needs it.
    if (( ! lvm_loaded )); then
      lvm_listing=$(ceph-volume lvm list 2>/dev/null) || true
      lvm_loaded=1
    fi

    ceph_status="-"
    osd_id=$(lookup_osd_id "$device" "$lvm_listing")
    if [[ -n $osd_id ]]; then
      if (( ! tree_loaded )); then
        osd_tree=$(ceph osd tree 2>/dev/null) || true
        tree_loaded=1
      fi
      ceph_status=$(lookup_osd_status "${osd_id#osd.}" "$osd_tree")
    else
      osd_id="-"
    fi

    usage=$(get_mount_usage "$device")

    # TEMP and HEALTH hold multi-byte characters, so they are padded by
    # pad_right instead of a %-8s conversion.
    printf "%-5s %-15s %-10s %-8s %s %s %-30s %-20s %-12s %-10s %-10s\n" \
      "$bay" "/dev/$device" "$size" "$type" \
      "$(pad_right "$temp" 8)" "$(pad_right "$health" 8)" \
      "$model" "$serial" "$osd_id" "$ceph_status" "$usage"
  done
}

# print_nvme_drives
# Lists NVMe namespaces when the nvme CLI is installed. Device nodes come
# from `nvme list`; size, model and serial are read per node from lsblk,
# because model names can contain spaces and so cannot be split out of the
# `nvme list` table by whitespace.
print_nvme_drives() {
  local listing node _rest

  command -v nvme >/dev/null 2>&1 || return 0
  listing=$(sudo nvme list 2>/dev/null | grep "^/dev")
  [[ -n $listing ]] || return 0

  printf '\n=== NVMe Drives ===\n'
  printf "%-15s %-10s %-10s %-40s %-20s\n" "DEVICE" "SIZE" "TYPE" "MODEL" "SERIAL"
  echo "----------------------------------------------------------------------------------------------------------------------------"
  while read -r node _rest; do
    [[ -n $node ]] || continue
    printf "%-15s %-10s %-10s %-40s %-20s\n" \
      "$node" "$(lsblk_field SIZE "$node")" "NVMe" \
      "$(lsblk_field MODEL "$node")" "$(lsblk_field SERIAL "$node")"
  done <<< "$listing"
}

#------------------------------------------------------------------------------
# Optional sections
#------------------------------------------------------------------------------

# print_rbd_devices
# Lists Ceph RBD block devices reported by lsblk; prints nothing when there
# are none.
print_rbd_devices() {
  local listing
  listing=$(lsblk -o NAME,SIZE,TYPE,MOUNTPOINT 2>/dev/null | grep "rbd" | sort -V)
  [[ -n $listing ]] || return 0

  printf '\n=== Ceph RBD Devices ===\n'
  printf "%-15s %-10s %-10s %-20s\n" "DEVICE" "SIZE" "TYPE" "MOUNTPOINT"
  echo "------------------------------------------------------------"
  printf '%s\n' "$listing"
}

# print_debug_mappings
# When DEBUG is non-empty, dumps DRIVE_MAP ordered by bay number.
print_debug_mappings() {
  local key
  [[ -n ${DEBUG:-} ]] || return 0

  printf '\n=== DEBUG: Drive Mappings ===\n'
  # Sort on the second word: every line starts with "Bay", so a plain
  # numeric sort would not order the rows by bay number.
  for key in "${!DRIVE_MAP[@]}"; do
    printf 'Bay %s: %s\n' "$key" "${DRIVE_MAP[$key]}"
  done | sort -k2,2n
}

#------------------------------------------------------------------------------
# Main Display Logic
#------------------------------------------------------------------------------

# main
# Draws the chassis layout registered for this host, then the detail tables.
main() {
  local host chassis_type="unknown"

  host=$(hostname 2>/dev/null) || host=''
  # An empty key is not a valid associative-array subscript.
  if [[ -n $host ]]; then
    chassis_type=${CHASSIS_TYPES[$host]:-unknown}
  fi

  # Display chassis layout
  case "$chassis_type" in
    "10bay")
      generate_10bay_layout "$host"
      ;;
    "large1")
      generate_large1_layout
      ;;
    "micro")
      echo "Micro server layout not yet implemented"
      ;;
    *)
      echo "┌─────────────────────────────────────────────────────────┐"
      echo "│ Unknown server: $host"
      echo "│ No chassis mapping defined yet"
      echo "│ Run diagnose-drives.sh to gather PCI path information"
      echo "└─────────────────────────────────────────────────────────┘"
      ;;
  esac

  print_drive_details
  print_nvme_drives
  print_rbd_devices
  print_debug_mappings
}

main "$@"