Refactor Drive Atlas with modular chassis templates and PCI path mapping

Major improvements:
- Separated chassis types from server hostnames for better reusability
- Implemented template-based layout system (10bay, large1, spare-10bay)
- Renamed medium2 to compute-storage-01 for clarity
- Added comprehensive PCI path-based drive mapping system
- Created diagnose-drives.sh helper script for mapping new servers
- Added DEBUG mode for troubleshooting drive mappings
- Documented Sliger CX471225 4U chassis model

Technical changes:
- Replaced DRIVE_MAPPINGS with separate SERVER_MAPPINGS and CHASSIS_TYPES
- Improved drive detection and SMART data collection
- Better error handling for missing drives and unmapped servers
- Cleaner code structure with sectioned comments

Documentation:
- Complete rewrite of README with setup guide and troubleshooting
- Added detailed todo.txt with action plan and technical notes
- Included step-by-step instructions for mapping new servers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-06 15:52:24 -05:00
parent 585240b03f
commit 657b7d9a2d
4 changed files with 497 additions and 264 deletions

View File

@@ -1,84 +1,85 @@
#!/bin/bash
#######################################
# Print the lspci description of a single PCI device.
# Arguments: $1 - PCI address, in any form accepted by `lspci -s`
# Outputs:   whatever `lspci -s` printed for that address, on stdout
# Returns:   0 on success, otherwise the exit status of lspci
#######################################
get_device_info() {
  local pci_addr=$1
  local info

  # Assign separately from `local`: `local var=$(cmd)` always succeeds and
  # would hide a failing (or missing) lspci from the caller.
  info=$(lspci -s "$pci_addr") || return

  printf '%s\n' "$info"
}
#==============================================================================
# Drive Atlas - Server Drive Mapping Tool
# Maps physical drive bays to logical device names using PCI paths
#==============================================================================
#######################################
# Print the size of one whole-disk block device as reported by lsblk.
# Arguments: $1 - kernel device name without /dev/ (e.g. "sda")
# Outputs:   the SIZE column for that device, or an empty line if lsblk
#            does not list it
#######################################
get_drive_details() {
  local device=$1
  local size

  # Compare the NAME column exactly. A substring grep for "sda" also matches
  # "sdaa", "sdab", ... and would return several sizes at once.
  size=$(lsblk -d -n -o NAME,SIZE \
    | awk -v dev="$device" '$1 == dev { print $2; exit }')

  printf '%s\n' "$size"
}
#------------------------------------------------------------------------------
# Chassis Type Definitions
# These define the physical layout and display formatting for each chassis type
#------------------------------------------------------------------------------
#######################################
# Summarise SMART data for one drive as a single pipe-delimited record.
# Arguments: $1 - kernel device name without /dev/ (e.g. "sda")
# Outputs:   "TYPE|TEMP|HEALTH|MODEL" on stdout, where
#              TYPE   is "SSD" if the Rotation Rate line says "Solid State",
#                     otherwise "HDD"
#              TEMP   is "<n>°C", or "N/A" when no temperature was found
#              HEALTH is "✓" if the overall-health line says PASSED, else "✗"
#              MODEL  is the "Device Model" / "Model Number" value (may be
#                     empty)
# Returns:   0 always; smartctl failures only produce default fields
#######################################
get_drive_smart_info() {
  local device=$1
  local smart_info temp type health model

  # Best effort on purpose: a missing smartctl or an unsupported device is
  # silenced and simply yields the default fields below.
  smart_info=$(sudo smartctl -A -i -H "/dev/$device" 2>/dev/null)

  # First line mentioning "Temperature"; its 10th whitespace-separated field
  # is used as the reading (the raw-value column of the attribute table).
  temp=$(awk '/Temperature/ { print $10; exit }' <<< "$smart_info")
  if [[ -n "$temp" ]]; then
    temp="${temp}°C"
  else
    temp="N/A"
  fi

  if grep "Rotation Rate" <<< "$smart_info" | grep -q "Solid State"; then
    type="SSD"
  else
    type="HDD"
  fi

  if grep "SMART overall-health" <<< "$smart_info" | grep -q "PASSED"; then
    health="✓"
  else
    health="✗"
  fi

  # Take everything after the first colon of the first model line and trim
  # it in awk. Trimming with xargs fails on model names containing quotes,
  # and "Model Number" is accepted alongside "Device Model".
  model=$(awk '/Device Model|Model Number/ {
      sub(/^[^:]*:[ \t]*/, "")
      sub(/[ \t]+$/, "")
      print
      exit
    }' <<< "$smart_info")

  printf '%s|%s|%s|%s\n' "$type" "$temp" "$health" "$model"
}
generate_10bay_layout() {
local hostname=$1
build_drive_map
get_drives_info() {
local path="/dev/disk/by-path"
for drive in "$path"/*; do
if [ -L "$drive" ]; then
echo "$(basename "$drive") $(readlink -f "$drive")"
fi
# Calculate max width needed for drive names
max_width=0
for bay in {1..10} "m2-1" "usb1" "usb2"; do
drive_text="${DRIVE_MAP[$bay]:-EMPTY}"
text_len=$((${#bay} + 1 + ${#drive_text}))
[[ $text_len -gt $max_width ]] && max_width=$text_len
done
}
declare -A DRIVE_MAPPINGS=(
["medium2"]="
pci-0000:0c:00.0-ata-3 5
pci-0000:0c:00.0-ata-4 6
pci-0000:0c:00.0-ata-1 3
pci-0000:0c:00.0-ata-2 4
pci-0000:0d:00.0-nvme-1 11
pci-0000:0b:00.0-usb-0:3:1.0-scsi-0:0:0:0 usb1
pci-0000:0b:00.0-usb-0:4:1.0-scsi-0:0:0:0 usb2
"
)
# Add padding for box borders
box_width=$((max_width + 4))
build_drive_map() {
local host=$(hostname)
declare -A drive_map
echo "DEBUG: Current host: $host"
echo "DEBUG: Mapping found: ${DRIVE_MAPPINGS[$host]}"
local mapping=${DRIVE_MAPPINGS[$host]}
if [[ -n "$mapping" ]]; then
while read -r path slot; do
[[ -z "$path" || -z "$slot" ]] && continue
echo "DEBUG: Checking path: $path for slot: $slot"
if [[ -L "/dev/disk/by-path/$path" ]]; then
local drive=$(readlink -f "/dev/disk/by-path/$path" | sed 's/.*\///')
drive_map[$slot]=$drive
echo "DEBUG: Mapped slot $slot to drive $drive"
fi
done <<< "$mapping"
# Create box drawing elements
h_line=$(printf '%*s' "$box_width" '' | tr ' ' '─')
# USB Section (if applicable)
if [[ -n "${DRIVE_MAP[usb1]}" || -n "${DRIVE_MAP[usb2]}" ]]; then
printf "\n External USB\n"
printf " ┌%s┐ ┌%s┐\n" "$h_line" "$h_line"
printf " │ %-${max_width}s │ │ %-${max_width}s │\n" "${DRIVE_MAP[usb1]:-EMPTY}" "${DRIVE_MAP[usb2]:-EMPTY}"
printf " └%s┘ └%s┘\n\n" "$h_line" "$h_line"
fi
# Make drive_map available globally
declare -g -A DRIVE_MAP=()
for key in "${!drive_map[@]}"; do
DRIVE_MAP[$key]=${drive_map[$key]}
echo "DEBUG: Final mapping - slot $key: ${drive_map[$key]}"
# Main chassis section
printf "┌──────────────────────────────────────────────────────────────┐\n"
printf "│ %-58s │\n" "$hostname"
printf "│ %-58s │\n" "10-Bay Hot-swap Chassis"
printf "│ │\n"
# M.2 NVMe slot if present
if [[ -n "${DRIVE_MAP[m2-1]}" ]]; then
printf "│ M.2 NVMe Slot │\n"
printf "│ ┌%s┐ │\n" "$h_line"
printf "│ │ %-${max_width}s │ │\n" "${DRIVE_MAP[m2-1]:-EMPTY}"
printf "│ └%s┘ │\n" "$h_line"
printf "│ │\n"
fi
printf "│ Front Hot-swap Bays │\n"
# Create bay rows
printf "│ "
for bay in {1..10}; do
printf "┌%s┐" "$h_line"
done
printf " │\n│ "
for bay in {1..10}; do
printf "│%-2d:%-${max_width}s │" "$bay" "${DRIVE_MAP[$bay]:-EMPTY}"
done
printf " │\n│ "
for bay in {1..10}; do
printf "└%s┘" "$h_line"
done
printf " │\n"
printf "└──────────────────────────────────────────────────────────────┘\n"
}
# Define the ASCII art maps
large1='''
generate_large1_layout() {
build_drive_map
cat << 'EOF'
┌─────────────────────────────────────────────────────────────┐
│ large1 │
│ Unique 3x5 Grid Chassis │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Motherboard │ │
@@ -104,10 +105,14 @@ large1='''
│ │ │ │ │ │ │ │
│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │
└─────────────────────────────────────────────────────────────┘
'''
EOF
}
compute-storage-gpu-01='''
generate_spare_10bay_layout() {
cat << 'EOF'
┌─────────────────────────────────────────────────────────────┐
│ Spare 10-Bay Chassis │
│ (Not Currently Deployed) │
│ │
│ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌────────────┐ │
│ │ 1 │ │ 2 │ │ 3 │ │ 4 │ │
@@ -117,201 +122,175 @@ compute-storage-gpu-01='''
│ │ 5 │ │ 6 │ │ 7 │ │ 8 │ │
│ └────────────┘ └────────────┘ └────────────┘ └────────────┘ │
│ │
│ │
│ │
│ ┌─────────┐ │
compute-storage-gpu-01 │ 9 │ │
│ 9 │ │
│ └─────────┘ │
│ ┌─────────┐ │
│ │ 10 │ │
│ └─────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
'''
generate_medium2_layout() {
build_drive_map
# Calculate max width needed for drive names
max_width=0
for bay in {1..10} "11" "usb1" "usb2"; do
drive_text="${DRIVE_MAP[$bay]:-EMPTY}"
text_len=$((${#bay} + 1 + ${#drive_text}))
[[ $text_len -gt $max_width ]] && max_width=$text_len
done
# Add padding for box borders
box_width=$((max_width + 4))
# Create box drawing elements
h_line=$(printf '%*s' "$box_width" '' | tr ' ' '─')
# USB Section
printf "\n External USB [0b:00.0]\n"
printf " ┌%s┐ ┌%s┐\n" "$h_line" "$h_line"
printf " │ %-${max_width}s │ │ %-${max_width}s │\n" "${DRIVE_MAP[usb1]:-EMPTY}" "${DRIVE_MAP[usb2]:-EMPTY}"
printf " └%s┘ └%s┘\n\n" "$h_line" "$h_line"
# Main chassis section
printf "┌──────────────────────────────────────────────────────────────┐\n"
printf "│ B650D4U3-2Q/BCM │\n"
printf "│ │\n"
printf "│ NVMe [0d:00.0] Bay 11 │\n"
printf "│ ┌%s┐ │\n" "$h_line"
printf "│ │ %-${max_width}s │ │\n" "${DRIVE_MAP[11]:-EMPTY}"
printf "│ └%s┘ │\n" "$h_line"
printf "│ │\n"
printf "│ Front Hot-swap Bays [0c:00.0] │\n"
# Create bay rows
printf "│ "
for bay in {1..10}; do
printf "┌%s┐" "$h_line"
done
printf " │\n│ "
for bay in {1..10}; do
printf "│%-2d:%-${max_width}s │" "$bay" "${DRIVE_MAP[$bay]:-EMPTY}"
done
printf " │\n│ "
for bay in {1..10}; do
printf "└%s┘" "$h_line"
done
printf " │\n"
printf "└──────────────────────────────────────────────────────────────┘\n"
EOF
}
microGeneric='''
┌─┐ ┌─┐
┌└─┘──└─┘┐
│ 1 2 │
│ │
│ │
│ │
└────────┘
'''
#------------------------------------------------------------------------------
# Server-Specific Drive Mappings
# Maps PCI paths to physical bay numbers for each server
# Format: "pci-path bay-number"
#------------------------------------------------------------------------------
# Keyed by the name `hostname` prints on each server. Every value is a
# newline-separated list of "<by-path name> <bay id>" pairs; build_drive_map
# resolves each name under /dev/disk/by-path and ignores entries whose
# symlink is absent. Bay ids seen here are bay numbers plus the special
# ids "m2-1", "usb1" and "usb2".
declare -A SERVER_MAPPINGS=(
# compute-storage-01 (formerly medium2)
# Motherboard: B650D4U3-2Q/BCM
# Controller at 0c:00.0 for hot-swap bays
# Controller at 0d:00.0 for M.2 NVMe
# Controller at 0b:00.0 for the two external USB ports
# Only bays 3-6 are mapped so far.
["compute-storage-01"]="
pci-0000:0c:00.0-ata-3 5
pci-0000:0c:00.0-ata-4 6
pci-0000:0c:00.0-ata-1 3
pci-0000:0c:00.0-ata-2 4
pci-0000:0d:00.0-nvme-1 m2-1
pci-0000:0b:00.0-usb-0:3:1.0-scsi-0:0:0:0 usb1
pci-0000:0b:00.0-usb-0:4:1.0-scsi-0:0:0:0 usb2
"
# storage-01
# Different motherboard, no HBA currently
# Placeholder: no PCI paths are listed yet, so no bay resolves to a drive.
# TODO: Map actual PCI paths after running diagnose-drives.sh
["storage-01"]="
"
# large1
# Unique chassis - 1/1 configuration
# NOTE(review): "1/1 configuration" is not explained in this file - confirm
# what it refers to.
# Placeholder: no PCI paths are listed yet, so no bay resolves to a drive.
# TODO: Map actual PCI paths after running diagnose-drives.sh
["large1"]="
"
)
# Maps each server hostname to the chassis layout template used to draw it.
# The main display logic looks the current hostname up here and falls back
# to "unknown" when there is no entry. Values it handles are "10bay",
# "large1", "spare-10bay" and "micro".
declare -A CHASSIS_TYPES=(
["compute-storage-01"]="10bay"
["compute-storage-gpu-01"]="spare-10bay"
["storage-01"]="10bay"
["large1"]="large1"
["micro1"]="micro"
["monitor-02"]="micro"
)
#------------------------------------------------------------------------------
# Core Functions
#------------------------------------------------------------------------------
#######################################
# Populate the global DRIVE_MAP (bay id -> kernel device name) for a host.
# Globals:
#   SERVER_MAPPINGS (read)    - per-host lists of "<by-path name> <bay id>"
#   DRIVE_MAP       (written) - reset on every call, then filled
# Arguments:
#   $1 - host key to look up (optional; defaults to the output of `hostname`)
#   $2 - directory containing the by-path symlinks
#        (optional; defaults to /dev/disk/by-path)
# Returns:
#   0. A host without a mapping, or a mapping whose symlinks do not exist,
#   simply leaves DRIVE_MAP empty.
#######################################
build_drive_map() {
  local host=${1:-$(hostname)}
  local by_path_dir=${2:-/dev/disk/by-path}
  local mapping=${SERVER_MAPPINGS[$host]:-}
  local path slot target

  # Write straight into the global map; always start from a clean slate so
  # stale entries from an earlier call cannot survive.
  declare -g -A DRIVE_MAP=()

  [[ -n "$mapping" ]] || return 0

  while read -r path slot; do
    # Skip blank/incomplete lines and "#" comment lines inside a mapping.
    [[ -z "$path" || -z "$slot" || "$path" == \#* ]] && continue
    # A bay whose by-path symlink is absent is left unmapped (empty bay).
    [[ -L "$by_path_dir/$path" ]] || continue
    target=$(readlink -f "$by_path_dir/$path") || continue
    # Keep only the device name, e.g. /dev/sda -> sda.
    DRIVE_MAP[$slot]=${target##*/}
  done <<< "$mapping"

  return 0
}
#######################################
# Summarise SMART data for one drive as a single pipe-delimited record.
# Arguments: $1 - kernel device name without /dev/ (e.g. "sda")
# Outputs:   "TYPE|TEMP|HEALTH|MODEL" on stdout, where
#              TYPE   is "SSD" if the Rotation Rate line says "Solid State",
#                     otherwise "HDD"
#              TEMP   is "<n>°C", or "N/A" when no temperature was found
#              HEALTH is "✓" if the overall-health line says PASSED, else "✗"
#              MODEL  is the "Device Model" / "Model Number" value (may be
#                     empty)
# Returns:   0 always; smartctl failures only produce default fields
#######################################
get_drive_smart_info() {
  local device=$1
  local smart_info temp type health model

  # Best effort on purpose: a missing smartctl or an unsupported device is
  # silenced and simply yields the default fields below.
  smart_info=$(sudo smartctl -A -i -H "/dev/$device" 2>/dev/null)

  # First line mentioning "Temperature"; its 10th whitespace-separated field
  # is used as the reading (the raw-value column of the attribute table).
  temp=$(awk '/Temperature/ { print $10; exit }' <<< "$smart_info")
  if [[ -n "$temp" ]]; then
    temp="${temp}°C"
  else
    temp="N/A"
  fi

  if grep "Rotation Rate" <<< "$smart_info" | grep -q "Solid State"; then
    type="SSD"
  else
    type="HDD"
  fi

  if grep "SMART overall-health" <<< "$smart_info" | grep -q "PASSED"; then
    health="✓"
  else
    health="✗"
  fi

  # Take everything after the first colon of the first model line and trim
  # it in awk. Trimming with xargs fails on model names containing quotes,
  # and several matching lines would otherwise be glued together.
  model=$(awk '/Device Model|Model Number/ {
      sub(/^[^:]*:[ \t]*/, "")
      sub(/[ \t]+$/, "")
      print
      exit
    }' <<< "$smart_info")

  printf '%s|%s|%s|%s\n' "$type" "$temp" "$health" "$model"
}
#------------------------------------------------------------------------------
# Main Display Logic
#------------------------------------------------------------------------------
# Get the hostname
HOSTNAME=$(hostname)
CHASSIS_TYPE=${CHASSIS_TYPES[$HOSTNAME]:-"unknown"}
# ASCII art based on hostname
case "$HOSTNAME" in
# Display chassis layout
case "$CHASSIS_TYPE" in
"10bay")
generate_10bay_layout "$HOSTNAME"
;;
"large1")
echo -e "$large1"
generate_large1_layout
;;
"compute-storage-gpu-01")
echo -e "$compute-storage-gpu-01"
"spare-10bay")
generate_spare_10bay_layout
;;
"medium2")
generate_medium2_layout
;;
"micro1" | "monitor-02")
echo -e "$microGeneric"
"micro")
echo "Micro server layout not yet implemented"
;;
*)
echo -e "No ASCII map defined for this hostname."
echo "┌─────────────────────────────────────────────────────────┐"
echo "│ Unknown server: $HOSTNAME"
echo "│ No chassis mapping defined yet"
echo "│ Run diagnose-drives.sh to gather PCI path information"
echo "└─────────────────────────────────────────────────────────┘"
;;
esac
#######################################
# Print the drive found at each position of a chassis layout.
# Arguments: $1 - server type; only "large1" (positions 1-9) is handled,
#                 any other value prints nothing
# Outputs:   one "Position <n>: <info>" line per position on stdout
# Notes:     relies on get_drive_info_for_position, which is not defined in
#            the visible part of this file.
#######################################
map_drives_to_layout() {
  local server_type=$1
  # Keep the loop counter local so a caller's own $i is not clobbered.
  local i drive_info

  case "$server_type" in
    "large1")
      for i in {1..9}; do
        drive_info=$(get_drive_info_for_position "$i")
        echo "Position $i: $drive_info"
      done
      ;;
  esac
}
#------------------------------------------------------------------------------
# Drive Details Section
#------------------------------------------------------------------------------
DRIVE_PATHS=$(get_drives_info | awk '{print $1, $2}')
# Initialize array for "not found" messages
not_found=()
echo -e "\n=== Drive Details with SMART Status ===\n"
printf "%-15s %-10s %-8s %-8s %-20s %-30s\n" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL"
echo -e "\n=== Drive Details with SMART Status ==="
printf "%-15s %-10s %-8s %-8s %-8s %-30s\n" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL"
echo "--------------------------------------------------------------------------------"
# For SATA drives
lsblk -d -o NAME | grep -v "nvme" | grep -v "rbd" | while read device; do
size=$(get_drive_details "$device")
smart_info=$(get_drive_smart_info "$device")
IFS='|' read -r type temp health model <<< "$smart_info"
printf "%-15s %-10s %-8s %-8s %-20s %-30s\n" "/dev/$device" "$size" "$type" "$temp" "$health" "$model"
done
# For NVMe drives
if [ -n "$nvme_drives" ]; then
while read -r line; do
device=$(echo "$line" | awk '{print $1}' | sed 's/.*\///')
size=$(echo "$line" | awk '{print $6}')
# SATA/SAS drives
lsblk -d -o NAME | grep -v "nvme" | grep -v "rbd" | grep -v "loop" | grep -v "NAME" | while read device; do
if [ -b "/dev/$device" ]; then
size=$(lsblk -d -n -o SIZE "/dev/$device" 2>/dev/null)
smart_info=$(get_drive_smart_info "$device")
IFS='|' read -r type temp health model <<< "$smart_info"
printf "%-15s %-10s %-8s %-8s %-20s %-30s\n" "/dev/$device" "$size" "$type" "$temp" "$health" "$model"
done <<< "$nvme_drives"
printf "%-15s %-10s %-8s %-8s %-8s %-30s\n" "/dev/$device" "$size" "$type" "$temp" "$health" "$model"
fi
done
# NVMe drives
# Printed as a separate table built from `nvme list`. The whole section is
# skipped silently when the nvme CLI is not installed or lists no devices.
if command -v nvme >/dev/null 2>&1; then
# Keep only the device rows (lines starting with /dev); stderr is discarded.
nvme_drives=$(sudo nvme list 2>/dev/null | grep "^/dev")
if [ -n "$nvme_drives" ]; then
echo -e "\n=== NVMe Drives ==="
printf "%-15s %-10s %-10s %-40s\n" "DEVICE" "SIZE" "TYPE" "MODEL"
echo "--------------------------------------------------------------------------------"
# NOTE(review): awk splits on whitespace, so $3 is a single word and $6 only
# lines up with a size if the earlier columns contain no spaces - confirm
# against real `nvme list` output for multi-word model names.
echo "$nvme_drives" | awk '{printf "%-15s %-10s %-10s %-40s\n", $1, $6, "NVMe", $3}'
fi
fi
# Show NVMe Drives only if present
nvme_drives=$(sudo nvme list | grep "^/dev")
if [ -n "$nvme_drives" ]; then
echo -e "\n=== NVMe Drives ===\n"
printf "%-15s %-10s %-10s %-20s\n" "DEVICE" "SIZE" "TYPE" "MODEL"
echo "------------------------------------------------------------"
echo "$nvme_drives" | awk '{printf "%-15s %-10s %-10s %-20s\n", $1, $6, "NVMe", $3}'
else
not_found+=("NVMe drives")
fi
#------------------------------------------------------------------------------
# Optional sections
#------------------------------------------------------------------------------
# MMC devices: the table is printed only when lsblk lists at least one row
# containing "mmcblk"; otherwise this section produces no output.
mmc_output=$(lsblk -o NAME,SIZE,TYPE,MOUNTPOINT | grep "mmcblk" | sort)
[[ -z "$mmc_output" ]] || {
  printf '\n=== MMC Drives ===\n\n'
  printf '%-15s %-10s %-10s %-20s\n' "DEVICE" "SIZE" "TYPE" "MOUNTPOINT"
  printf '%s\n' "------------------------------------------------------------"
  printf '%s\n' "$mmc_output"
}
# Show SATA Drives only if present
# Rows come from `lsblk -d` lines containing "disk", minus anything matching
# "nvme" or "rbd". A stray "s" used to follow the closing parenthesis of this
# command substitution; it was appended to the value, so the variable was
# never empty and the table was printed even with no drives.
sata_output=$(
  lsblk -d -o NAME,SIZE,TYPE,MOUNTPOINT \
    | grep "disk" \
    | grep -v "nvme" \
    | grep -v "rbd" \
    | sort \
    | column -t
)
if [ -n "$sata_output" ]; then
  echo -e "\n=== SATA Drives ===\n"
  printf "%-15s %-10s %-10s %-20s\n" "DEVICE" "SIZE" "TYPE" "MOUNTPOINT"
  echo "------------------------------------------------------------"
  echo "$sata_output"
fi
# Show Ceph RBD Devices only if present
rbd_output=$(lsblk -o NAME,SIZE,TYPE,MOUNTPOINT | grep "rbd" | sort -V)
# Ceph RBD Devices
rbd_output=$(lsblk -o NAME,SIZE,TYPE,MOUNTPOINT 2>/dev/null | grep "rbd" | sort -V)
if [ -n "$rbd_output" ]; then
echo -e "\n=== Ceph RBD Devices ===\n"
echo -e "\n=== Ceph RBD Devices ==="
printf "%-15s %-10s %-10s %-20s\n" "DEVICE" "SIZE" "TYPE" "MOUNTPOINT"
echo "------------------------------------------------------------"
echo "$rbd_output"
else
not_found+=("RBD devices")
fi
# Check RAID
if ! [ -f /proc/mdstat ] || ! grep -q "active" /proc/mdstat; then
not_found+=("Software RAID")
# Show mapping diagnostic info if DEBUG is set

#######################################
# Print one "Bay <id>: <device>" line per DRIVE_MAP entry, ordered by bay.
# Globals: DRIVE_MAP (read)
# Outputs: the sorted lines on stdout; nothing when DRIVE_MAP is empty
#######################################
print_drive_map_debug() {
  local key
  for key in "${!DRIVE_MAP[@]}"; do
    echo "Bay $key: ${DRIVE_MAP[$key]}"
  done | sort -k2,2n
  # Sort on the second field (the bay id). A plain `sort -n` keys on the
  # literal word "Bay", so "Bay 10" ended up ahead of "Bay 2". Non-numeric
  # ids such as "m2-1" or "usb1" count as 0 and are therefore listed first.
}

if [[ -n "${DEBUG:-}" ]]; then
  echo -e "\n=== DEBUG: Drive Mappings ==="
  print_drive_map_debug
fi
# Check ZFS
if ! command -v zpool >/dev/null 2>&1 || [ -z "$(sudo zpool status 2>/dev/null)" ]; then
not_found+=("ZFS pools")
fi
# Display consolidated "not found" messages at the end
if [ ${#not_found[@]} -gt 0 ]; then
echo -e "\n=== Not Found ===\n"
printf "%s\n" "${not_found[@]}"
fi