Split SMART data handling into two functions: parse_smart_data(), which parses raw smartctl output (no I/O), and get_drive_smart_info(), which fetches and parses (wrapper). Changed parallel collection to save raw smartctl output to cache files and parse it during the display loop; this avoids issues with function availability in background subshells when running from process substitution (bash <(curl ...)). Also fixed: removed orphan code that was outside function scope; fixed lsblk caching to use separate calls for SIZE and MOUNTPOINT. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1118 lines
44 KiB
Bash
1118 lines
44 KiB
Bash
#!/bin/bash
|
|
|
|
#==============================================================================
|
|
# Drive Atlas - Server Drive Mapping Tool
|
|
# Maps physical drive bays to logical device names using PCI paths
|
|
#==============================================================================
|
|
|
|
# Shell safety options:
|
|
# -o pipefail: Exit status of pipe is rightmost non-zero exit code
|
|
# Note: Not using -e (errexit) to allow graceful degradation when tools fail
|
|
# Note: Not using -u (nounset) as script uses ${var:-default} patterns
|
|
set -o pipefail
|
|
|
|
VERSION="1.1.0"
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Path Constants
|
|
# Centralized path definitions to avoid hardcoding throughout the script
|
|
#------------------------------------------------------------------------------
|
|
readonly DISK_BY_PATH="/dev/disk/by-path"
|
|
|
|
#------------------------------------------------------------------------------
# show_usage
#
# Prints the help text (usage line, options, examples, environment variables)
# to stdout. The program name is taken from $0 and the version from VERSION.
#------------------------------------------------------------------------------
show_usage() {
    # Resolve the invoked script name once instead of once per line
    local prog
    prog="$(basename "$0")"

    cat << EOF
Drive Atlas v${VERSION} - Server Drive Mapping Tool

Maps physical drive bays to logical device names using PCI paths.
Displays visual chassis layouts and comprehensive drive information.

USAGE:
${prog} [OPTIONS]

OPTIONS:
-h, --help Show this help message and exit
-v, --version Show version information
-d, --debug Enable debug output (show drive mappings)
-s, --skip-smart Skip SMART data collection (faster)
-c, --color Enable colored output
--verbose Show detailed error messages and warnings
--no-ceph Skip Ceph OSD information
--show-pci Show PCI paths in output

EXAMPLES:
${prog} # Normal run with all features
${prog} --skip-smart # Fast run without SMART data
${prog} --color # Run with colored output
${prog} --verbose # Show all errors and warnings
${prog} --debug # Show mapping debug info

ENVIRONMENT VARIABLES:
DEBUG=1 Same as --debug flag

For more information, see: https://code.lotusguild.org/LotusGuild/driveAtlas
EOF
}
|
|
|
|
#------------------------------------------------------------------------------
# Command Line Argument Parsing
#
# Every flag is a simple switch (no option takes a value). --help and
# --version print and exit 0; an unrecognised argument prints a hint to
# stderr and exits 1.
#------------------------------------------------------------------------------
SKIP_SMART=false
SKIP_CEPH=false
SHOW_PCI=false
USE_COLOR=false
VERBOSE=false

while (( $# > 0 )); do
    case "$1" in
        -h|--help)
            show_usage
            exit 0
            ;;
        -v|--version)
            echo "Drive Atlas v${VERSION}"
            exit 0
            ;;
        -d|--debug)      DEBUG=1 ;;
        -s|--skip-smart) SKIP_SMART=true ;;
        --no-ceph)       SKIP_CEPH=true ;;
        --show-pci)      SHOW_PCI=true ;;
        -c|--color)      USE_COLOR=true ;;
        --verbose)       VERBOSE=true ;;
        *)
            echo "Unknown option: $1" >&2
            echo "Use --help for usage information." >&2
            exit 1
            ;;
    esac
    # Every arm that does not exit consumes exactly one argument
    shift
done
|
|
|
|
#------------------------------------------------------------------------------
# Color Definitions
# ANSI escape codes for terminal colors. They stay empty unless USE_COLOR is
# "true", so they can be interpolated unconditionally.
#------------------------------------------------------------------------------
COLOR_RESET=''
COLOR_RED=''
COLOR_GREEN=''
COLOR_YELLOW=''
COLOR_BLUE=''
COLOR_CYAN=''
COLOR_BOLD=''

if [[ "$USE_COLOR" == true ]]; then
    COLOR_RESET='\033[0m'
    COLOR_RED='\033[0;31m'
    COLOR_GREEN='\033[0;32m'
    COLOR_YELLOW='\033[0;33m'
    COLOR_BLUE='\033[0;34m'
    COLOR_CYAN='\033[0;36m'
    COLOR_BOLD='\033[1m'
fi
|
|
|
|
#------------------------------------------------------------------------------
# colorize_health
#
# Prints a health indicator (no trailing newline), colored when USE_COLOR is
# "true":
#   ✓ -> green
#   ⚠ -> yellow (SMART passed but threshold warnings were raised)
#   anything else (e.g. ✗) -> red
# Args: $1 - health status (✓, ⚠ or ✗)
#------------------------------------------------------------------------------
colorize_health() {
    local health="$1"
    local color

    if [[ "$USE_COLOR" != true ]]; then
        printf '%s' "$health"
        return
    fi

    case "$health" in
        "✓") color="$COLOR_GREEN" ;;
        # The warning state must not be shown as a failure
        "⚠") color="$COLOR_YELLOW" ;;
        *)   color="$COLOR_RED" ;;
    esac

    printf '%b%s%b' "$color" "$health" "$COLOR_RESET"
}
|
|
|
|
#------------------------------------------------------------------------------
# colorize_temp
#
# Prints a temperature string colored by severity when USE_COLOR is "true":
# red at or above the critical threshold, yellow at or above the warning
# threshold, green otherwise. Thresholds come from SMART_TEMP_CRIT and
# SMART_TEMP_WARN when set (falling back to 60 / 50), so the coloring agrees
# with the SMART warning thresholds.
#
# Args: $1 - temperature string (e.g., "45°C"), or "-" when unknown
# Output: the string is echoed unchanged (with newline) when colors are off
#         or the value is "-"; otherwise printed without a trailing newline.
#------------------------------------------------------------------------------
colorize_temp() {
    local temp_str="$1"
    local temp_val color
    local crit="${SMART_TEMP_CRIT:-60}"
    local warn="${SMART_TEMP_WARN:-50}"

    if [[ "$USE_COLOR" != true || "$temp_str" == "-" ]]; then
        echo "$temp_str"
        return
    fi

    # Extract numeric value
    temp_val="${temp_str%°C}"
    if [[ ! "$temp_val" =~ ^[0-9]+$ ]]; then
        printf '%s' "$temp_str"
        return
    fi

    # Force base 10 so zero-padded readings such as "08" are not parsed as octal
    if (( 10#$temp_val >= crit )); then
        color="$COLOR_RED"
    elif (( 10#$temp_val >= warn )); then
        color="$COLOR_YELLOW"
    else
        color="$COLOR_GREEN"
    fi

    printf '%b%s%b' "$color" "$temp_str" "$COLOR_RESET"
}
|
|
|
|
#------------------------------------------------------------------------------
# colorize_header
#
# Prints a header line followed by a newline; blue and bold when USE_COLOR
# is "true", plain otherwise.
# Args: $1 - header text
#------------------------------------------------------------------------------
colorize_header() {
    local text="$1"

    if [[ "$USE_COLOR" != true ]]; then
        printf '%s\n' "$text"
        return
    fi

    printf '%b%b%s%b\n' "$COLOR_BLUE" "$COLOR_BOLD" "$text" "$COLOR_RESET"
}
|
|
|
|
#------------------------------------------------------------------------------
# log_error
#
# Writes "ERROR: <message>" to stderr regardless of verbose mode. The prefix
# is shown in red when USE_COLOR is "true".
# Args: $1 - error message
#------------------------------------------------------------------------------
log_error() {
    local msg="$1"

    if [[ "$USE_COLOR" != true ]]; then
        printf 'ERROR: %s\n' "$msg" >&2
        return
    fi

    printf '%bERROR:%b %s\n' "$COLOR_RED" "$COLOR_RESET" "$msg" >&2
}
|
|
|
|
#------------------------------------------------------------------------------
# log_warn
#
# Writes "WARN: <message>" to stderr, but only when VERBOSE is "true". The
# prefix is shown in yellow when USE_COLOR is "true".
# Args: $1 - warning message
#------------------------------------------------------------------------------
log_warn() {
    local msg="$1"

    # Silent unless verbose mode was requested
    [[ "$VERBOSE" == true ]] || return 0

    if [[ "$USE_COLOR" != true ]]; then
        printf 'WARN: %s\n' "$msg" >&2
        return
    fi

    printf '%bWARN:%b %s\n' "$COLOR_YELLOW" "$COLOR_RESET" "$msg" >&2
}
|
|
|
|
#------------------------------------------------------------------------------
# log_info
#
# Writes "INFO: <message>" to stderr, but only when VERBOSE is "true". The
# prefix is shown in cyan when USE_COLOR is "true".
# Args: $1 - info message
#------------------------------------------------------------------------------
log_info() {
    local msg="$1"

    # Silent unless verbose mode was requested
    [[ "$VERBOSE" == true ]] || return 0

    if [[ "$USE_COLOR" != true ]]; then
        printf 'INFO: %s\n' "$msg" >&2
        return
    fi

    printf '%bINFO:%b %s\n' "$COLOR_CYAN" "$COLOR_RESET" "$msg" >&2
}
|
|
|
|
#------------------------------------------------------------------------------
# Dependency Checks
# Verifies required commands are available before running
#------------------------------------------------------------------------------

# Commands the script cannot work without
REQUIRED_DEPS=(
    lsblk
    lspci
    readlink
    hostname
)

# Commands that only unlock extra functionality when present
OPTIONAL_DEPS=(
    smartctl
    ceph
    ceph-volume
    bc
    nvme
)

# Bootstrap script suggested to the user when dependencies are missing.
# NOTE(review): this is plain HTTP on a private address and the hint pipes it
# straight into bash - confirm that is acceptable for the target network.
FRESH_START_URL="http://10.10.10.63:3000/LotusGuild/freshStartScript/raw/branch/main/freshStart.sh"
|
|
|
|
#------------------------------------------------------------------------------
# check_dependencies
#
# Verifies required and optional commands are available.
#
# Globals read: REQUIRED_DEPS, OPTIONAL_DEPS, FRESH_START_URL, SKIP_SMART
# Output: all messages go to stderr
# Exits:  status 1 when any command in REQUIRED_DEPS is missing
#
# Missing optional commands only produce a note. A sudo hint is printed when
# SMART collection is enabled, smartctl exists, the caller is not root and
# passwordless sudo is not available.
#------------------------------------------------------------------------------
check_dependencies() {
    # Loop variable is local so it does not clobber a caller's "cmd"
    local cmd
    local -a missing_required=()
    local -a missing_optional=()

    # Check required dependencies
    for cmd in "${REQUIRED_DEPS[@]}"; do
        command -v "$cmd" &>/dev/null || missing_required+=("$cmd")
    done

    # Check optional dependencies
    for cmd in "${OPTIONAL_DEPS[@]}"; do
        command -v "$cmd" &>/dev/null || missing_optional+=("$cmd")
    done

    # Report missing required dependencies and exit
    if (( ${#missing_required[@]} > 0 )); then
        echo "ERROR: Missing required dependencies: ${missing_required[*]}" >&2
        echo "" >&2
        echo "Please install the missing packages or run the fresh start script:" >&2
        echo " curl -s $FRESH_START_URL | bash" >&2
        echo "" >&2
        exit 1
    fi

    # Warn about missing optional dependencies
    if (( ${#missing_optional[@]} > 0 )); then
        echo "Note: Some optional features unavailable. Missing: ${missing_optional[*]}" >&2
        echo " Install them or run: curl -s $FRESH_START_URL | bash" >&2
        echo "" >&2
    fi

    # Check for sudo access (needed for smartctl). Skipped when SMART collection
    # is disabled or when already running as root, where the hint would be wrong.
    if [[ "$SKIP_SMART" != true && "$EUID" -ne 0 ]] \
        && command -v smartctl &>/dev/null \
        && ! sudo -n true 2>/dev/null; then
        echo "Note: SMART data requires sudo access. Run with sudo for full functionality." >&2
    fi
}
|
|
|
|
# Run dependency check at script start
|
|
check_dependencies
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Chassis Layout Generator Functions
|
|
# These define the physical layout and display formatting for each chassis type
|
|
#------------------------------------------------------------------------------
|
|
|
|
#------------------------------------------------------------------------------
# generate_10bay_layout
#
# Generates ASCII art representation of a 10-bay hot-swap chassis (Sliger CX4712).
# Shows storage controllers, M.2 NVMe slot (only when mapped), and 10 front
# hot-swap bays. Unmapped bays are printed as EMPTY.
#
# Args:
#   $1 - Hostname to display in the layout header
#
# Globals read: DRIVE_MAP (keys 1..10 and m2-1)
# Side effects: Calls build_drive_map() to populate DRIVE_MAP
#------------------------------------------------------------------------------
generate_10bay_layout() {
    local hostname="$1"
    # Loop variables are local so the caller's "bay"/"ctrl" are left untouched
    local bay ctrl

    build_drive_map

    # Main chassis section
    printf "┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐\n"
    printf "│ %-126s │\n" "$hostname - Sliger CX4712 (10x 3.5\" Hot-swap)"
    printf "│ │\n"

    # Show storage controllers
    printf "│ Storage Controllers: │\n"
    while IFS= read -r ctrl; do
        [[ -n "$ctrl" ]] && printf "│ %-126s│\n" "$ctrl"
    done < <(get_storage_controllers)
    printf "│ │\n"

    # M.2 NVMe slot if present
    if [[ -n "${DRIVE_MAP[m2-1]}" ]]; then
        printf "│ M.2 NVMe: %-10s │\n" "${DRIVE_MAP[m2-1]}"
        printf "│ │\n"
    fi

    printf "│ Front Hot-swap Bays: │\n"
    printf "│ │\n"

    # Bay top borders
    printf "│ "
    for bay in {1..10}; do
        printf "┌──────────┐ "
    done
    printf " │\n"

    # Bay contents
    printf "│ "
    for bay in {1..10}; do
        printf "│%-2d:%-7s│ " "$bay" "${DRIVE_MAP[$bay]:-EMPTY}"
    done
    printf " │\n"

    # Bay bottom borders
    printf "│ "
    for bay in {1..10}; do
        printf "└──────────┘ "
    done
    printf " │\n"

    printf "└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘\n"
}
|
|
|
|
#------------------------------------------------------------------------------
# generate_micro_layout
#
# Generates ASCII art representation of a micro SBC (e.g., ZimaBoard).
# Shows storage controllers, onboard eMMC (only when /dev/mmcblk0 is a block
# device), and 2 SATA ports. Unmapped ports are printed as EMPTY.
#
# Args:
#   $1 - Hostname to display in the layout header
#
# Globals read: DRIVE_MAP (keys 1 and 2)
# Side effects: Calls build_drive_map() to populate DRIVE_MAP
#------------------------------------------------------------------------------
generate_micro_layout() {
    local hostname="$1"
    # "ctrl" is local so the read loop does not leak it to the caller
    local ctrl
    local emmc_device=""
    local emmc_size

    build_drive_map

    # Check for eMMC storage
    if [[ -b /dev/mmcblk0 ]]; then
        emmc_device="mmcblk0"
    fi

    printf "┌─────────────────────────────────────────────────────────────┐\n"
    printf "│ %-57s │\n" "$hostname - Micro SBC"
    printf "│ │\n"
    printf "│ Storage Controllers: │\n"
    while IFS= read -r ctrl; do
        [[ -n "$ctrl" ]] && printf "│ %-57s│\n" "$ctrl"
    done < <(get_storage_controllers)
    printf "│ │\n"

    # Show eMMC if present
    if [[ -n "$emmc_device" ]]; then
        # Declared separately above so "local" does not mask the pipeline status;
        # xargs trims the padding lsblk puts around the size
        emmc_size="$(lsblk -d -n -o SIZE "/dev/$emmc_device" 2>/dev/null | xargs)"
        printf "│ ┌─────────────────────────────────────────────────────┐ │\n"
        printf "│ │ Onboard eMMC: %-10s (%s) │ │\n" "$emmc_device" "$emmc_size"
        printf "│ └─────────────────────────────────────────────────────┘ │\n"
        printf "│ │\n"
    fi

    printf "│ SATA Ports (rear): │\n"
    printf "│ ┌──────────────┐ ┌──────────────┐ │\n"
    printf "│ │ 1: %-9s │ │ 2: %-9s │ │\n" "${DRIVE_MAP[1]:-EMPTY}" "${DRIVE_MAP[2]:-EMPTY}"
    printf "│ └──────────────┘ └──────────────┘ │\n"
    printf "└─────────────────────────────────────────────────────────────┘\n"
}
|
|
|
|
#------------------------------------------------------------------------------
# generate_large1_layout
#
# Generates ASCII art representation of a large1 chassis (Rosewill RSV-L4500U).
# Shows storage controllers, 2 M.2 NVMe slots, and 15 front bays in a
# 3-column x 5-row grid. Unmapped bays and slots are printed as EMPTY.
#
# Args:
#   $1 - Hostname to display in the layout header
#
# Globals read: DRIVE_MAP (keys 1..15, m2-1, m2-2)
# Side effects: Calls build_drive_map() to populate DRIVE_MAP
#------------------------------------------------------------------------------
generate_large1_layout() {
    local hostname="$1"
    # "ctrl" is local so the read loop does not leak it to the caller
    local ctrl

    build_drive_map

    # large1 has 3 stacks of 5 bays at front (15 total) + 2 M.2 slots
    # Physical bay mapping TBD - current mapping is by controller order
    printf "┌─────────────────────────────────────────────────────────────────────────┐\n"
    printf "│ %-69s │\n" "$hostname - Rosewill RSV-L4500U (15x 3.5\" Bays)"
    printf "│ │\n"
    printf "│ Storage Controllers: │\n"
    while IFS= read -r ctrl; do
        [[ -n "$ctrl" ]] && printf "│ %-69s│\n" "$ctrl"
    done < <(get_storage_controllers)
    printf "│ │\n"
    printf "│ M.2 NVMe: M1: %-10s M2: %-10s │\n" "${DRIVE_MAP[m2-1]:-EMPTY}" "${DRIVE_MAP[m2-2]:-EMPTY}"
    printf "│ │\n"
    printf "│ Front Bays (3 stacks x 5 rows): [Bay mapping TBD] │\n"
    printf "│ Stack A Stack B Stack C │\n"
    printf "│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │\n"
    printf "│ │1:%-8s│ │2:%-8s│ │3:%-8s│ │\n" "${DRIVE_MAP[1]:-EMPTY}" "${DRIVE_MAP[2]:-EMPTY}" "${DRIVE_MAP[3]:-EMPTY}"
    printf "│ ├──────────┤ ├──────────┤ ├──────────┤ │\n"
    printf "│ │4:%-8s│ │5:%-8s│ │6:%-8s│ │\n" "${DRIVE_MAP[4]:-EMPTY}" "${DRIVE_MAP[5]:-EMPTY}" "${DRIVE_MAP[6]:-EMPTY}"
    printf "│ ├──────────┤ ├──────────┤ ├──────────┤ │\n"
    printf "│ │7:%-8s│ │8:%-8s│ │9:%-8s│ │\n" "${DRIVE_MAP[7]:-EMPTY}" "${DRIVE_MAP[8]:-EMPTY}" "${DRIVE_MAP[9]:-EMPTY}"
    printf "│ ├──────────┤ ├──────────┤ ├──────────┤ │\n"
    printf "│ │10:%-7s│ │11:%-7s│ │12:%-7s│ │\n" "${DRIVE_MAP[10]:-EMPTY}" "${DRIVE_MAP[11]:-EMPTY}" "${DRIVE_MAP[12]:-EMPTY}"
    printf "│ ├──────────┤ ├──────────┤ ├──────────┤ │\n"
    printf "│ │13:%-7s│ │14:%-7s│ │15:%-7s│ │\n" "${DRIVE_MAP[13]:-EMPTY}" "${DRIVE_MAP[14]:-EMPTY}" "${DRIVE_MAP[15]:-EMPTY}"
    printf "│ └──────────┘ └──────────┘ └──────────┘ │\n"
    printf "└─────────────────────────────────────────────────────────────────────────┘\n"
}
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Server-Specific Drive Mappings
|
|
# Maps PCI paths to physical bay numbers for each server
|
|
# Format: "pci-path bay-number"
|
|
#------------------------------------------------------------------------------
|
|
|
|
# Per-host mapping tables. Each value is a newline-separated list of
# "pci-path bay-id" pairs; blank lines are ignored by the consumer.
declare -A SERVER_MAPPINGS=()

# compute-storage-01 (formerly medium2)
# Motherboard: B650D4U3-2Q/BCM with AMD SATA controller
# HBA: LSI SAS3008 at 01:00.0 (mini-SAS HD ports)
# Cable mapping from user notes:
# - Mobo SATA: top-right=bay1, bottom-right=bay2, bottom-left=bay3, top-left=bay4
# - HBA bottom mini-SAS: bays 5,6,7,8
# - HBA top mini-SAS: bays 9,10
SERVER_MAPPINGS["compute-storage-01"]="
pci-0000:0d:00.0-ata-2 1
pci-0000:0d:00.0-ata-1 2
pci-0000:0d:00.0-ata-3 3
pci-0000:0d:00.0-ata-4 4
pci-0000:01:00.0-sas-phy6-lun-0 5
pci-0000:01:00.0-sas-phy7-lun-0 6
pci-0000:01:00.0-sas-phy5-lun-0 7
pci-0000:01:00.0-sas-phy2-lun-0 8
pci-0000:01:00.0-sas-phy4-lun-0 9
pci-0000:01:00.0-sas-phy3-lun-0 10
pci-0000:0e:00.0-nvme-1 m2-1
"

# compute-storage-gpu-01
# Motherboard: ASUS PRIME B550-PLUS with AMD SATA controller at 02:00.1
# 5 SATA ports + 1 M.2 NVMe slot
# sdf is USB/card reader - not mapped
SERVER_MAPPINGS["compute-storage-gpu-01"]="
pci-0000:02:00.1-ata-1 1
pci-0000:02:00.1-ata-2 2
pci-0000:02:00.1-ata-3 3
pci-0000:02:00.1-ata-4 4
pci-0000:02:00.1-ata-5 5
pci-0000:0c:00.0-nvme-1 m2-1
"

# storage-01
# Motherboard: ASRock A320M-HDV R4.0 with AMD SATA controller at 02:00.1
# 4 SATA ports used (ata-1, ata-2, ata-5, ata-6) - ata-3/4 empty
SERVER_MAPPINGS["storage-01"]="
pci-0000:02:00.1-ata-1 1
pci-0000:02:00.1-ata-2 2
pci-0000:02:00.1-ata-5 3
pci-0000:02:00.1-ata-6 4
"

# large1
# Custom tower with multiple controllers:
# - HBA: LSI SAS2008 at 10:00.0 (7 drives)
# - AMD SATA at 16:00.1 (3 drives)
# - ASMedia SATA at 25:00.0 (2 drives)
# - 2x NVMe slots
SERVER_MAPPINGS["large1"]="
pci-0000:10:00.0-sas-phy0-lun-0 1
pci-0000:10:00.0-sas-phy1-lun-0 2
pci-0000:10:00.0-sas-phy3-lun-0 3
pci-0000:10:00.0-sas-phy4-lun-0 4
pci-0000:10:00.0-sas-phy5-lun-0 5
pci-0000:10:00.0-sas-phy6-lun-0 6
pci-0000:10:00.0-sas-phy7-lun-0 7
pci-0000:16:00.1-ata-3 8
pci-0000:16:00.1-ata-7 9
pci-0000:16:00.1-ata-8 10
pci-0000:25:00.0-ata-1 11
pci-0000:25:00.0-ata-2 12
pci-0000:2a:00.0-nvme-1 m2-1
pci-0000:26:00.0-nvme-1 m2-2
"

# micro1
# ZimaBoard 832 - Single board computer
# 2 SATA ports on rear (currently unused)
# Boot from onboard eMMC (mmcblk0)
# SATA controller at 00:12.0
SERVER_MAPPINGS["micro1"]="
"

# monitor-02
# ZimaBoard 832 - Single board computer
# 2 SATA ports on rear (currently unused)
# Boot from onboard eMMC (mmcblk0)
# SATA controller would be at a specific PCI address when drives connected
SERVER_MAPPINGS["monitor-02"]="
"
|
|
|
|
# Hostname -> chassis layout identifier
declare -A CHASSIS_TYPES=()
CHASSIS_TYPES["compute-storage-01"]="10bay"
CHASSIS_TYPES["compute-storage-gpu-01"]="10bay"
CHASSIS_TYPES["storage-01"]="10bay"
CHASSIS_TYPES["large1"]="large1"
CHASSIS_TYPES["micro1"]="micro"       # ZimaBoard 832
CHASSIS_TYPES["monitor-02"]="micro"   # ZimaBoard 832
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Core Functions
|
|
#------------------------------------------------------------------------------
|
|
|
|
# Cache for lspci output (populated on first call)
LSPCI_CACHE=""

#------------------------------------------------------------------------------
# get_storage_controllers
#
# Prints a formatted list of storage controllers found via lspci.
# Uses LSPCI_CACHE when it is already populated. Note that a call made inside
# a subshell (command or process substitution) cannot update the parent
# shell's cache.
#
# Output Format: " PCI_ADDR: DESCRIPTION" (one per line)
#------------------------------------------------------------------------------
get_storage_controllers() {
    local line pci_addr desc
    # Leading PCI address (hex digits, ':' and '.') followed by one space
    local addr_re='^[0-9a-f:.]+ (.*)$'

    # Cache lspci output on first call
    if [[ -z "$LSPCI_CACHE" ]]; then
        LSPCI_CACHE="$(lspci 2>/dev/null | grep -iE "SAS|SATA|RAID|Mass storage|NVMe")"
    fi

    # Format the cached lines with shell builtins only (no awk/sed per line)
    while read -r line; do
        [[ -z "$line" ]] && continue

        # First whitespace-delimited token is the PCI address
        pci_addr="${line%%[[:space:]]*}"

        # Get short description (strip PCI address); keep the line if it has none
        if [[ "$line" =~ $addr_re ]]; then
            desc="${BASH_REMATCH[1]}"
        else
            desc="$line"
        fi

        echo " $pci_addr: $desc"
    done <<< "$LSPCI_CACHE"
}
|
|
|
|
#------------------------------------------------------------------------------
# build_drive_map
#
# Builds global associative arrays mapping physical bay identifiers to device
# names, using the PCI paths listed in SERVER_MAPPINGS for the current host.
#
# Globals read: SERVER_MAPPINGS, DISK_BY_PATH
# Sets:
#   DRIVE_MAP (global associative array)
#     Keys: Bay identifiers (1, 2, ..., m2-1, m2-2, etc.)
#     Values: Device names (sda, nvme0n1, etc.); only bays whose PCI path
#             currently resolves to a device are present
#   BAY_TO_PCI_PATH (global associative array)
#     Keys: Bay identifiers
#     Values: PCI path strings (for --show-pci option)
#
# Both arrays are reset on every call. When the host has no mapping a warning
# is logged and the arrays are left empty.
#------------------------------------------------------------------------------
build_drive_map() {
    # All working variables are local; in particular the read loop must not
    # clobber a caller's "path"/"slot"
    local host mapping="" path slot target
    local mapped_count=0
    local empty_count=0

    host="$(hostname)"
    # An empty subscript is an error for associative arrays, so guard it
    if [[ -n "$host" ]]; then
        mapping="${SERVER_MAPPINGS[$host]}"
    fi

    # Declare global arrays directly
    declare -g -A DRIVE_MAP=()
    declare -g -A BAY_TO_PCI_PATH=()

    if [[ -z "$mapping" ]]; then
        log_warn "No drive mapping found for host '$host'. Run diagnose-drives.sh to create one."
        return
    fi

    while read -r path slot; do
        [[ -z "$path" || -z "$slot" ]] && continue

        BAY_TO_PCI_PATH[$slot]="$path"

        target=""
        if [[ -L "${DISK_BY_PATH}/$path" ]]; then
            target="$(readlink -f "${DISK_BY_PATH}/$path")"
        fi

        if [[ -n "$target" ]]; then
            # Keep only the final path component (e.g. /dev/sda -> sda)
            DRIVE_MAP[$slot]="${target##*/}"
            mapped_count=$((mapped_count + 1))
        else
            log_info "Bay $slot: No device at PCI path $path"
            empty_count=$((empty_count + 1))
        fi
    done <<< "$mapping"

    log_info "Mapped $mapped_count drives, $empty_count empty bays"
}
|
|
|
|
#------------------------------------------------------------------------------
# build_ceph_cache
#
# Queries Ceph once and builds lookup tables for OSD information.
# This is much more efficient than querying ceph-volume per device.
#
# Sets global associative arrays (reset on every call):
#   CEPH_DEVICE_TO_OSD - Maps device names to OSD IDs (e.g., sda -> osd.5).
#                        Entries are recorded both for the "block device"
#                        path (an LVM path on LVM-backed OSDs) and for each
#                        physical device on the "devices" line.
#   CEPH_OSD_STATUS    - Maps OSD numbers to up/down status
#   CEPH_OSD_IN        - Maps OSD numbers to in/out status (reweight > 0 => in)
#
# Returns early (arrays empty or partially filled) when ceph-volume or the
# ceph CLI is not available.
#------------------------------------------------------------------------------
build_ceph_cache() {
    local line current_osd="" dev_name osd_num status reweight
    local -a device_list=()

    # Patterns kept in variables so they are used verbatim as regexes
    local re_osd_header='======[[:space:]]+osd\.([0-9]+)[[:space:]]+======='
    local re_block='block[[:space:]]device[[:space:]]+/dev/([^[:space:]]+)'
    local re_devices='^[[:space:]]*devices[[:space:]]+(.+)$'
    local re_tree='^[[:space:]]*([0-9]+)[[:space:]]+.*osd\.([0-9]+)[[:space:]]+(up|down)[[:space:]]+([0-9.]+)'

    declare -g -A CEPH_DEVICE_TO_OSD=()
    declare -g -A CEPH_OSD_STATUS=()
    declare -g -A CEPH_OSD_IN=()

    # Skip if ceph-volume is not available
    if ! command -v ceph-volume &>/dev/null; then
        log_info "ceph-volume not found, skipping Ceph OSD detection"
        return
    fi

    log_info "Querying Ceph OSD information..."

    # Parse ceph-volume lvm list output
    # Format: blocks starting with "====== osd.X =======" followed by device info
    while IFS= read -r line; do
        if [[ "$line" =~ $re_osd_header ]]; then
            # OSD header: "====== osd.5 ======="
            current_osd="osd.${BASH_REMATCH[1]}"
        elif [[ -n "$current_osd" && "$line" =~ $re_block ]]; then
            # Block device line: " block device /dev/sda"
            CEPH_DEVICE_TO_OSD[${BASH_REMATCH[1]}]="$current_osd"
        elif [[ -n "$current_osd" && "$line" =~ $re_devices ]]; then
            # Physical devices line: " devices /dev/sda[,/dev/sdb]". On
            # LVM-backed OSDs this is the only line naming the real disk.
            IFS=',' read -r -a device_list <<< "${BASH_REMATCH[1]}"
            for dev_name in "${device_list[@]}"; do
                dev_name="${dev_name//[[:space:]]/}"
                dev_name="${dev_name#/dev/}"
                if [[ -n "$dev_name" ]]; then
                    CEPH_DEVICE_TO_OSD[$dev_name]="$current_osd"
                fi
            done
        fi
    done < <(ceph-volume lvm list 2>/dev/null)

    # Skip if ceph command is not available
    if ! command -v ceph &>/dev/null; then
        log_info "ceph CLI not found, skipping OSD status detection"
        return
    fi

    log_info "Querying Ceph OSD status..."

    # Parse ceph osd tree for status
    # Format: ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT
    while IFS= read -r line; do
        # Match OSD lines: " 5 hdd 3.63660 osd.5 up 1.00000"
        [[ "$line" =~ $re_tree ]] || continue

        osd_num="${BASH_REMATCH[1]}"
        status="${BASH_REMATCH[3]}"
        reweight="${BASH_REMATCH[4]}"

        CEPH_OSD_STATUS[$osd_num]="$status"

        # reweight consists of digits and dots only, so it is greater than
        # zero exactly when it contains a non-zero digit (no awk needed)
        if [[ "$reweight" == *[1-9]* ]]; then
            CEPH_OSD_IN[$osd_num]="in"
        else
            CEPH_OSD_IN[$osd_num]="out"
        fi
    done < <(ceph osd tree 2>/dev/null)
}
|
|
|
|
# SMART warning thresholds (frozen below so later code cannot change them)
SMART_TEMP_WARN=50               # Temperature warning threshold (°C)
SMART_TEMP_CRIT=60               # Temperature critical threshold (°C)
SMART_REALLOCATED_WARN=1         # Reallocated sectors warning threshold
SMART_PENDING_WARN=1             # Pending sectors warning threshold
SMART_CRC_ERROR_WARN=100         # UDMA CRC error warning threshold
SMART_POWER_ON_HOURS_WARN=43800  # ~5 years of continuous use
readonly SMART_TEMP_WARN SMART_TEMP_CRIT
readonly SMART_REALLOCATED_WARN SMART_PENDING_WARN
readonly SMART_CRC_ERROR_WARN SMART_POWER_ON_HOURS_WARN
|
|
|
|
#------------------------------------------------------------------------------
# parse_smart_data
#
# Parses raw SMART data and prints a formatted info string. Performs no I/O
# beyond writing the result to stdout and spawns no external processes.
#
# Args:
#   $1 - Device name (e.g., sda, nvme0n1)
#   $2 - Raw smartctl output string
#
# Globals read (each falls back to the default shown when unset):
#   SMART_TEMP_WARN (50), SMART_TEMP_CRIT (60), SMART_REALLOCATED_WARN (1),
#   SMART_PENDING_WARN (1), SMART_CRC_ERROR_WARN (100),
#   SMART_POWER_ON_HOURS_WARN (43800)
#
# Returns: Pipe-delimited string: TYPE|TEMP|HEALTH|MODEL|SERIAL|WARNINGS
#   TYPE     - NVMe (device name starts with "nvme"), SSD or HDD
#   TEMP     - "<n>°C" or "-" when no temperature was found
#   HEALTH   - ✓ passed, ⚠ passed but with threshold warnings, ✗ otherwise
#   MODEL    - model string or "-"
#   SERIAL   - serial number or "-"
#   WARNINGS - comma-separated list (TEMP_CRIT, TEMP_WARN, REALLOC:n,
#              PENDING:n, CRC:n, HOURS:n) or empty
#   Empty input yields "HDD|-|✗|-|-|".
#------------------------------------------------------------------------------
parse_smart_data() {
    local device="$1"
    local smart_info="$2"
    local temp="-"
    local type="HDD"
    local health="✗"
    local model="-"
    local serial="-"
    local warnings=""
    local attr_temp="" text_temp="" rotation="" mentions_ssd=false
    local reallocated="" pending="" crc_errors="" power_hours=""
    local line lower raw temp_display
    local -a fields=() words=() warn_list=()
    # Pin IFS so word splitting/joining below is independent of the caller
    local IFS=$' \t\n'

    # Attribute table row: "<id> <NAME> <flag> <value> <worst> <thresh> <type> <updated> <when_failed> <raw...>"
    local re_attr='^[[:space:]]*[0-9]+[[:space:]]+[A-Za-z]'
    # "Temperature: 42 Celsius", "Current Temperature: 35 Celsius",
    # "Current Drive Temperature: 35 C"
    local re_temp='^(Current[[:space:]]+(Drive[[:space:]]+)?)?Temperature:[[:space:]]*([0-9]+)'
    local re_model='^(Device Model|Model Number|Product):(.*)$'
    local re_serial='^Serial [Nn]umber:(.*)$'
    local re_health='SMART (overall-health.*PASSED|Health Status.*OK)'
    # "0 rpm" as a whole number, so "7200 rpm" does not count as zero
    local re_zero_rpm='(^|[^0-9])0[[:space:]]*rpm'

    if [[ -z "$smart_info" ]]; then
        echo "HDD|-|✗|-|-|"
        return
    fi

    # Single pass over the report; the first occurrence of each field wins
    while IFS= read -r line; do
        lower="${line,,}"
        if [[ "$lower" == *ssd* || "$lower" == *"solid state"* ]]; then
            mentions_ssd=true
        fi

        if [[ "$line" =~ $re_attr ]]; then
            read -r -a fields <<< "$line"
            # RAW_VALUE is the 10th column; keep its leading integer so forms
            # like "35 (Min/Max 20/45)" or "12345h+30m+10.1s" still parse
            raw=""
            if [[ "${fields[9]:-}" =~ ^[0-9]+ ]]; then
                raw="${BASH_REMATCH[0]}"
            fi
            case "${fields[0]}:${fields[1]}" in
                *:*Temperature_Celsius*) [[ -z "$attr_temp" ]] && attr_temp="$raw" ;;
                5:Reallocated_Sector*)   [[ -z "$reallocated" ]] && reallocated="$raw" ;;
                197:Current_Pending*)    [[ -z "$pending" ]] && pending="$raw" ;;
                199:UDMA_CRC_Error*)     [[ -z "$crc_errors" ]] && crc_errors="$raw" ;;
                9:Power_On_Hours*)       [[ -z "$power_hours" ]] && power_hours="$raw" ;;
            esac
            continue
        fi

        if [[ "$line" =~ $re_temp ]]; then
            [[ -z "$text_temp" ]] && text_temp="${BASH_REMATCH[3]}"
        elif [[ "$line" == *"Rotation Rate"* ]]; then
            [[ -z "$rotation" ]] && rotation="$lower"
        elif [[ "$line" =~ $re_health ]]; then
            # "overall-health ... PASSED" (ATA/NVMe) or "Health Status: OK" (SCSI)
            health="✓"
        elif [[ "$line" =~ $re_model ]]; then
            if [[ "$model" == "-" ]]; then
                # Everything after the first colon, trimmed and space-normalised
                read -r -a words <<< "${BASH_REMATCH[2]}"
                (( ${#words[@]} > 0 )) && model="${words[*]}"
            fi
        elif [[ "$line" =~ $re_serial ]]; then
            if [[ "$serial" == "-" ]]; then
                read -r -a words <<< "${BASH_REMATCH[1]}"
                (( ${#words[@]} > 0 )) && serial="${words[*]}"
            fi
        fi
    done <<< "$smart_info"

    # Temperature: prefer the attribute table, fall back to the text line
    if [[ -n "$attr_temp" ]]; then
        temp="$attr_temp"
    elif [[ -n "$text_temp" ]]; then
        temp="$text_temp"
    fi

    # Device type detection - handles SSD, HDD, and NVMe
    if [[ "$device" == nvme* ]]; then
        type="NVMe"
    elif [[ -n "$rotation" ]]; then
        if [[ "$rotation" == *"solid state"* || "$rotation" =~ $re_zero_rpm ]]; then
            type="SSD"
        else
            type="HDD"
        fi
    elif [[ "$mentions_ssd" == true ]]; then
        type="SSD"
    fi

    # SMART threshold warnings - check for concerning values.
    # 10# forces base 10 so zero-padded numbers are not read as octal.
    if [[ "$temp" != "-" ]]; then
        if (( 10#$temp >= ${SMART_TEMP_CRIT:-60} )); then
            warn_list+=("TEMP_CRIT")
        elif (( 10#$temp >= ${SMART_TEMP_WARN:-50} )); then
            warn_list+=("TEMP_WARN")
        fi
    fi

    # Reallocated sectors (SMART attribute 5)
    if [[ -n "$reallocated" ]] && (( 10#$reallocated >= ${SMART_REALLOCATED_WARN:-1} )); then
        warn_list+=("REALLOC:$reallocated")
    fi

    # Current pending sectors (SMART attribute 197)
    if [[ -n "$pending" ]] && (( 10#$pending >= ${SMART_PENDING_WARN:-1} )); then
        warn_list+=("PENDING:$pending")
    fi

    # UDMA CRC errors (SMART attribute 199)
    if [[ -n "$crc_errors" ]] && (( 10#$crc_errors >= ${SMART_CRC_ERROR_WARN:-100} )); then
        warn_list+=("CRC:$crc_errors")
    fi

    # Power-on hours (SMART attribute 9)
    if [[ -n "$power_hours" ]] && (( 10#$power_hours >= ${SMART_POWER_ON_HOURS_WARN:-43800} )); then
        warn_list+=("HOURS:$power_hours")
    fi

    # Join warnings
    if (( ${#warn_list[@]} > 0 )); then
        warnings="$(IFS=','; echo "${warn_list[*]}")"
        # Change health indicator to warning if SMART passed but has warnings
        if [[ "$health" == "✓" ]]; then
            health="⚠"
        fi
    fi

    # Format temperature with unit if we have a value
    if [[ "$temp" != "-" ]]; then
        temp_display="${temp}°C"
    else
        temp_display="-"
    fi

    echo "${type}|${temp_display}|${health}|${model}|${serial}|${warnings}"
}
|
|
|
|
#------------------------------------------------------------------------------
# get_drive_smart_info
#
# Fetches SMART data for a device with "sudo smartctl -A -i -H" (stderr
# discarded) and hands the raw output to parse_smart_data.
#
# Args:
#   $1 - Device name (e.g., sda, nvme0n1)
#
# Returns: Pipe-delimited string: TYPE|TEMP|HEALTH|MODEL|SERIAL|WARNINGS
#------------------------------------------------------------------------------
get_drive_smart_info() {
    local device="$1"

    # A failed or empty smartctl run simply yields an empty second argument
    parse_smart_data "$device" "$(sudo smartctl -A -i -H "/dev/${device}" 2>/dev/null)"
}
|
|
|
|
#------------------------------------------------------------------------------
# Main Display Logic
#
# Looks up the chassis type for this host and prints the matching layout;
# hosts without a CHASSIS_TYPES entry get a short "unknown server" box.
#------------------------------------------------------------------------------

HOSTNAME="$(hostname)"
CHASSIS_TYPE="${CHASSIS_TYPES[$HOSTNAME]:-unknown}"

# Display chassis layout
case "$CHASSIS_TYPE" in
    10bay)  generate_10bay_layout "$HOSTNAME" ;;
    large1) generate_large1_layout "$HOSTNAME" ;;
    micro)  generate_micro_layout "$HOSTNAME" ;;
    *)
        printf '%s\n' \
            "┌─────────────────────────────────────────────────────────┐" \
            "│ Unknown server: $HOSTNAME" \
            "│ No chassis mapping defined yet" \
            "│ Run diagnose-drives.sh to gather PCI path information" \
            "└─────────────────────────────────────────────────────────┘"
        ;;
esac
|
|
|
|
#------------------------------------------------------------------------------
# Drive Details Section
#------------------------------------------------------------------------------

# Populate the Ceph OSD lookup tables with one up-front call (rather than one
# query per device), unless Ceph lookups were disabled via SKIP_CEPH.
[[ "$SKIP_CEPH" == true ]] || build_ceph_cache

printf "\n"
colorize_header '=== Drive Details with SMART Status (by Bay Position) ==='

# Column titles plus ruler; the PCI variant carries one extra trailing column.
if [[ "$SHOW_PCI" != true ]]; then
    printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s %-30s\n" \
        "BAY" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL" "SERIAL" \
        "CEPH OSD" "STATUS" "USAGE" "WARNINGS"
    echo "----------------------------------------------------------------------------------------------------------------------------------------------------------------------"
else
    printf "%-5s %-15s %-10s %-8s %-8s %-8s %-30s %-20s %-12s %-10s %-10s %-30s %-40s\n" \
        "BAY" "DEVICE" "SIZE" "TYPE" "TEMP" "HEALTH" "MODEL" "SERIAL" \
        "CEPH OSD" "STATUS" "USAGE" "WARNINGS" "PCI PATH"
    echo "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
fi
|
|
|
|
# Build reverse map: device -> bay
# DRIVE_MAP entries that are blank or the literal "EMPTY" get no reverse entry.
declare -A DEVICE_TO_BAY
for bay in "${!DRIVE_MAP[@]}"; do
    device="${DRIVE_MAP[$bay]}"
    [[ -z "$device" || "$device" == "EMPTY" ]] && continue
    DEVICE_TO_BAY["$device"]="$bay"
done
|
|
|
|
# Sort drives by bay position (numeric bays first, then m2 slots)
# Two passes over the DRIVE_MAP keys: purely numeric bay names in numeric
# order, followed by keys starting with "m2-" in plain lexical order. Keys
# matching neither pattern are left out of the list.
all_bays="$(
    {
        printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^[0-9]+$' | sort -n
        printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^m2-' | sort
    }
)"
|
|
|
|
# Cache lsblk data to reduce redundant calls
#   LSBLK_SIZE[disk]   -> size string of each whole device (lsblk -d)
#   LSBLK_MOUNTS[disk] -> comma-separated mount points of the device itself
#                         and of its partitions
declare -A LSBLK_SIZE=()
declare -A LSBLK_MOUNTS=()
log_info "Caching block device information..."

#------------------------------------------------------------------------------
# _lsblk_parent_disk
#
# Maps a block-device name to the whole disk it belongs to.
#
# Args:
#   $1 - Device or partition name as printed by lsblk (e.g., sda1, nvme0n1p2)
#
# Globals: LSBLK_SIZE (read) - names present here are treated as whole devices
#
# Outputs: Parent disk name on stdout; the name itself when it is already a
#          whole device or carries no recognised partition suffix.
#------------------------------------------------------------------------------
_lsblk_parent_disk() {
    local name="$1"

    if [[ -n "$name" && -n "${LSBLK_SIZE[$name]+set}" ]]; then
        # Already a whole device. Checking this first keeps names that end in
        # a digit (md0, loop0, sr0, mmcblk0) from losing that digit below.
        printf '%s\n' "$name"
    elif [[ "$name" =~ ^(.*[0-9])p[0-9]+$ ]]; then
        # Disk name ends in a digit -> partitions are "<disk>p<N>"
        # (nvme0n1p1 -> nvme0n1, mmcblk0p1 -> mmcblk0)
        printf '%s\n' "${BASH_REMATCH[1]}"
    elif [[ "$name" =~ ^([a-z]+)[0-9]+$ ]]; then
        # Disk name ends in a letter -> partitions are "<disk><N>" (sda1 -> sda)
        printf '%s\n' "${BASH_REMATCH[1]}"
    else
        printf '%s\n' "$name"
    fi
}

# Get sizes for whole disks only
while read -r name size; do
    [[ -z "$name" ]] && continue
    LSBLK_SIZE["$name"]="$size"
done < <(lsblk -dn -o NAME,SIZE 2>/dev/null)

# Get mount points (including partitions) and map back to parent device
while read -r name mounts; do
    # Unmounted devices have an empty second column and are skipped
    [[ -z "$name" || -z "$mounts" ]] && continue
    parent="$(_lsblk_parent_disk "$name")"
    if [[ -n "${LSBLK_MOUNTS[$parent]:-}" ]]; then
        LSBLK_MOUNTS["$parent"]+=",${mounts}"
    else
        LSBLK_MOUNTS["$parent"]="$mounts"
    fi
done < <(lsblk -rn -o NAME,MOUNTPOINT 2>/dev/null | grep -v '^ ')
|
|
|
|
# Parallel SMART data collection for faster execution
# Each mapped block device gets its own background smartctl run whose raw
# output is written to $SMART_CACHE_DIR/<device>.raw; the display loop parses
# those files afterwards.
if [[ "$SKIP_SMART" != true ]]; then
    # An empty SMART_CACHE_DIR would turn the redirect target below into
    # "/<device>.raw", so collection only runs once mktemp has produced a
    # real directory.
    if SMART_CACHE_DIR="$(mktemp -d)" && [[ -d "$SMART_CACHE_DIR" ]]; then
        log_info "Collecting SMART data in parallel..."

        # $all_bays is a newline-separated list; word splitting is intentional
        for bay in $all_bays; do
            device="${DRIVE_MAP[$bay]}"
            if [[ -n "$device" && "$device" != "EMPTY" && -b "/dev/$device" ]]; then
                # Launch background job to collect raw smartctl data
                sudo smartctl -A -i -H "/dev/$device" \
                    > "$SMART_CACHE_DIR/${device}.raw" 2>/dev/null &
            fi
        done

        # Wait for all background SMART queries to complete
        wait
        log_info "SMART data collection complete"
    else
        SMART_CACHE_DIR=""
        printf 'Warning: could not create SMART cache directory; SMART data will be unavailable\n' >&2
    fi
fi
|
|
|
|
#------------------------------------------------------------------------------
# _drive_usage_label
#
# Condenses a drive's mount points into the text shown in the USAGE column.
#
# Args:
#   $1 - Comma-separated mount points of the drive and its partitions
#        (may be empty)
#
# Outputs: "-" when nothing is mounted, "BOOT" when one entry is exactly "/",
#          otherwise the first three mount points joined by commas.
#------------------------------------------------------------------------------
_drive_usage_label() {
    local mounts="$1"
    local -a entries=()

    if [[ -z "$mounts" ]]; then
        printf '%s\n' "-"
        return 0
    fi

    # Exact-element match against the full list: the root filesystem is found
    # regardless of its position and regardless of sibling mounts such as
    # /boot/efi or /home, while paths that merely start with "/" never match.
    if [[ ",${mounts}," == *",/,"* ]]; then
        printf '%s\n' "BOOT"
        return 0
    fi

    # Limit to first 3 mount points for display
    local IFS=','
    read -r -a entries <<< "$mounts"
    printf '%s\n' "${entries[*]:0:3}"
}

# One table row per mapped bay, in the order established by $all_bays
# (newline-separated list; word splitting is intentional).
for bay in $all_bays; do
    device="${DRIVE_MAP[$bay]}"

    # Skip empty bays and mapped names that are not block devices
    if [[ -z "$device" || "$device" == "EMPTY" || ! -b "/dev/$device" ]]; then
        continue
    fi

    # Use cached lsblk data
    size="${LSBLK_SIZE[$device]:-}"

    # SMART columns start as placeholders and are only overwritten when a
    # non-empty cached smartctl report exists for this device.
    type="-"
    temp="-"
    health="-"
    model="-"
    serial="-"
    warnings=""
    raw_smart=""
    if [[ "$SKIP_SMART" != true && -n "${SMART_CACHE_DIR:-}" \
          && -f "$SMART_CACHE_DIR/${device}.raw" ]]; then
        raw_smart="$(< "$SMART_CACHE_DIR/${device}.raw")"
    fi
    if [[ -n "$raw_smart" ]]; then
        smart_info="$(parse_smart_data "$device" "$raw_smart")"
        IFS='|' read -r type temp health model serial warnings <<< "$smart_info"
    fi

    # Check for Ceph OSD using cached data
    osd_id="-"
    ceph_status="-"
    if [[ "$SKIP_CEPH" != true && -n "${CEPH_DEVICE_TO_OSD[$device]:-}" ]]; then
        osd_id="${CEPH_DEVICE_TO_OSD[$device]}"
        # Status tables are keyed by the OSD id without its "osd." prefix
        osd_num="${osd_id#osd.}"
        up_status="${CEPH_OSD_STATUS[$osd_num]:-unknown}"
        in_status="${CEPH_OSD_IN[$osd_num]:-out}"
        ceph_status="${up_status}/${in_status}"
    fi

    # Check mount points using cached lsblk data
    # This includes both whole-device mounts and partition mounts
    mount_points="${LSBLK_MOUNTS[$device]:-}"
    usage="$(_drive_usage_label "$mount_points")"

    # Apply colors if enabled
    colored_temp="$(colorize_temp "$temp")"
    colored_health="$(colorize_health "$health")"

    # Colorize warnings if present
    colored_warnings="${warnings:--}"
    if [[ "$USE_COLOR" == true && -n "$warnings" ]]; then
        colored_warnings="${COLOR_YELLOW}${warnings}${COLOR_RESET}"
    fi

    # %b (not %s) is used for the colored cells so escape sequences in the
    # color variables are interpreted by printf.
    if [[ "$SHOW_PCI" == true ]]; then
        pci_path="${BAY_TO_PCI_PATH[$bay]:-}"
        printf "%-5s %-15s %-10s %-8s %-8b %-8b %-30s %-20s %-12s %-10s %-10s %-30b %-40s\n" \
            "$bay" "/dev/$device" "$size" "$type" "$colored_temp" "$colored_health" \
            "$model" "$serial" "$osd_id" "$ceph_status" "$usage" "$colored_warnings" "$pci_path"
    else
        printf "%-5s %-15s %-10s %-8s %-8b %-8b %-30s %-20s %-12s %-10s %-10s %-30b\n" \
            "$bay" "/dev/$device" "$size" "$type" "$colored_temp" "$colored_health" \
            "$model" "$serial" "$osd_id" "$ceph_status" "$usage" "$colored_warnings"
    fi
done
|
|
|
|
# Clean up SMART cache directory
# -d is false for an empty path, so nothing is removed when SMART_CACHE_DIR
# is unset or empty.
if [[ -d "${SMART_CACHE_DIR:-}" ]]; then
    rm -rf "$SMART_CACHE_DIR"
fi
|
|
|
|
# NVMe drives (only show unmapped ones - mapped NVMe drives appear in main table)
# stderr is silenced on lsblk itself, since that is the command that can
# complain; grep only filters its output.
nvme_devices="$(lsblk -d -n -o NAME,SIZE 2>/dev/null | grep '^nvme')"
if [[ -n "$nvme_devices" ]]; then
    # Filter out already-mapped NVMe devices
    unmapped_nvme=""
    while read -r name size; do
        [[ -z "$name" ]] && continue
        if [[ -z "${DEVICE_TO_BAY[$name]:-}" ]]; then
            unmapped_nvme+="$name $size"$'\n'
        fi
    done <<< "$nvme_devices"

    if [[ -n "$unmapped_nvme" ]]; then
        printf "\n"
        colorize_header '=== Unmapped NVMe Drives ==='
        printf "%-15s %-10s %-10s %-40s %-25s\n" "DEVICE" "SIZE" "TYPE" "MODEL" "SERIAL"
        echo "------------------------------------------------------------------------------------------------------"
        # The loop runs on the right-hand side of a pipe, so the variables it
        # sets (device, model, serial, ...) stay local to that subshell.
        echo "$unmapped_nvme" | while read -r name size; do
            [[ -z "$name" ]] && continue
            device="/dev/$name"
            model="-"
            serial="-"
            # Honour SKIP_SMART here as well, so smartctl is not invoked when
            # SMART queries were disabled for the main table.
            if [[ "$SKIP_SMART" != true ]]; then
                # Get model and serial from smartctl for accuracy
                smart_info="$(sudo smartctl -i "$device" 2>/dev/null)"
                model="$(echo "$smart_info" | grep "Model Number" | cut -d: -f2 | xargs)"
                serial="$(echo "$smart_info" | grep "Serial Number" | cut -d: -f2 | xargs)"
                [[ -z "$model" ]] && model="-"
                [[ -z "$serial" ]] && serial="-"
            fi
            printf "%-15s %-10s %-10s %-40s %-25s\n" "$device" "$size" "NVMe" "$model" "$serial"
        done
    fi
fi
|
|
|
|
#------------------------------------------------------------------------------
# Optional sections
#------------------------------------------------------------------------------

# Ceph RBD Devices
# The pattern is anchored to the NAME column so only devices whose name starts
# with "rbd" are listed; an unanchored match would also pick up any line that
# merely contains "rbd" somewhere (e.g. inside another device's name).
rbd_devices="$(lsblk -d -n -o NAME,SIZE,TYPE 2>/dev/null | grep '^rbd' | sort -V)"
if [[ -n "$rbd_devices" ]]; then
    printf "\n"
    colorize_header '=== Ceph RBD Devices ==='
    printf "%-15s %-10s %-10s %-30s\n" "DEVICE" "SIZE" "TYPE" "MOUNTPOINT"
    echo "------------------------------------------------------------"
    echo "$rbd_devices" | while read -r name size type; do
        [[ -z "$name" ]] && continue
        # Get mountpoint if any (first line of lsblk output for the device)
        mountpoint="$(lsblk -n -o MOUNTPOINT "/dev/$name" 2>/dev/null | head -1)"
        [[ -z "$mountpoint" ]] && mountpoint="-"
        printf "%-15s %-10s %-10s %-30s\n" "/dev/$name" "$size" "$type" "$mountpoint"
    done
fi
|
|
|
|
# Show mapping diagnostic info if DEBUG is set
if [[ -n "$DEBUG" ]]; then
    printf "\n"
    colorize_header '=== DEBUG: Drive Mappings ==='
    # Every line starts with the word "Bay", so a plain numeric sort of the
    # whole line cannot order the bays. Version-sort on the second field
    # ("<bay>:") instead, which puts bay 2 before bay 10.
    for key in "${!DRIVE_MAP[@]}"; do
        echo "Bay $key: ${DRIVE_MAP[$key]}"
    done | sort -k2,2V
fi
|