Add --diagnose flag, remove obsolete helper scripts, fix docs

- Add --diagnose option that shows all PCI paths, storage controllers,
  block devices, and validates current mappings. Replaces the separate
  diagnose-drives.sh script.
- Remove diagnose-drives.sh (incorporated into --diagnose).
- Remove get-serials.sh (redundant with SMART data in main table).
- Remove test-paths.sh (referenced non-existent 0c:00.0 controller).
- Remove todo.md (massively outdated).
- Fix storage controller text overflowing box borders in large1 and
  micro layouts by adding truncation (%-69.69s, %-57.57s).
- Fix chassis name to CX4712 in README.
- Update server mapping statuses from "Requires mapping" to actual
  partially-mapped states.
- Add ⚠ health indicator to README output column docs.
- Update Claude.md metrics to match current state.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-06 18:50:37 -05:00
parent 555ecd54b2
commit c6ea28c5d6
6 changed files with 98 additions and 107 deletions

View File

@@ -60,6 +60,7 @@ OPTIONS:
--verbose Show detailed error messages and warnings
--no-ceph Skip Ceph OSD information
--show-pci Show PCI paths in output
--diagnose Show all PCI paths and block devices (for mapping new servers)
EXAMPLES:
$(basename "$0") # Normal run with all features
@@ -67,6 +68,7 @@ EXAMPLES:
$(basename "$0") --color # Run with colored output
$(basename "$0") --verbose # Show all errors and warnings
$(basename "$0") --debug # Show mapping debug info
$(basename "$0") --diagnose # Gather PCI paths for new server setup
ENVIRONMENT VARIABLES:
DEBUG=1 Same as --debug flag
@@ -83,6 +85,7 @@ SKIP_CEPH=false
SHOW_PCI=false
USE_COLOR=false
VERBOSE=false
RUN_DIAGNOSE=false
while [[ $# -gt 0 ]]; do
case "$1" in
@@ -118,6 +121,10 @@ while [[ $# -gt 0 ]]; do
VERBOSE=true
shift
;;
--diagnose)
RUN_DIAGNOSE=true
shift
;;
*)
echo "Unknown option: $1" >&2
echo "Use --help for usage information." >&2
@@ -321,6 +328,68 @@ check_dependencies() {
# Run dependency check at script start
check_dependencies
#------------------------------------------------------------------------------
# run_diagnose
#
# Prints a diagnostic report used when mapping a new server, then exits:
#   1. hostname and current date
#   2. storage controllers reported by lspci
#   3. whole-disk symlinks under $DISK_BY_PATH with target device and size
#   4. block devices from lsblk (rbd and loop devices filtered out)
#   5. the SERVER_MAPPINGS entry for this host resolved bay by bay, or a
#      note that no mapping exists yet
#
# Globals:   DISK_BY_PATH (read), SERVER_MAPPINGS (read)
# Arguments: none
# Outputs:   report on stdout
# Returns:   does not return; terminates the script with exit 0
#------------------------------------------------------------------------------
run_diagnose() {
  local hostname sanitized controllers
  local path basename_path target device size
  local pci_path bay dev
  local found_paths=false

  hostname="$(hostname)"

  echo "=== Server Information ==="
  echo "Hostname: $hostname"
  echo "Date: $(date)"
  echo ""

  echo "=== Storage Controllers ==="
  # grep returns non-zero when nothing matches; "|| true" keeps that from
  # being treated as a failure if errexit/pipefail is active.
  controllers="$(lspci 2>/dev/null | grep -iE "SAS|SATA|RAID|Mass storage|NVMe" || true)"
  if [[ -n "$controllers" ]]; then
    printf '%s\n' "$controllers"
  else
    echo " (none detected)"
  fi
  echo ""

  echo "=== All /dev/disk/by-path/ entries (whole disks only) ==="
  for path in "${DISK_BY_PATH}"/*; do
    # Also filters out the literal pattern left behind by an unmatched glob.
    [[ -L "$path" ]] || continue
    # Skip partitions
    [[ "$path" =~ -part[0-9]+$ ]] && continue

    basename_path="${path##*/}"
    target="$(readlink -f "$path")"
    device="${target##*/}"
    # lsblk pads its output; strip the whitespace so "(size)" prints cleanly.
    size="$(lsblk -d -n -o SIZE "$target" 2>/dev/null || true)"
    size="${size//[[:space:]]/}"

    printf " %-55s -> %-10s %s\n" "$basename_path" "$device" "${size:+($size)}"
    found_paths=true
  done
  if [[ "$found_paths" != true ]]; then
    echo " (none found)"
  fi
  echo ""

  echo "=== Block Devices ==="
  lsblk -d -o NAME,SIZE,TYPE,TRAN 2>/dev/null | grep -v "rbd\|loop" || true
  echo ""

  # Check if this server has a mapping. The sanitising filter matches the one
  # applied to HOSTNAME in the main display logic so the lookup key agrees.
  sanitized="$(echo "$hostname" | tr -cd '[:alnum:]-_.')"
  # An empty key is not a valid associative-array subscript, so test it first.
  if [[ -n "$sanitized" && -n "${SERVER_MAPPINGS[$sanitized]:-}" ]]; then
    echo "=== Current Mapping for $sanitized ==="
    # Each mapping line is "<pci_path> <bay>".
    while read -r pci_path bay; do
      [[ -z "$pci_path" || -z "$bay" ]] && continue
      if [[ -L "${DISK_BY_PATH}/$pci_path" ]]; then
        dev="$(readlink -f "${DISK_BY_PATH}/$pci_path")"
        dev="${dev##*/}"
        printf " Bay %-5s %-55s -> %s\n" "$bay" "$pci_path" "$dev"
      else
        printf " Bay %-5s %-55s -> (not connected)\n" "$bay" "$pci_path"
      fi
    done <<< "${SERVER_MAPPINGS[$sanitized]}"
  else
    echo "NOTE: No mapping exists yet for '$sanitized'."
    echo "Use the PCI paths above to create a SERVER_MAPPINGS entry."
  fi

  exit 0
}
#------------------------------------------------------------------------------
# Chassis Layout Generator Functions
# These define the physical layout and display formatting for each chassis type
@@ -418,7 +487,7 @@ generate_micro_layout() {
printf "│ │\n"
printf "│ Storage Controllers: │\n"
while IFS= read -r ctrl; do
[[ -n "$ctrl" ]] && printf "│ %-57s│\n" "$ctrl"
[[ -n "$ctrl" ]] && printf "│ %-57.57s│\n" "$ctrl"
done < <(get_storage_controllers)
printf "│ │\n"
@@ -460,7 +529,7 @@ generate_large1_layout() {
printf "│ │\n"
printf "│ Storage Controllers: │\n"
while IFS= read -r ctrl; do
[[ -n "$ctrl" ]] && printf "│ %-69s│\n" "$ctrl"
[[ -n "$ctrl" ]] && printf "│ %-69.69s│\n" "$ctrl"
done < <(get_storage_controllers)
printf "│ │\n"
printf "│ M.2 NVMe: M1: %-10s M2: %-10s │\n" "${DRIVE_MAP[m2-1]:-EMPTY}" "${DRIVE_MAP[m2-2]:-EMPTY}"
@@ -648,7 +717,7 @@ build_drive_map() {
declare -g -A BAY_TO_PCI_PATH=()
if [[ -z "$mapping" ]]; then
log_warn "No drive mapping found for host '$host'. Run diagnose-drives.sh to create one."
log_warn "No drive mapping found for host '$host'. Run with --diagnose to gather PCI path info."
return
fi
@@ -919,6 +988,11 @@ get_drive_smart_info() {
# Main Display Logic
#------------------------------------------------------------------------------
# Run diagnose mode if requested. run_diagnose finishes with `exit 0`, so when
# --diagnose is given none of the normal display logic below is executed.
if [[ "$RUN_DIAGNOSE" == true ]]; then
run_diagnose
fi
HOSTNAME=$(hostname | tr -cd '[:alnum:]-_.')
CHASSIS_TYPE=${CHASSIS_TYPES[$HOSTNAME]:-"unknown"}
@@ -937,7 +1011,7 @@ case "$CHASSIS_TYPE" in
echo "┌─────────────────────────────────────────────────────────┐"
echo "│ Unknown server: $HOSTNAME"
echo "│ No chassis mapping defined yet"
echo "│ Run diagnose-drives.sh to gather PCI path information"
echo "│ Run with --diagnose to gather PCI path information"
echo "└─────────────────────────────────────────────────────────┘"
;;
esac