Compare commits
3 Commits
7db30a7bbf
...
4a86cdd167
| Author | SHA1 | Date | |
|---|---|---|---|
| 4a86cdd167 | |||
| 58897b1f3a | |||
| fbd9965fb1 |
107
driveAtlas.sh
107
driveAtlas.sh
@@ -714,24 +714,19 @@ readonly SMART_CRC_ERROR_WARN=100 # UDMA CRC error warning threshold
|
|||||||
readonly SMART_POWER_ON_HOURS_WARN=43800 # ~5 years of continuous use
|
readonly SMART_POWER_ON_HOURS_WARN=43800 # ~5 years of continuous use
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# get_drive_smart_info
|
# parse_smart_data
|
||||||
#
|
#
|
||||||
# Retrieves SMART data for a given device.
|
# Parses raw SMART data and returns formatted info string.
|
||||||
#
|
#
|
||||||
# Args:
|
# Args:
|
||||||
# $1 - Device name (e.g., sda, nvme0n1)
|
# $1 - Device name (e.g., sda, nvme0n1)
|
||||||
|
# $2 - Raw smartctl output string
|
||||||
#
|
#
|
||||||
# Returns: Pipe-delimited string: TYPE|TEMP|HEALTH|MODEL|SERIAL|WARNINGS
|
# Returns: Pipe-delimited string: TYPE|TEMP|HEALTH|MODEL|SERIAL|WARNINGS
|
||||||
# TYPE: SSD, HDD, or NVMe
|
|
||||||
# TEMP: Temperature in Celsius (or "-" if unavailable)
|
|
||||||
# HEALTH: ✓ for passed, ✗ for failed, ⚠ for passed with warnings
|
|
||||||
# MODEL: Drive model string
|
|
||||||
# SERIAL: Drive serial number
|
|
||||||
# WARNINGS: Comma-separated warning codes (or empty)
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
get_drive_smart_info() {
|
parse_smart_data() {
|
||||||
local device="$1"
|
local device="$1"
|
||||||
local smart_info
|
local smart_info="$2"
|
||||||
local temp="-"
|
local temp="-"
|
||||||
local type="HDD"
|
local type="HDD"
|
||||||
local health="✗"
|
local health="✗"
|
||||||
@@ -739,19 +734,7 @@ get_drive_smart_info() {
|
|||||||
local serial="-"
|
local serial="-"
|
||||||
local warnings=""
|
local warnings=""
|
||||||
|
|
||||||
# Capture both stdout and stderr for better error reporting
|
|
||||||
local smart_stderr
|
|
||||||
smart_stderr="$(mktemp)"
|
|
||||||
smart_info="$(sudo smartctl -A -i -H "/dev/$device" 2>"$smart_stderr")"
|
|
||||||
local smart_exit=$?
|
|
||||||
|
|
||||||
if [[ $smart_exit -ne 0 && -s "$smart_stderr" ]]; then
|
|
||||||
log_warn "SMART query failed for $device: $(head -1 "$smart_stderr")"
|
|
||||||
fi
|
|
||||||
rm -f "$smart_stderr"
|
|
||||||
|
|
||||||
if [[ -z "$smart_info" ]]; then
|
if [[ -z "$smart_info" ]]; then
|
||||||
log_info "No SMART data available for $device"
|
|
||||||
echo "HDD|-|✗|-|-|"
|
echo "HDD|-|✗|-|-|"
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
@@ -762,11 +745,8 @@ get_drive_smart_info() {
|
|||||||
# - SATA: "Current Temperature: 35 Celsius"
|
# - SATA: "Current Temperature: 35 Celsius"
|
||||||
# - NVMe: "Temperature: 42 Celsius"
|
# - NVMe: "Temperature: 42 Celsius"
|
||||||
if echo "$smart_info" | grep -q "Temperature_Celsius"; then
|
if echo "$smart_info" | grep -q "Temperature_Celsius"; then
|
||||||
# SMART attribute format - temperature is typically the 10th field (raw value)
|
|
||||||
# But we use the last numeric field before any parentheses for reliability
|
|
||||||
temp="$(echo "$smart_info" | grep "Temperature_Celsius" | head -1 | awk '{for(i=NF;i>0;i--) if($i ~ /^[0-9]+$/) {print $i; exit}}')"
|
temp="$(echo "$smart_info" | grep "Temperature_Celsius" | head -1 | awk '{for(i=NF;i>0;i--) if($i ~ /^[0-9]+$/) {print $i; exit}}')"
|
||||||
elif echo "$smart_info" | grep -qE "^(Current )?Temperature:"; then
|
elif echo "$smart_info" | grep -qE "^(Current )?Temperature:"; then
|
||||||
# Simple "Temperature: XX Celsius" format
|
|
||||||
temp="$(echo "$smart_info" | grep -E "^(Current )?Temperature:" | head -1 | awk '{print $2}')"
|
temp="$(echo "$smart_info" | grep -E "^(Current )?Temperature:" | head -1 | awk '{print $2}')"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -859,6 +839,24 @@ get_drive_smart_info() {
|
|||||||
echo "${type}|${temp_display}|${health}|${model}|${serial}|${warnings}"
|
echo "${type}|${temp_display}|${health}|${model}|${serial}|${warnings}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
# get_drive_smart_info
|
||||||
|
#
|
||||||
|
# Retrieves SMART data for a given device: runs smartctl (stderr discarded) and passes the raw output to parse_smart_data.
|
||||||
|
#
|
||||||
|
# Args:
|
||||||
|
# $1 - Device name (e.g., sda, nvme0n1)
|
||||||
|
#
|
||||||
|
# Returns: Pipe-delimited string: TYPE|TEMP|HEALTH|MODEL|SERIAL|WARNINGS
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
get_drive_smart_info() {
|
||||||
|
local device="$1"
|
||||||
|
local smart_info
|
||||||
|
|
||||||
|
smart_info="$(sudo smartctl -A -i -H "/dev/$device" 2>/dev/null)"
|
||||||
|
parse_smart_data "$device" "$smart_info"
|
||||||
|
}
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# Main Display Logic
|
# Main Display Logic
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
@@ -919,26 +917,37 @@ done
|
|||||||
all_bays="$(printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^[0-9]+$' | sort -n; printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^m2-' | sort)"
|
all_bays="$(printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^[0-9]+$' | sort -n; printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^m2-' | sort)"
|
||||||
|
|
||||||
# Cache lsblk data to reduce redundant calls
|
# Cache lsblk data to reduce redundant calls
|
||||||
# Single call gets all info we need: size and mount points
|
# Two lsblk passes: whole-disk sizes first, then mount points mapped back to their parent device
|
||||||
declare -A LSBLK_SIZE=()
|
declare -A LSBLK_SIZE=()
|
||||||
declare -A LSBLK_MOUNTS=()
|
declare -A LSBLK_MOUNTS=()
|
||||||
log_info "Caching block device information..."
|
log_info "Caching block device information..."
|
||||||
while IFS='|' read -r name size mounts; do
|
|
||||||
|
# Get sizes for whole disks only
|
||||||
|
while read -r name size; do
|
||||||
[[ -z "$name" ]] && continue
|
[[ -z "$name" ]] && continue
|
||||||
LSBLK_SIZE[$name]="$size"
|
LSBLK_SIZE["$name"]="$size"
|
||||||
# Accumulate mount points for parent device
|
done < <(lsblk -dn -o NAME,SIZE 2>/dev/null)
|
||||||
parent="${name%%[0-9]}" # Strip partition number
|
|
||||||
if [[ -n "$mounts" ]]; then
|
# Get mount points (including partitions) and map back to parent device
|
||||||
if [[ -n "${LSBLK_MOUNTS[$parent]}" ]]; then
|
while read -r name mounts; do
|
||||||
LSBLK_MOUNTS[$parent]+=",${mounts}"
|
[[ -z "$name" || -z "$mounts" ]] && continue
|
||||||
|
# Strip partition suffix (sda1 -> sda, nvme0n1p1 -> nvme0n1)
|
||||||
|
if [[ "$name" =~ ^(nvme[0-9]+n[0-9]+)p[0-9]+$ ]]; then
|
||||||
|
parent="${BASH_REMATCH[1]}"
|
||||||
|
elif [[ "$name" =~ ^([a-z]+)[0-9]+$ ]]; then
|
||||||
|
parent="${BASH_REMATCH[1]}"
|
||||||
else
|
else
|
||||||
LSBLK_MOUNTS[$parent]="$mounts"
|
parent="$name"
|
||||||
fi
|
fi
|
||||||
|
if [[ -n "${LSBLK_MOUNTS[$parent]:-}" ]]; then
|
||||||
|
LSBLK_MOUNTS["$parent"]+=",${mounts}"
|
||||||
|
else
|
||||||
|
LSBLK_MOUNTS["$parent"]="$mounts"
|
||||||
fi
|
fi
|
||||||
done < <(lsblk -rn -o NAME,SIZE,MOUNTPOINT 2>/dev/null)
|
done < <(lsblk -rn -o NAME,MOUNTPOINT 2>/dev/null | grep -v '^ ')
|
||||||
|
|
||||||
# Parallel SMART data collection for faster execution
|
# Parallel SMART data collection for faster execution
|
||||||
# Collect SMART data in background jobs, store in temp files
|
# Collect raw smartctl output in background jobs, parse later
|
||||||
if [[ "$SKIP_SMART" != true ]]; then
|
if [[ "$SKIP_SMART" != true ]]; then
|
||||||
SMART_CACHE_DIR="$(mktemp -d)"
|
SMART_CACHE_DIR="$(mktemp -d)"
|
||||||
log_info "Collecting SMART data in parallel..."
|
log_info "Collecting SMART data in parallel..."
|
||||||
@@ -946,8 +955,8 @@ if [[ "$SKIP_SMART" != true ]]; then
|
|||||||
for bay in $all_bays; do
|
for bay in $all_bays; do
|
||||||
device="${DRIVE_MAP[$bay]}"
|
device="${DRIVE_MAP[$bay]}"
|
||||||
if [[ -n "$device" && "$device" != "EMPTY" && -b "/dev/$device" ]]; then
|
if [[ -n "$device" && "$device" != "EMPTY" && -b "/dev/$device" ]]; then
|
||||||
# Launch background job for each device
|
# Launch background job to collect raw smartctl data
|
||||||
(get_drive_smart_info "$device" > "$SMART_CACHE_DIR/$device") &
|
(sudo smartctl -A -i -H "/dev/$device" > "$SMART_CACHE_DIR/${device}.raw" 2>/dev/null) &
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
@@ -971,13 +980,23 @@ for bay in $all_bays; do
|
|||||||
serial="-"
|
serial="-"
|
||||||
warnings=""
|
warnings=""
|
||||||
else
|
else
|
||||||
# Read from cached SMART data
|
# Read from cached raw SMART data and parse it
|
||||||
if [[ -f "$SMART_CACHE_DIR/$device" ]]; then
|
raw_smart=""
|
||||||
smart_info="$(cat "$SMART_CACHE_DIR/$device")"
|
if [[ -f "$SMART_CACHE_DIR/${device}.raw" ]]; then
|
||||||
else
|
raw_smart="$(cat "$SMART_CACHE_DIR/${device}.raw")"
|
||||||
smart_info=""
|
|
||||||
fi
|
fi
|
||||||
|
# Parse the raw data with parse_smart_data (the same parser get_drive_smart_info uses)
|
||||||
|
if [[ -n "$raw_smart" ]]; then
|
||||||
|
smart_info="$(parse_smart_data "$device" "$raw_smart")"
|
||||||
IFS='|' read -r type temp health model serial warnings <<< "$smart_info"
|
IFS='|' read -r type temp health model serial warnings <<< "$smart_info"
|
||||||
|
else
|
||||||
|
type="-"
|
||||||
|
temp="-"
|
||||||
|
health="-"
|
||||||
|
model="-"
|
||||||
|
serial="-"
|
||||||
|
warnings=""
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check for Ceph OSD using cached data
|
# Check for Ceph OSD using cached data
|
||||||
@@ -1020,7 +1039,7 @@ for bay in $all_bays; do
|
|||||||
colored_health="$(colorize_health "$health")"
|
colored_health="$(colorize_health "$health")"
|
||||||
|
|
||||||
# Colorize warnings if present
|
# Colorize warnings if present
|
||||||
local colored_warnings="${warnings:--}"
|
colored_warnings="${warnings:--}"
|
||||||
if [[ "$USE_COLOR" == true && -n "$warnings" ]]; then
|
if [[ "$USE_COLOR" == true && -n "$warnings" ]]; then
|
||||||
colored_warnings="${COLOR_YELLOW}${warnings}${COLOR_RESET}"
|
colored_warnings="${COLOR_YELLOW}${warnings}${COLOR_RESET}"
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user