Refactor SMART parsing for parallel collection compatibility

Split SMART data handling into two functions:
- parse_smart_data(): Parses raw smartctl output (no I/O)
- get_drive_smart_info(): Fetches and parses (wrapper)

Changed parallel collection to save raw smartctl output to cache
files, then parse during the display loop. This avoids issues
with function availability in background subshells when running
from process substitution (bash <(curl ...)).

Also fixed:
- Removed orphan code that was outside function scope
- Fixed lsblk caching to use separate calls for SIZE and MOUNTPOINT

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-05 19:57:37 -05:00
parent 58897b1f3a
commit 4a86cdd167

View File

@@ -714,24 +714,19 @@ readonly SMART_CRC_ERROR_WARN=100 # UDMA CRC error warning threshold
readonly SMART_POWER_ON_HOURS_WARN=43800 # ~5 years of continuous use
#------------------------------------------------------------------------------
# get_drive_smart_info
# parse_smart_data
#
# Retrieves SMART data for a given device.
# Parses raw SMART data and returns formatted info string.
#
# Args:
# $1 - Device name (e.g., sda, nvme0n1)
# $2 - Raw smartctl output string
#
# Returns: Pipe-delimited string: TYPE|TEMP|HEALTH|MODEL|SERIAL|WARNINGS
# TYPE: SSD, HDD, or NVMe
# TEMP: Temperature in Celsius (or "-" if unavailable)
# HEALTH: ✓ for passed, ✗ for failed, ⚠ for passed with warnings
# MODEL: Drive model string
# SERIAL: Drive serial number
# WARNINGS: Comma-separated warning codes (or empty)
#------------------------------------------------------------------------------
get_drive_smart_info() {
parse_smart_data() {
local device="$1"
local smart_info
local smart_info="$2"
local temp="-"
local type="HDD"
local health="✗"
@@ -739,19 +734,7 @@ get_drive_smart_info() {
local serial="-"
local warnings=""
# Capture both stdout and stderr for better error reporting
local smart_stderr
smart_stderr="$(mktemp)"
smart_info="$(sudo smartctl -A -i -H "/dev/$device" 2>"$smart_stderr")"
local smart_exit=$?
if [[ $smart_exit -ne 0 && -s "$smart_stderr" ]]; then
log_warn "SMART query failed for $device: $(head -1 "$smart_stderr")"
fi
rm -f "$smart_stderr"
if [[ -z "$smart_info" ]]; then
log_info "No SMART data available for $device"
echo "HDD|-|✗|-|-|"
return
fi
@@ -762,11 +745,8 @@ get_drive_smart_info() {
# - SATA: "Current Temperature: 35 Celsius"
# - NVMe: "Temperature: 42 Celsius"
if echo "$smart_info" | grep -q "Temperature_Celsius"; then
# SMART attribute format - temperature is typically the 10th field (raw value)
# But we use the last numeric field before any parentheses for reliability
temp="$(echo "$smart_info" | grep "Temperature_Celsius" | head -1 | awk '{for(i=NF;i>0;i--) if($i ~ /^[0-9]+$/) {print $i; exit}}')"
elif echo "$smart_info" | grep -qE "^(Current )?Temperature:"; then
# Simple "Temperature: XX Celsius" format
temp="$(echo "$smart_info" | grep -E "^(Current )?Temperature:" | head -1 | awk '{print $2}')"
fi
@@ -859,6 +839,24 @@ get_drive_smart_info() {
echo "${type}|${temp_display}|${health}|${model}|${serial}|${warnings}"
}
#------------------------------------------------------------------------------
# get_drive_smart_info
#
# Convenience wrapper: queries smartctl for a single device and hands the
# captured output to parse_smart_data, which prints the formatted result.
#
# Args:
#   $1 - Device name (e.g., sda, nvme0n1)
#
# Returns: Pipe-delimited string: TYPE|TEMP|HEALTH|MODEL|SERIAL|WARNINGS
#          (whatever parse_smart_data prints for the captured output)
#------------------------------------------------------------------------------
get_drive_smart_info() {
  local dev="$1" raw_output
  local -a query=(sudo smartctl -A -i -H "/dev/${dev}")

  # Only stdout is captured; anything smartctl writes to stderr is discarded.
  # The capture stays a standalone assignment so its exit status is handled
  # exactly as a plain `var="$(cmd)"` statement would be.
  raw_output="$("${query[@]}" 2>/dev/null)"

  parse_smart_data "${dev}" "${raw_output}"
}
#------------------------------------------------------------------------------
# Main Display Logic
#------------------------------------------------------------------------------
@@ -949,7 +947,7 @@ while read -r name mounts; do
done < <(lsblk -rn -o NAME,MOUNTPOINT 2>/dev/null | grep -v '^ ')
# Parallel SMART data collection for faster execution
# Collect SMART data in background jobs, store in temp files
# Collect raw smartctl output in background jobs, parse later
if [[ "$SKIP_SMART" != true ]]; then
SMART_CACHE_DIR="$(mktemp -d)"
log_info "Collecting SMART data in parallel..."
@@ -957,8 +955,8 @@ if [[ "$SKIP_SMART" != true ]]; then
for bay in $all_bays; do
device="${DRIVE_MAP[$bay]}"
if [[ -n "$device" && "$device" != "EMPTY" && -b "/dev/$device" ]]; then
# Launch background job for each device
(get_drive_smart_info "$device" > "$SMART_CACHE_DIR/$device") &
# Launch background job to collect raw smartctl data
(sudo smartctl -A -i -H "/dev/$device" > "$SMART_CACHE_DIR/${device}.raw" 2>/dev/null) &
fi
done
@@ -982,13 +980,23 @@ for bay in $all_bays; do
serial="-"
warnings=""
else
# Read from cached SMART data
if [[ -f "$SMART_CACHE_DIR/$device" ]]; then
smart_info="$(cat "$SMART_CACHE_DIR/$device")"
else
smart_info=""
# Read from cached raw SMART data and parse it
raw_smart=""
if [[ -f "$SMART_CACHE_DIR/${device}.raw" ]]; then
raw_smart="$(cat "$SMART_CACHE_DIR/${device}.raw")"
fi
# Parse the cached raw output with parse_smart_data (no smartctl call here)
if [[ -n "$raw_smart" ]]; then
smart_info="$(parse_smart_data "$device" "$raw_smart")"
IFS='|' read -r type temp health model serial warnings <<< "$smart_info"
else
type="-"
temp="-"
health="-"
model="-"
serial="-"
warnings=""
fi
fi
# Check for Ceph OSD using cached data