Compare commits
3 Commits
7db30a7bbf
...
4a86cdd167
| Author | SHA1 | Date | |
|---|---|---|---|
| 4a86cdd167 | |||
| 58897b1f3a | |||
| fbd9965fb1 |
107
driveAtlas.sh
107
driveAtlas.sh
@@ -714,24 +714,19 @@ readonly SMART_CRC_ERROR_WARN=100 # UDMA CRC error warning threshold
|
||||
readonly SMART_POWER_ON_HOURS_WARN=43800 # ~5 years of continuous use
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# get_drive_smart_info
|
||||
# parse_smart_data
|
||||
#
|
||||
# Retrieves SMART data for a given device.
|
||||
# Parses raw SMART data and returns formatted info string.
|
||||
#
|
||||
# Args:
|
||||
# $1 - Device name (e.g., sda, nvme0n1)
|
||||
# $2 - Raw smartctl output string
|
||||
#
|
||||
# Returns: Pipe-delimited string: TYPE|TEMP|HEALTH|MODEL|SERIAL|WARNINGS
|
||||
# TYPE: SSD, HDD, or NVMe
|
||||
# TEMP: Temperature in Celsius (or "-" if unavailable)
|
||||
# HEALTH: ✓ for passed, ✗ for failed, ⚠ for passed with warnings
|
||||
# MODEL: Drive model string
|
||||
# SERIAL: Drive serial number
|
||||
# WARNINGS: Comma-separated warning codes (or empty)
|
||||
#------------------------------------------------------------------------------
|
||||
get_drive_smart_info() {
|
||||
parse_smart_data() {
|
||||
local device="$1"
|
||||
local smart_info
|
||||
local smart_info="$2"
|
||||
local temp="-"
|
||||
local type="HDD"
|
||||
local health="✗"
|
||||
@@ -739,19 +734,7 @@ get_drive_smart_info() {
|
||||
local serial="-"
|
||||
local warnings=""
|
||||
|
||||
# Capture both stdout and stderr for better error reporting
|
||||
local smart_stderr
|
||||
smart_stderr="$(mktemp)"
|
||||
smart_info="$(sudo smartctl -A -i -H "/dev/$device" 2>"$smart_stderr")"
|
||||
local smart_exit=$?
|
||||
|
||||
if [[ $smart_exit -ne 0 && -s "$smart_stderr" ]]; then
|
||||
log_warn "SMART query failed for $device: $(head -1 "$smart_stderr")"
|
||||
fi
|
||||
rm -f "$smart_stderr"
|
||||
|
||||
if [[ -z "$smart_info" ]]; then
|
||||
log_info "No SMART data available for $device"
|
||||
echo "HDD|-|✗|-|-|"
|
||||
return
|
||||
fi
|
||||
@@ -762,11 +745,8 @@ get_drive_smart_info() {
|
||||
# - SATA: "Current Temperature: 35 Celsius"
|
||||
# - NVMe: "Temperature: 42 Celsius"
|
||||
if echo "$smart_info" | grep -q "Temperature_Celsius"; then
|
||||
# SMART attribute format - temperature is typically the 10th field (raw value)
|
||||
# But we use the last numeric field before any parentheses for reliability
|
||||
temp="$(echo "$smart_info" | grep "Temperature_Celsius" | head -1 | awk '{for(i=NF;i>0;i--) if($i ~ /^[0-9]+$/) {print $i; exit}}')"
|
||||
elif echo "$smart_info" | grep -qE "^(Current )?Temperature:"; then
|
||||
# Simple "Temperature: XX Celsius" format
|
||||
temp="$(echo "$smart_info" | grep -E "^(Current )?Temperature:" | head -1 | awk '{print $2}')"
|
||||
fi
|
||||
|
||||
@@ -859,6 +839,24 @@ get_drive_smart_info() {
|
||||
echo "${type}|${temp_display}|${health}|${model}|${serial}|${warnings}"
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# get_drive_smart_info
|
||||
#
|
||||
# Retrieves SMART data for a given device (fetches and parses).
|
||||
#
|
||||
# Args:
|
||||
# $1 - Device name (e.g., sda, nvme0n1)
|
||||
#
|
||||
# Returns: Pipe-delimited string: TYPE|TEMP|HEALTH|MODEL|SERIAL|WARNINGS
|
||||
#------------------------------------------------------------------------------
|
||||
get_drive_smart_info() {
|
||||
# $1 is the bare device name; "/dev/" is prepended when smartctl is invoked.
local device="$1"
|
||||
# Holds the raw smartctl stdout that is handed to parse_smart_data.
local smart_info
|
||||
|
||||
# Run smartctl through sudo; stderr is discarded, so a failed query
# simply yields whatever (possibly empty) text reached stdout.
smart_info="$(sudo smartctl -A -i -H "/dev/$device" 2>/dev/null)"
|
||||
# Parsing and the formatted output are delegated to parse_smart_data,
# which receives the device name and the raw text.
parse_smart_data "$device" "$smart_info"
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Main Display Logic
|
||||
#------------------------------------------------------------------------------
|
||||
@@ -919,26 +917,37 @@ done
|
||||
all_bays="$(printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^[0-9]+$' | sort -n; printf '%s\n' "${!DRIVE_MAP[@]}" | grep -E '^m2-' | sort)"
|
||||
|
||||
# Cache lsblk data to reduce redundant calls
|
||||
# Single call gets all info we need: size and mount points
|
||||
# Get device sizes (whole disk only)
|
||||
declare -A LSBLK_SIZE=()
|
||||
declare -A LSBLK_MOUNTS=()
|
||||
log_info "Caching block device information..."
|
||||
while IFS='|' read -r name size mounts; do
|
||||
|
||||
# Get sizes for whole disks only
|
||||
while read -r name size; do
|
||||
[[ -z "$name" ]] && continue
|
||||
LSBLK_SIZE[$name]="$size"
|
||||
# Accumulate mount points for parent device
|
||||
parent="${name%%[0-9]}" # Strip partition number
|
||||
if [[ -n "$mounts" ]]; then
|
||||
if [[ -n "${LSBLK_MOUNTS[$parent]}" ]]; then
|
||||
LSBLK_MOUNTS[$parent]+=",${mounts}"
|
||||
LSBLK_SIZE["$name"]="$size"
|
||||
done < <(lsblk -dn -o NAME,SIZE 2>/dev/null)
|
||||
|
||||
# Get mount points (including partitions) and map back to parent device
|
||||
while read -r name mounts; do
|
||||
[[ -z "$name" || -z "$mounts" ]] && continue
|
||||
# Strip partition suffix (sda1 -> sda, nvme0n1p1 -> nvme0n1)
|
||||
if [[ "$name" =~ ^(nvme[0-9]+n[0-9]+)p[0-9]+$ ]]; then
|
||||
parent="${BASH_REMATCH[1]}"
|
||||
elif [[ "$name" =~ ^([a-z]+)[0-9]+$ ]]; then
|
||||
parent="${BASH_REMATCH[1]}"
|
||||
else
|
||||
LSBLK_MOUNTS[$parent]="$mounts"
|
||||
parent="$name"
|
||||
fi
|
||||
if [[ -n "${LSBLK_MOUNTS[$parent]:-}" ]]; then
|
||||
LSBLK_MOUNTS["$parent"]+=",${mounts}"
|
||||
else
|
||||
LSBLK_MOUNTS["$parent"]="$mounts"
|
||||
fi
|
||||
done < <(lsblk -rn -o NAME,SIZE,MOUNTPOINT 2>/dev/null)
|
||||
done < <(lsblk -rn -o NAME,MOUNTPOINT 2>/dev/null | grep -v '^ ')
|
||||
|
||||
# Parallel SMART data collection for faster execution
|
||||
# Collect SMART data in background jobs, store in temp files
|
||||
# Collect raw smartctl output in background jobs, parse later
|
||||
if [[ "$SKIP_SMART" != true ]]; then
|
||||
SMART_CACHE_DIR="$(mktemp -d)"
|
||||
log_info "Collecting SMART data in parallel..."
|
||||
@@ -946,8 +955,8 @@ if [[ "$SKIP_SMART" != true ]]; then
|
||||
for bay in $all_bays; do
|
||||
device="${DRIVE_MAP[$bay]}"
|
||||
if [[ -n "$device" && "$device" != "EMPTY" && -b "/dev/$device" ]]; then
|
||||
# Launch background job for each device
|
||||
(get_drive_smart_info "$device" > "$SMART_CACHE_DIR/$device") &
|
||||
# Launch background job to collect raw smartctl data
|
||||
(sudo smartctl -A -i -H "/dev/$device" > "$SMART_CACHE_DIR/${device}.raw" 2>/dev/null) &
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -971,13 +980,23 @@ for bay in $all_bays; do
|
||||
serial="-"
|
||||
warnings=""
|
||||
else
|
||||
# Read from cached SMART data
|
||||
if [[ -f "$SMART_CACHE_DIR/$device" ]]; then
|
||||
smart_info="$(cat "$SMART_CACHE_DIR/$device")"
|
||||
else
|
||||
smart_info=""
|
||||
# Read from cached raw SMART data and parse it
|
||||
raw_smart=""
|
||||
if [[ -f "$SMART_CACHE_DIR/${device}.raw" ]]; then
|
||||
raw_smart="$(cat "$SMART_CACHE_DIR/${device}.raw")"
|
||||
fi
|
||||
# Parse the cached raw output with parse_smart_data (smartctl is not run again here)
|
||||
if [[ -n "$raw_smart" ]]; then
|
||||
smart_info="$(parse_smart_data "$device" "$raw_smart")"
|
||||
IFS='|' read -r type temp health model serial warnings <<< "$smart_info"
|
||||
else
|
||||
type="-"
|
||||
temp="-"
|
||||
health="-"
|
||||
model="-"
|
||||
serial="-"
|
||||
warnings=""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for Ceph OSD using cached data
|
||||
@@ -1020,7 +1039,7 @@ for bay in $all_bays; do
|
||||
colored_health="$(colorize_health "$health")"
|
||||
|
||||
# Colorize warnings if present
|
||||
local colored_warnings="${warnings:--}"
|
||||
colored_warnings="${warnings:--}"
|
||||
if [[ "$USE_COLOR" == true && -n "$warnings" ]]; then
|
||||
colored_warnings="${COLOR_YELLOW}${warnings}${COLOR_RESET}"
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user