Fix Erase_Fail_Count matching two values (the plain and _Total SMART attributes)

This commit is contained in:
2025-06-24 15:14:35 -04:00
parent 9a700e9853
commit a74c4c0309

View File

@ -109,7 +109,7 @@ class SystemHealthMonitor:
}
MANUFACTURER_SMART_PROFILES = {
'Ridata': {
'aliases': ['Ridata', 'Ritek', 'RIDATA', 'RITEK', 'SSD 512GB'], # Keep the generic model
'aliases': ['Ridata', 'Ritek', 'RIDATA', 'RITEK', 'SSD 512GB'],
'firmware_patterns': ['HT3618B7', 'HT36'], # Add exact firmware match first
'wear_leveling_behavior': 'countup',
'wear_leveling_baseline': 0,
@ -1226,13 +1226,36 @@ class SystemHealthMonitor:
# Parse SMART attributes with manufacturer-specific handling
power_on_hours = 0
# First pass: collect Power_On_Hours and the fail-count attributes, preferring _Total versions
smart_attributes_raw = {}
for line in output.split('\n'):
# Extract Power_On_Hours first to determine if drive is new
if 'Power_On_Hours' in line:
parts = line.split()
if len(parts) >= 10:
power_on_hours = self._parse_smart_value(parts[9])
smart_health['attributes']['Power_On_Hours'] = power_on_hours
smart_attributes_raw['Power_On_Hours'] = power_on_hours
# Handle SMART attributes with preference for _Total versions
for attr in ['Erase_Fail_Count', 'Program_Fail_Count']:
# Check for _Total version first (more accurate)
if f'{attr}_Total' in line:
parts = line.split()
if len(parts) >= 10:
raw_value = self._parse_smart_value(parts[9])
smart_attributes_raw[attr] = raw_value
logger.debug(f"Found {attr}_Total: {raw_value}")
break
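# Only use non-_Total version if _Total not found
# NOTE(review): values are stored under attr, never under f'{attr}_Total', so the
# membership check below is always true and a later non-_Total line can overwrite
# a _Total value -- confirm the intended key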
elif attr in line and f'{attr}_Total' not in smart_attributes_raw:
parts = line.split()
if len(parts) >= 10:
raw_value = self._parse_smart_value(parts[9])
smart_attributes_raw[attr] = raw_value
logger.debug(f"Found {attr} (non-Total): {raw_value}")
smart_health['attributes'] = smart_attributes_raw
# Check if this is a new drive
is_new_drive = self._is_new_drive(power_on_hours)
@ -1255,7 +1278,7 @@ class SystemHealthMonitor:
'SSD_Life_Left': {'warning': 30, 'critical': 10}
}
# Parse all SMART attributes
# Parse remaining SMART attributes
for line in output.split('\n'):
# Handle manufacturer-specific Wear_Leveling_Count
if 'Wear_Leveling_Count' in line:
@ -1295,9 +1318,9 @@ class SystemHealthMonitor:
smart_health['severity'] = 'WARNING'
smart_health['issues'].append(f"Low wear leveling remaining: {raw_value}")
# Handle all other standard SMART attributes
# Handle all other standard SMART attributes (except those already processed)
for attr, thresholds in BASE_SMART_THRESHOLDS.items():
if attr in line and attr != 'Wear_Leveling_Count': # Skip wear leveling as it's handled above
if attr in line and attr not in ['Wear_Leveling_Count', 'Erase_Fail_Count', 'Program_Fail_Count']:
parts = line.split()
if len(parts) >= 10:
raw_value = self._parse_smart_value(parts[9])
@ -1313,15 +1336,28 @@ class SystemHealthMonitor:
smart_health['severity'] = 'WARNING'
smart_health['issues'].append(f"High temperature: {raw_value}°C")
else:
# Fix: Only trigger alerts if the raw value actually exceeds thresholds
if raw_value > 0: # Only check non-zero values
if raw_value >= thresholds['critical']:
smart_health['severity'] = 'CRITICAL'
smart_health['issues'].append(f"Critical {attr}: {raw_value}")
elif raw_value >= thresholds['warning']:
if smart_health['severity'] != 'CRITICAL':
smart_health['severity'] = 'WARNING'
smart_health['issues'].append(f"Warning {attr}: {raw_value}")
# Only trigger alerts if the raw value meets or exceeds a threshold
if raw_value >= thresholds['critical']:
smart_health['severity'] = 'CRITICAL'
smart_health['issues'].append(f"Critical {attr}: {raw_value}")
elif raw_value >= thresholds['warning']:
if smart_health['severity'] != 'CRITICAL':
smart_health['severity'] = 'WARNING'
smart_health['issues'].append(f"Warning {attr}: {raw_value}")
# Now check the collected Erase_Fail_Count and Program_Fail_Count
for attr in ['Erase_Fail_Count', 'Program_Fail_Count']:
if attr in smart_health['attributes']:
raw_value = smart_health['attributes'][attr]
thresholds = BASE_SMART_THRESHOLDS[attr]
if raw_value >= thresholds['critical']:
smart_health['severity'] = 'CRITICAL'
smart_health['issues'].append(f"Critical {attr}: {raw_value}")
elif raw_value >= thresholds['warning']:
if smart_health['severity'] != 'CRITICAL':
smart_health['severity'] = 'WARNING'
smart_health['issues'].append(f"Warning {attr}: {raw_value}")
# Check for recent SMART errors
error_log_pattern = r"Error \d+ occurred at disk power-on lifetime: (\d+) hours"