Fix Erase_Fail_Count matching two SMART attributes (the plain and _Total variants)
This commit is contained in:
@ -109,7 +109,7 @@ class SystemHealthMonitor:
|
||||
}
|
||||
MANUFACTURER_SMART_PROFILES = {
|
||||
'Ridata': {
|
||||
'aliases': ['Ridata', 'Ritek', 'RIDATA', 'RITEK', 'SSD 512GB'], # Keep the generic model
|
||||
'aliases': ['Ridata', 'Ritek', 'RIDATA', 'RITEK', 'SSD 512GB'],
|
||||
'firmware_patterns': ['HT3618B7', 'HT36'], # Add exact firmware match first
|
||||
'wear_leveling_behavior': 'countup',
|
||||
'wear_leveling_baseline': 0,
|
||||
@ -1226,13 +1226,36 @@ class SystemHealthMonitor:
|
||||
# Parse SMART attributes with manufacturer-specific handling
|
||||
power_on_hours = 0
|
||||
|
||||
# First pass: collect all SMART attributes with priority for _Total versions
|
||||
smart_attributes_raw = {}
|
||||
|
||||
for line in output.split('\n'):
|
||||
# Extract Power_On_Hours first to determine if drive is new
|
||||
if 'Power_On_Hours' in line:
|
||||
parts = line.split()
|
||||
if len(parts) >= 10:
|
||||
power_on_hours = self._parse_smart_value(parts[9])
|
||||
smart_health['attributes']['Power_On_Hours'] = power_on_hours
|
||||
smart_attributes_raw['Power_On_Hours'] = power_on_hours
|
||||
|
||||
# Handle SMART attributes with preference for _Total versions
|
||||
for attr in ['Erase_Fail_Count', 'Program_Fail_Count']:
|
||||
# Check for _Total version first (more accurate)
|
||||
if f'{attr}_Total' in line:
|
||||
parts = line.split()
|
||||
if len(parts) >= 10:
|
||||
raw_value = self._parse_smart_value(parts[9])
|
||||
smart_attributes_raw[attr] = raw_value
|
||||
logger.debug(f"Found {attr}_Total: {raw_value}")
|
||||
break
|
||||
# Only use non-_Total version if _Total not found
|
||||
elif attr in line and f'{attr}_Total' not in smart_attributes_raw:
|
||||
parts = line.split()
|
||||
if len(parts) >= 10:
|
||||
raw_value = self._parse_smart_value(parts[9])
|
||||
smart_attributes_raw[attr] = raw_value
|
||||
logger.debug(f"Found {attr} (non-Total): {raw_value}")
|
||||
|
||||
smart_health['attributes'] = smart_attributes_raw
|
||||
|
||||
# Check if this is a new drive
|
||||
is_new_drive = self._is_new_drive(power_on_hours)
|
||||
@ -1255,7 +1278,7 @@ class SystemHealthMonitor:
|
||||
'SSD_Life_Left': {'warning': 30, 'critical': 10}
|
||||
}
|
||||
|
||||
# Parse all SMART attributes
|
||||
# Parse remaining SMART attributes
|
||||
for line in output.split('\n'):
|
||||
# Handle manufacturer-specific Wear_Leveling_Count
|
||||
if 'Wear_Leveling_Count' in line:
|
||||
@ -1295,9 +1318,9 @@ class SystemHealthMonitor:
|
||||
smart_health['severity'] = 'WARNING'
|
||||
smart_health['issues'].append(f"Low wear leveling remaining: {raw_value}")
|
||||
|
||||
# Handle all other standard SMART attributes
|
||||
# Handle all other standard SMART attributes (except those already processed)
|
||||
for attr, thresholds in BASE_SMART_THRESHOLDS.items():
|
||||
if attr in line and attr != 'Wear_Leveling_Count': # Skip wear leveling as it's handled above
|
||||
if attr in line and attr not in ['Wear_Leveling_Count', 'Erase_Fail_Count', 'Program_Fail_Count']:
|
||||
parts = line.split()
|
||||
if len(parts) >= 10:
|
||||
raw_value = self._parse_smart_value(parts[9])
|
||||
@ -1313,15 +1336,28 @@ class SystemHealthMonitor:
|
||||
smart_health['severity'] = 'WARNING'
|
||||
smart_health['issues'].append(f"High temperature: {raw_value}°C")
|
||||
else:
|
||||
# Fix: Only trigger alerts if the raw value actually exceeds thresholds
|
||||
if raw_value > 0: # Only check non-zero values
|
||||
if raw_value >= thresholds['critical']:
|
||||
smart_health['severity'] = 'CRITICAL'
|
||||
smart_health['issues'].append(f"Critical {attr}: {raw_value}")
|
||||
elif raw_value >= thresholds['warning']:
|
||||
if smart_health['severity'] != 'CRITICAL':
|
||||
smart_health['severity'] = 'WARNING'
|
||||
smart_health['issues'].append(f"Warning {attr}: {raw_value}")
|
||||
# Only trigger alerts if the raw value actually exceeds thresholds
|
||||
if raw_value >= thresholds['critical']:
|
||||
smart_health['severity'] = 'CRITICAL'
|
||||
smart_health['issues'].append(f"Critical {attr}: {raw_value}")
|
||||
elif raw_value >= thresholds['warning']:
|
||||
if smart_health['severity'] != 'CRITICAL':
|
||||
smart_health['severity'] = 'WARNING'
|
||||
smart_health['issues'].append(f"Warning {attr}: {raw_value}")
|
||||
|
||||
# Now check the collected Erase_Fail_Count and Program_Fail_Count
|
||||
for attr in ['Erase_Fail_Count', 'Program_Fail_Count']:
|
||||
if attr in smart_health['attributes']:
|
||||
raw_value = smart_health['attributes'][attr]
|
||||
thresholds = BASE_SMART_THRESHOLDS[attr]
|
||||
|
||||
if raw_value >= thresholds['critical']:
|
||||
smart_health['severity'] = 'CRITICAL'
|
||||
smart_health['issues'].append(f"Critical {attr}: {raw_value}")
|
||||
elif raw_value >= thresholds['warning']:
|
||||
if smart_health['severity'] != 'CRITICAL':
|
||||
smart_health['severity'] = 'WARNING'
|
||||
smart_health['issues'].append(f"Warning {attr}: {raw_value}")
|
||||
|
||||
# Check for recent SMART errors
|
||||
error_log_pattern = r"Error \d+ occurred at disk power-on lifetime: (\d+) hours"
|
||||
|
||||
Reference in New Issue
Block a user