Erase_Fail_Count matched two values (both the plain attribute line and its _Total variant)
This commit is contained in:
@ -109,7 +109,7 @@ class SystemHealthMonitor:
|
|||||||
}
|
}
|
||||||
MANUFACTURER_SMART_PROFILES = {
|
MANUFACTURER_SMART_PROFILES = {
|
||||||
'Ridata': {
|
'Ridata': {
|
||||||
'aliases': ['Ridata', 'Ritek', 'RIDATA', 'RITEK', 'SSD 512GB'], # Keep the generic model
|
'aliases': ['Ridata', 'Ritek', 'RIDATA', 'RITEK', 'SSD 512GB'],
|
||||||
'firmware_patterns': ['HT3618B7', 'HT36'], # Add exact firmware match first
|
'firmware_patterns': ['HT3618B7', 'HT36'], # Add exact firmware match first
|
||||||
'wear_leveling_behavior': 'countup',
|
'wear_leveling_behavior': 'countup',
|
||||||
'wear_leveling_baseline': 0,
|
'wear_leveling_baseline': 0,
|
||||||
@ -1226,13 +1226,36 @@ class SystemHealthMonitor:
|
|||||||
# Parse SMART attributes with manufacturer-specific handling
|
# Parse SMART attributes with manufacturer-specific handling
|
||||||
power_on_hours = 0
|
power_on_hours = 0
|
||||||
|
|
||||||
|
# First pass: collect all SMART attributes with priority for _Total versions
|
||||||
|
smart_attributes_raw = {}
|
||||||
|
|
||||||
for line in output.split('\n'):
|
for line in output.split('\n'):
|
||||||
# Extract Power_On_Hours first to determine if drive is new
|
# Extract Power_On_Hours first to determine if drive is new
|
||||||
if 'Power_On_Hours' in line:
|
if 'Power_On_Hours' in line:
|
||||||
parts = line.split()
|
parts = line.split()
|
||||||
if len(parts) >= 10:
|
if len(parts) >= 10:
|
||||||
power_on_hours = self._parse_smart_value(parts[9])
|
power_on_hours = self._parse_smart_value(parts[9])
|
||||||
smart_health['attributes']['Power_On_Hours'] = power_on_hours
|
smart_attributes_raw['Power_On_Hours'] = power_on_hours
|
||||||
|
|
||||||
|
# Handle SMART attributes with preference for _Total versions
|
||||||
|
for attr in ['Erase_Fail_Count', 'Program_Fail_Count']:
|
||||||
|
# Check for _Total version first (more accurate)
|
||||||
|
if f'{attr}_Total' in line:
|
||||||
|
parts = line.split()
|
||||||
|
if len(parts) >= 10:
|
||||||
|
raw_value = self._parse_smart_value(parts[9])
|
||||||
|
smart_attributes_raw[attr] = raw_value
|
||||||
|
logger.debug(f"Found {attr}_Total: {raw_value}")
|
||||||
|
break
|
||||||
|
# Only use non-_Total version if _Total not found
|
||||||
|
elif attr in line and f'{attr}_Total' not in smart_attributes_raw:
|
||||||
|
parts = line.split()
|
||||||
|
if len(parts) >= 10:
|
||||||
|
raw_value = self._parse_smart_value(parts[9])
|
||||||
|
smart_attributes_raw[attr] = raw_value
|
||||||
|
logger.debug(f"Found {attr} (non-Total): {raw_value}")
|
||||||
|
|
||||||
|
smart_health['attributes'] = smart_attributes_raw
|
||||||
|
|
||||||
# Check if this is a new drive
|
# Check if this is a new drive
|
||||||
is_new_drive = self._is_new_drive(power_on_hours)
|
is_new_drive = self._is_new_drive(power_on_hours)
|
||||||
@ -1255,7 +1278,7 @@ class SystemHealthMonitor:
|
|||||||
'SSD_Life_Left': {'warning': 30, 'critical': 10}
|
'SSD_Life_Left': {'warning': 30, 'critical': 10}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Parse all SMART attributes
|
# Parse remaining SMART attributes
|
||||||
for line in output.split('\n'):
|
for line in output.split('\n'):
|
||||||
# Handle manufacturer-specific Wear_Leveling_Count
|
# Handle manufacturer-specific Wear_Leveling_Count
|
||||||
if 'Wear_Leveling_Count' in line:
|
if 'Wear_Leveling_Count' in line:
|
||||||
@ -1295,9 +1318,9 @@ class SystemHealthMonitor:
|
|||||||
smart_health['severity'] = 'WARNING'
|
smart_health['severity'] = 'WARNING'
|
||||||
smart_health['issues'].append(f"Low wear leveling remaining: {raw_value}")
|
smart_health['issues'].append(f"Low wear leveling remaining: {raw_value}")
|
||||||
|
|
||||||
# Handle all other standard SMART attributes
|
# Handle all other standard SMART attributes (except those already processed)
|
||||||
for attr, thresholds in BASE_SMART_THRESHOLDS.items():
|
for attr, thresholds in BASE_SMART_THRESHOLDS.items():
|
||||||
if attr in line and attr != 'Wear_Leveling_Count': # Skip wear leveling as it's handled above
|
if attr in line and attr not in ['Wear_Leveling_Count', 'Erase_Fail_Count', 'Program_Fail_Count']:
|
||||||
parts = line.split()
|
parts = line.split()
|
||||||
if len(parts) >= 10:
|
if len(parts) >= 10:
|
||||||
raw_value = self._parse_smart_value(parts[9])
|
raw_value = self._parse_smart_value(parts[9])
|
||||||
@ -1313,15 +1336,28 @@ class SystemHealthMonitor:
|
|||||||
smart_health['severity'] = 'WARNING'
|
smart_health['severity'] = 'WARNING'
|
||||||
smart_health['issues'].append(f"High temperature: {raw_value}°C")
|
smart_health['issues'].append(f"High temperature: {raw_value}°C")
|
||||||
else:
|
else:
|
||||||
# Fix: Only trigger alerts if the raw value actually exceeds thresholds
|
# Only trigger alerts if the raw value actually exceeds thresholds
|
||||||
if raw_value > 0: # Only check non-zero values
|
if raw_value >= thresholds['critical']:
|
||||||
if raw_value >= thresholds['critical']:
|
smart_health['severity'] = 'CRITICAL'
|
||||||
smart_health['severity'] = 'CRITICAL'
|
smart_health['issues'].append(f"Critical {attr}: {raw_value}")
|
||||||
smart_health['issues'].append(f"Critical {attr}: {raw_value}")
|
elif raw_value >= thresholds['warning']:
|
||||||
elif raw_value >= thresholds['warning']:
|
if smart_health['severity'] != 'CRITICAL':
|
||||||
if smart_health['severity'] != 'CRITICAL':
|
smart_health['severity'] = 'WARNING'
|
||||||
smart_health['severity'] = 'WARNING'
|
smart_health['issues'].append(f"Warning {attr}: {raw_value}")
|
||||||
smart_health['issues'].append(f"Warning {attr}: {raw_value}")
|
|
||||||
|
# Now check the collected Erase_Fail_Count and Program_Fail_Count
|
||||||
|
for attr in ['Erase_Fail_Count', 'Program_Fail_Count']:
|
||||||
|
if attr in smart_health['attributes']:
|
||||||
|
raw_value = smart_health['attributes'][attr]
|
||||||
|
thresholds = BASE_SMART_THRESHOLDS[attr]
|
||||||
|
|
||||||
|
if raw_value >= thresholds['critical']:
|
||||||
|
smart_health['severity'] = 'CRITICAL'
|
||||||
|
smart_health['issues'].append(f"Critical {attr}: {raw_value}")
|
||||||
|
elif raw_value >= thresholds['warning']:
|
||||||
|
if smart_health['severity'] != 'CRITICAL':
|
||||||
|
smart_health['severity'] = 'WARNING'
|
||||||
|
smart_health['issues'].append(f"Warning {attr}: {raw_value}")
|
||||||
|
|
||||||
# Check for recent SMART errors
|
# Check for recent SMART errors
|
||||||
error_log_pattern = r"Error \d+ occurred at disk power-on lifetime: (\d+) hours"
|
error_log_pattern = r"Error \d+ occurred at disk power-on lifetime: (\d+) hours"
|
||||||
|
|||||||
Reference in New Issue
Block a user