Fixed thresholds for thermals and SMART
This commit is contained in:
@ -144,6 +144,15 @@ class SystemHealthMonitor:
|
||||
'monitor': False, # Skip monitoring entirely
|
||||
'description': 'Operation counter, not actual failures - IGNORED'
|
||||
},
|
||||
# Regular Erase_Fail_Count is also an operation counter on Ridata drives
|
||||
'Erase_Fail_Count': {
|
||||
'monitor': False, # Skip monitoring entirely for Ridata
|
||||
'description': 'Operation counter for Ridata drives, not actual failures - IGNORED'
|
||||
},
|
||||
'Program_Fail_Count': {
|
||||
'monitor': False, # Skip monitoring entirely for Ridata
|
||||
'description': 'Operation counter for Ridata drives, not actual failures - IGNORED'
|
||||
},
|
||||
# These are the REAL failure counters - monitor with standard thresholds
|
||||
'Program_Fail_Cnt_Total': {
|
||||
'monitor': True,
|
||||
@ -578,11 +587,11 @@ class SystemHealthMonitor:
|
||||
if temperature is None:
|
||||
return issues
|
||||
|
||||
# Drive-type specific temperature thresholds
|
||||
# Drive-type specific temperature thresholds - ADJUSTED TO BE LESS SENSITIVE
|
||||
if drive_type == 'SSD':
|
||||
temp_thresholds = {'warning': 70, 'critical': 85, 'optimal_max': 60}
|
||||
temp_thresholds = {'warning': 70, 'critical': 85, 'optimal_max': 65} # Raised from 60
|
||||
else: # HDD
|
||||
temp_thresholds = {'warning': 55, 'critical': 65, 'optimal_max': 45}
|
||||
temp_thresholds = {'warning': 60, 'critical': 70, 'optimal_max': 55} # Raised from 45/55/65
|
||||
|
||||
if temperature >= temp_thresholds['critical']:
|
||||
issues.append(f"CRITICAL: Drive temperature {temperature}°C exceeds safe operating limit for {drive_type}")
|
||||
@ -1519,7 +1528,7 @@ class SystemHealthMonitor:
|
||||
'behavior': attr_config.get('behavior', 'countup')
|
||||
}
|
||||
|
||||
# Enhanced BASE_SMART_THRESHOLDS with additional attributes
|
||||
# Enhanced BASE_SMART_THRESHOLDS with manufacturer-specific handling
|
||||
BASE_SMART_THRESHOLDS = {
|
||||
'Reallocated_Sector_Ct': {'warning': 5, 'critical': 10},
|
||||
'Current_Pending_Sector': {'warning': 1, 'critical': 5},
|
||||
@ -1536,10 +1545,10 @@ class SystemHealthMonitor:
|
||||
'SSD_Life_Left': {'warning': 30, 'critical': 10},
|
||||
'Program_Fail_Cnt_Total': {'warning': 1, 'critical': 5},
|
||||
'Erase_Fail_Count_Total': {'warning': 1, 'critical': 5},
|
||||
# Enhanced SMART attributes for better failure detection
|
||||
'Raw_Read_Error_Rate': {'warning': 100000, 'critical': 1000000},
|
||||
'Seek_Error_Rate': {'warning': 100000, 'critical': 1000000},
|
||||
'Command_Timeout': {'warning': 1, 'critical': 5},
|
||||
# ADJUSTED: More lenient thresholds for error rates on unknown drives
|
||||
'Raw_Read_Error_Rate': {'warning': 10000000, 'critical': 100000000}, # Raised significantly
|
||||
'Seek_Error_Rate': {'warning': 10000000, 'critical': 100000000}, # Raised significantly
|
||||
'Command_Timeout': {'warning': 100, 'critical': 1000}, # Raised significantly
|
||||
'High_Fly_Writes': {'warning': 1, 'critical': 5},
|
||||
'Airflow_Temperature_Cel': {'warning': 65, 'critical': 75},
|
||||
'G_Sense_Error_Rate': {'warning': 100, 'critical': 1000},
|
||||
@ -1658,11 +1667,16 @@ class SystemHealthMonitor:
|
||||
parts = line.split()
|
||||
if len(parts) >= 10:
|
||||
raw_value = self._parse_smart_value(parts[9])
|
||||
smart_attributes_raw[attr] = raw_value
|
||||
smart_attributes_raw[f'{attr}_Total'] = raw_value # Store as _Total
|
||||
logger.debug(f"Found {attr}_Total: {raw_value}")
|
||||
break
|
||||
# Only use non-_Total version if _Total not found
|
||||
# Only use non-_Total version if _Total not found AND not Ridata
|
||||
elif attr in line and f'{attr}_Total' not in smart_attributes_raw:
|
||||
# Check if this is a Ridata drive and should skip regular counters
|
||||
if manufacturer_profile and manufacturer_profile.get('aliases', [{}])[0] == 'Ridata':
|
||||
logger.debug(f"Skipping {attr} for Ridata drive - using _Total version only")
|
||||
continue
|
||||
|
||||
parts = line.split()
|
||||
if len(parts) >= 10:
|
||||
raw_value = self._parse_smart_value(parts[9])
|
||||
|
||||
Reference in New Issue
Block a user