hwmonDaemon: ignore Ridata operation counters; relax temperature and error-rate thresholds

Review notes (text before the first "diff --git" line is ignored by patch tools):
* This patch had been flattened onto two lines. Line breaks and indentation
  were rebuilt from the code's syntax; the absolute indentation of each hunk
  and the spacing before inline comments are assumptions. Validate with
  `git apply --check --ignore-space-change` against the target file and
  review the indentation of the added lines before committing.
* The Ridata check now tests alias membership. The previous form indexed
  element 0 of the alias list with a `[{}]` default, which raises IndexError
  for an empty alias list and only matches when 'Ridata' is the first alias.
* Added lines were edited during review, so the post-image blob id on the
  "index" line no longer matches the resulting file.

diff --git a/hwmonDaemon.py b/hwmonDaemon.py
index f8c3fee..54f5fbe 100644
--- a/hwmonDaemon.py
+++ b/hwmonDaemon.py
@@ -144,6 +144,15 @@ class SystemHealthMonitor:
                     'monitor': False,  # Skip monitoring entirely
                     'description': 'Operation counter, not actual failures - IGNORED'
                 },
+                # Plain Erase_Fail_Count / Program_Fail_Count are also operation counters on Ridata
+                'Erase_Fail_Count': {
+                    'monitor': False,  # Skip monitoring entirely for Ridata
+                    'description': 'Operation counter for Ridata drives, not actual failures - IGNORED'
+                },
+                'Program_Fail_Count': {
+                    'monitor': False,  # Skip monitoring entirely for Ridata
+                    'description': 'Operation counter for Ridata drives, not actual failures - IGNORED'
+                },
                 # These are the REAL failure counters - monitor with standard thresholds
                 'Program_Fail_Cnt_Total': {
                     'monitor': True,
@@ -578,11 +587,11 @@ class SystemHealthMonitor:
         if temperature is None:
             return issues
 
-        # Drive-type specific temperature thresholds
+        # Drive-type specific temperature thresholds - ADJUSTED TO BE LESS SENSITIVE
         if drive_type == 'SSD':
-            temp_thresholds = {'warning': 70, 'critical': 85, 'optimal_max': 60}
+            temp_thresholds = {'warning': 70, 'critical': 85, 'optimal_max': 65}  # Raised from 60
         else:  # HDD
-            temp_thresholds = {'warning': 55, 'critical': 65, 'optimal_max': 45}
+            temp_thresholds = {'warning': 60, 'critical': 70, 'optimal_max': 55}  # Raised from 55/65/45
 
         if temperature >= temp_thresholds['critical']:
             issues.append(f"CRITICAL: Drive temperature {temperature}°C exceeds safe operating limit for {drive_type}")
@@ -1519,7 +1528,7 @@ class SystemHealthMonitor:
                     'behavior': attr_config.get('behavior', 'countup')
                 }
 
-            # Enhanced BASE_SMART_THRESHOLDS with additional attributes
+            # Enhanced BASE_SMART_THRESHOLDS with manufacturer-specific handling
             BASE_SMART_THRESHOLDS = {
                 'Reallocated_Sector_Ct': {'warning': 5, 'critical': 10},
                 'Current_Pending_Sector': {'warning': 1, 'critical': 5},
@@ -1536,10 +1545,10 @@ class SystemHealthMonitor:
                 'SSD_Life_Left': {'warning': 30, 'critical': 10},
                 'Program_Fail_Cnt_Total': {'warning': 1, 'critical': 5},
                 'Erase_Fail_Count_Total': {'warning': 1, 'critical': 5},
-                # Enhanced SMART attributes for better failure detection
-                'Raw_Read_Error_Rate': {'warning': 100000, 'critical': 1000000},
-                'Seek_Error_Rate': {'warning': 100000, 'critical': 1000000},
-                'Command_Timeout': {'warning': 1, 'critical': 5},
+                # ADJUSTED: More lenient thresholds for error rates on unknown drives
+                'Raw_Read_Error_Rate': {'warning': 10000000, 'critical': 100000000},  # Raised significantly
+                'Seek_Error_Rate': {'warning': 10000000, 'critical': 100000000},  # Raised significantly
+                'Command_Timeout': {'warning': 100, 'critical': 1000},  # Raised significantly
                 'High_Fly_Writes': {'warning': 1, 'critical': 5},
                 'Airflow_Temperature_Cel': {'warning': 65, 'critical': 75},
                 'G_Sense_Error_Rate': {'warning': 100, 'critical': 1000},
@@ -1658,11 +1667,16 @@ class SystemHealthMonitor:
                         parts = line.split()
                         if len(parts) >= 10:
                             raw_value = self._parse_smart_value(parts[9])
-                            smart_attributes_raw[attr] = raw_value
+                            smart_attributes_raw[f'{attr}_Total'] = raw_value  # Store as _Total
                             logger.debug(f"Found {attr}_Total: {raw_value}")
                             break
-                    # Only use non-_Total version if _Total not found
+                    # Only use non-_Total version if _Total not found AND not Ridata
                     elif attr in line and f'{attr}_Total' not in smart_attributes_raw:
+                        # Ridata drives report the plain counters as operation counts; rely on _Total only
+                        if manufacturer_profile and 'Ridata' in manufacturer_profile.get('aliases', []):
+                            logger.debug(f"Skipping {attr} for Ridata drive - using _Total version only")
+                            continue
+
                         parts = line.split()
                         if len(parts) >= 10:
                             raw_value = self._parse_smart_value(parts[9])