From 95a5a8227a7b5fe650109f4e7bbcf88b2a1973fb Mon Sep 17 00:00:00 2001
From: Jared Vititoe <jjvititoe1@gmail.com>
Date: Thu, 29 May 2025 12:44:55 -0400
Subject: [PATCH] NoneType fix?

---
 hwmonDaemon.py | 185 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 136 insertions(+), 49 deletions(-)

diff --git a/hwmonDaemon.py b/hwmonDaemon.py
index 9898f19..6b4d4de 100644
--- a/hwmonDaemon.py
+++ b/hwmonDaemon.py
@@ -448,31 +448,53 @@ class SystemHealthMonitor:
             'serial': None,
             'capacity': None,
             'firmware': None,
-            'type': None  # SSD or HDD
+            'type': None,  # SSD or HDD
+            'smart_capable': False
         }
         
         try:
-            result = subprocess.run(
+            # First check if device supports SMART
+            capability_result = subprocess.run(
                 ['smartctl', '-i', device],
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 text=True
             )
             
-            for line in result.stdout.split('\n'):
-                if 'Device Model' in line:
+            # Check if smartctl failed completely
+            if capability_result.returncode not in [0, 4]:  # 0 = success, 4 = some SMART errors but readable
+                logger.debug(f"smartctl failed for {device}: return code {capability_result.returncode}")
+                return drive_details
+                
+            output = capability_result.stdout
+            
+            # Check if SMART is supported
+            if "SMART support is: Enabled" in output or "SMART support is: Available" in output:
+                drive_details['smart_capable'] = True
+            elif "SMART support is: Unavailable" in output or "does not support SMART" in output:
+                logger.debug(f"Device {device} does not support SMART")
+                return drive_details
+            
+            for line in output.split('\n'):
+                if 'Device Model' in line or 'Model Number' in line:
                     drive_details['model'] = line.split(':')[1].strip()
                 elif 'Serial Number' in line:
                     drive_details['serial'] = line.split(':')[1].strip()
                 elif 'User Capacity' in line:
-                    drive_details['capacity'] = line.split('[')[1].split(']')[0]
+                    # Extract capacity from brackets
+                    capacity_match = re.search(r'\[(.*?)\]', line)
+                    if capacity_match:
+                        drive_details['capacity'] = capacity_match.group(1)
                 elif 'Firmware Version' in line:
                     drive_details['firmware'] = line.split(':')[1].strip()
                 elif 'Rotation Rate' in line:
-                    drive_details['type'] = 'SSD' if 'Solid State Device' in line else 'HDD'
-                    
+                    if 'Solid State Device' in line:
+                        drive_details['type'] = 'SSD'
+                    else:
+                        drive_details['type'] = 'HDD'
+                        
         except Exception as e:
-            logger.debug(f"Error getting drive details: {e}")
+            logger.debug(f"Error getting drive details for {device}: {e}")
             
         return drive_details
 
@@ -801,8 +823,28 @@ class SystemHealthMonitor:
         
         # Check for drive-related issues
         for drive in health_report.get('drives_health', {}).get('drives', []):
-            if drive.get('smart_issues'):
-                issues.append(f"Drive {drive['device']} has SMART issues: {', '.join(drive['smart_issues'])}")
+            # Skip drives with ERROR or NOT_SUPPORTED status - these are likely virtual/unsupported devices
+            if drive.get('smart_status') in ['ERROR', 'NOT_SUPPORTED']:
+                logger.debug(f"Skipping issue detection for drive {drive['device']} with status {drive.get('smart_status')}")
+                continue
+                
+            # Only report issues for drives with valid SMART status
+            if drive.get('smart_issues') and drive.get('smart_status') in ['HEALTHY', 'UNHEALTHY', 'UNKNOWN']:
+                # Filter out generic error messages that don't indicate real hardware issues
+                filtered_issues = []
+                for issue in drive['smart_issues']:
+                    if not any(skip_phrase in issue for skip_phrase in [
+                        "Error checking SMART:",
+                        "Unable to read device information",
+                        "SMART not supported",
+                        "timed out"
+                    ]):
+                        filtered_issues.append(issue)
+                
+                if filtered_issues:
+                    issues.append(f"Drive {drive['device']} has SMART issues: {', '.join(filtered_issues)}")
+            
+            # Check temperature for every drive not skipped above (ERROR/NOT_SUPPORTED drives hit `continue`)
             if drive.get('temperature') and drive['temperature'] > self.CONFIG['THRESHOLDS']['TEMPERATURE_WARNING']:
                 issues.append(f"Drive {drive['device']} temperature is high: {drive['temperature']}°C")
         
@@ -1086,10 +1128,10 @@ class SystemHealthMonitor:
 
     def _check_smart_health(self, device: str) -> Dict[str, Any]:
         """
-        Enhanced SMART health check with manufacturer-specific thresholds.
+        Check SMART health for a device and return a dict including status, severity, issues, temp and attributes.
         """
         smart_health = {
-            'status': 'HEALTHY',
+            'status': 'UNKNOWN',
             'severity': 'NORMAL',
             'issues': [],
             'temp': None,
@@ -1098,8 +1140,19 @@ class SystemHealthMonitor:
         }
 
         try:
-            # Get drive details first to determine manufacturer
+            # First verify the device is SMART-capable
             drive_details = self._get_drive_details(device)
+            if not drive_details.get('smart_capable', False):
+                smart_health['status'] = 'NOT_SUPPORTED'
+                smart_health['issues'].append("SMART not supported on this device")
+                return smart_health
+
+            # If we have no model info, the device might not be responding properly
+            if not drive_details.get('model'):
+                smart_health['status'] = 'ERROR'
+                smart_health['issues'].append("Unable to read device information")
+                return smart_health
+
             manufacturer_profile = self._get_manufacturer_profile(
                 drive_details.get('model', ''), 
                 drive_details.get('manufacturer', '')
@@ -1112,12 +1165,13 @@ class SystemHealthMonitor:
                 smart_health['severity'] = 'WARNING'
                 smart_health['issues'].extend(firmware_info['known_issues'])
 
-            # Get detailed SMART data
+            # Get detailed SMART data with timeout
             result = subprocess.run(
                 ['smartctl', '-A', '-H', '-l', 'error', '-l', 'background', device],
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
-                text=True
+                text=True,
+                timeout=30  # 30 second timeout
             )
             
             output = result.stdout
@@ -1127,6 +1181,10 @@ class SystemHealthMonitor:
                 smart_health['status'] = 'UNHEALTHY'
                 smart_health['severity'] = 'CRITICAL'
                 smart_health['issues'].append("SMART overall health check failed")
+            elif 'PASSED' in output:
+                smart_health['status'] = 'HEALTHY'
+            else:
+                smart_health['status'] = 'UNKNOWN'
 
             # Parse SMART attributes with manufacturer-specific handling
             power_on_hours = 0
@@ -1240,9 +1298,9 @@ class SystemHealthMonitor:
             if recent_errors:
                 smart_health['severity'] = 'WARNING'
                 smart_health['issues'].extend(recent_errors)
-        
+
             logger.debug(f"=== SMART Health Check for {device} ===")
-            logger.debug(f"Manufacturer profile: {manufacturer_profile.get('aliases', ['Unknown'])[0]}")
+            logger.debug(f"Manufacturer profile: {manufacturer_profile.get('aliases', ['Unknown'])[0] if manufacturer_profile else 'None'}")
             logger.debug("Raw SMART attributes:")
             for attr, value in smart_health['attributes'].items():
                 logger.debug(f"{attr}: {value}")
@@ -1253,38 +1311,58 @@ class SystemHealthMonitor:
 
             # Special handling for NVMe drives
             if 'nvme' in device:
-                nvme_result = subprocess.run(
-                    ['nvme', 'smart-log', device],
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    text=True
-                )
-                logger.debug(f"NVMe smart-log raw output for {device}:")
-                logger.debug(nvme_result.stdout)
-                
-                # Add this line to initialize the temperature attribute
-                smart_health['attributes']['Temperature_Celsius'] = None
-                
-                for line in nvme_result.stdout.split('\n'):
-                    if 'temperature' in line.lower():
-                        temp_str = line.split(':')[1].strip()
-                        logger.debug(f"Raw temperature string: {temp_str}")
-                        
-                        # Extract first temperature value
-                        temp_value = int(''.join(c for c in temp_str if c.isdigit())[0:2])
-                        logger.debug(f"Parsed temperature value: {temp_value}")
-                        
-                        # Set both temperature fields
-                        smart_health['temp'] = temp_value
-                        smart_health['attributes']['Temperature_Celsius'] = temp_value
-                        
-                        logger.debug(f"Final temperature recorded: {smart_health['temp']}")
-                        break
+                try:
+                    nvme_result = subprocess.run(
+                        ['nvme', 'smart-log', device],
+                        stdout=subprocess.PIPE,
+                        stderr=subprocess.PIPE,
+                        text=True,
+                        timeout=10
+                    )
+                    logger.debug(f"NVMe smart-log raw output for {device}:")
+                    logger.debug(nvme_result.stdout)
+                    
+                    # Initialize the temperature attribute
+                    if smart_health['temp'] is None:
+                        smart_health['attributes']['Temperature_Celsius'] = None
+                    
+                    for line in nvme_result.stdout.split('\n'):
+                        # Only parse non-blank lines that mention temperature
+                        if line and line.strip() and 'temperature' in line.lower():
+                            try:
+                                temp_str = line.split(':')[1].strip() if ':' in line else line.strip()
+                                logger.debug(f"Raw temperature string: {temp_str}")
+                                
+                                # Extract first temperature value more safely
+                                digits = ''.join(c for c in temp_str if c.isdigit())
+                                if len(digits) >= 2:
+                                    temp_value = int(digits[:2])
+                                    logger.debug(f"Parsed temperature value: {temp_value}")
+                                    
+                                    # Set both temperature fields
+                                    smart_health['temp'] = temp_value
+                                    smart_health['attributes']['Temperature_Celsius'] = temp_value
+                                    
+                                    logger.debug(f"Final temperature recorded: {smart_health['temp']}")
+                                    break
+                            except (ValueError, IndexError, AttributeError) as e:
+                                logger.debug(f"Error parsing NVMe temperature from line '{line}': {e}")
+                                continue
+                except subprocess.TimeoutExpired:
+                    logger.debug(f"NVMe smart-log for {device} timed out")
+                except Exception as e:
+                    logger.debug(f"Error getting NVMe smart data for {device}: {e}")
 
+        except subprocess.TimeoutExpired:
+            smart_health['status'] = 'ERROR'
+            smart_health['issues'].append("SMART check timed out")
         except Exception as e:
             smart_health['status'] = 'ERROR'
             smart_health['severity'] = 'UNKNOWN'
             smart_health['issues'].append(f"Error checking SMART: {str(e)}")
+            logger.debug(f"Exception in _check_smart_health for {device}: {e}")
+            import traceback
+            logger.debug(traceback.format_exc())
 
         return smart_health
 
@@ -1292,11 +1370,15 @@ class SystemHealthMonitor:
         drives_health = {'overall_status': 'NORMAL', 'drives': []}
         
         try:
-            # Get physical disks only
-            physical_disks = [disk for disk in self._get_all_disks() 
-                            if disk.startswith(('/dev/sd', '/dev/nvme'))]
+            # Get only valid physical disks
+            physical_disks = self._get_all_disks()
             logger.debug(f"Checking physical disks: {physical_disks}")
             
+            if not physical_disks:
+                logger.warning("No valid physical disks found for monitoring")
+                drives_health['overall_status'] = 'WARNING'
+                return drives_health
+            
             # Get ALL partition information including device mapper
             partitions = psutil.disk_partitions(all=True)
             
@@ -1355,10 +1437,15 @@ class SystemHealthMonitor:
                     'smart_attributes': smart_health['attributes']
                 })
                 
+                # Escalate overall status from SMART results; ERROR/NOT_SUPPORTED drives never escalate it
                 if smart_health['status'] == 'UNHEALTHY':
                     overall_status = 'CRITICAL'
-                elif smart_health['issues'] and overall_status != 'CRITICAL':
-                    overall_status = 'WARNING'
+                elif smart_health['status'] == 'ERROR':
+                    # Don't escalate overall status for ERROR drives (might be virtual)
+                    logger.debug(f"Drive {disk} returned ERROR status, skipping from issue detection")
+                elif smart_health['issues'] and smart_health['status'] not in ['ERROR', 'NOT_SUPPORTED']:
+                    if overall_status != 'CRITICAL':
+                        overall_status = 'WARNING'
                 
                 drives_health['drives'].append(drive_report)