From 05072031402f55c33613f231041a0e035f6cfb13 Mon Sep 17 00:00:00 2001
From: Jared Vititoe <jjvititoe1@gmail.com>
Date: Mon, 3 Mar 2025 17:57:07 -0500
Subject: [PATCH] Less partitions more disks

---
 hwmonDaemon.py | 60 ++++++++++++++------------------------------------
 1 file changed, 16 insertions(+), 44 deletions(-)

diff --git a/hwmonDaemon.py b/hwmonDaemon.py
index a3a3a01..aeda846 100644
--- a/hwmonDaemon.py
+++ b/hwmonDaemon.py
@@ -263,18 +263,12 @@ class SystemHealthMonitor:
         issues = []
         
         # Check for drive-related issues
-        for partition in health_report.get('drives_health', {}).get('drives', []):
-            if partition.get('usage_status') == 'CRITICAL_HIGH_USAGE':
-                issues.append(
-                    f"Disk {partition['mountpoint']} is {partition['usage_percent']}% full"
-                )
-            elif partition.get('usage_status') == 'WARNING_HIGH_USAGE':
-                issues.append(
-                    f"Disk {partition['mountpoint']} is {partition['usage_percent']}% full (Warning)"
-                )
-            if partition.get('smart_status') == 'UNHEALTHY':
-                issues.append(f"Disk {partition['mountpoint']} has an unhealthy SMART status")
-
+        for drive in health_report.get('drives_health', {}).get('drives', []):
+            if drive.get('smart_issues'):
+                issues.append(f"Drive {drive['device']} has SMART issues: {', '.join(drive['smart_issues'])}")
+            if drive.get('temperature') and drive['temperature'] > self.CONFIG['THRESHOLDS']['TEMPERATURE_WARNING']:
+                issues.append(f"Drive {drive['device']} temperature is high: {drive['temperature']}°C")
+        
         # Check for ECC memory errors
         memory_health = health_report.get('memory_health', {})
         if memory_health.get('has_ecc') and memory_health.get('ecc_errors'):
@@ -581,11 +575,10 @@ class SystemHealthMonitor:
         drives_health = {'overall_status': 'NORMAL', 'drives': []}
         
         try:
-            physical_disks = self._get_all_disks()
-            logger.debug(f"Found physical disks: {physical_disks}")
-            
-            # Filter out RBD devices
-            physical_disks = [disk for disk in physical_disks if not disk.startswith('/dev/rbd')]
+            # Keep only /dev/sd* and /dev/nvme* disks; this drops RBD and every other device type
+            physical_disks = [disk for disk in self._get_all_disks() 
+                            if disk.startswith(('/dev/sd', '/dev/nvme'))]
+            logger.debug(f"Checking physical disks: {physical_disks}")
             
             overall_status = 'NORMAL'
             for disk in physical_disks:
@@ -596,30 +589,11 @@ class SystemHealthMonitor:
                     'usage_percent': 0,
                     'total_space': '0B',
                     'used_space': '0B',
-                    'free_space': '0B'
+                    'free_space': '0B',
+                    'smart_status': 'UNKNOWN'
                 }
                 
-                # Check if disk is mounted
-                try:
-                    partitions = [p for p in psutil.disk_partitions() 
-                                if p.device.startswith(disk)]
-                    if partitions:
-                        partition = partitions[0]  # Use first partition for stats
-                        usage = psutil.disk_usage(partition.mountpoint)
-                        drive_report.update({
-                            'mountpoint': partition.mountpoint,
-                            'total_space': self._convert_bytes(usage.total),
-                            'used_space': self._convert_bytes(usage.used),
-                            'free_space': self._convert_bytes(usage.free),
-                            'usage_percent': usage.percent,
-                            'usage_status': ('CRITICAL_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_CRITICAL']
-                                        else 'WARNING_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_WARNING']
-                                        else 'NORMAL')
-                        })
-                except Exception as e:
-                    logger.debug(f"Could not get usage stats for {disk}: {e}")
-                
-                # Check SMART health
+                # Check SMART health first
                 smart_health = self._check_smart_health(disk)
                 drive_report.update({
                     'smart_status': smart_health['status'],
@@ -628,12 +602,10 @@ class SystemHealthMonitor:
                     'smart_attributes': smart_health['attributes']
                 })
                 
-                # Update overall status
-                if (smart_health['status'] == 'UNHEALTHY' or 
-                    drive_report['usage_status'] == 'CRITICAL_HIGH_USAGE'):
+                # Update overall status based on SMART health
+                if smart_health['status'] == 'UNHEALTHY':
                     overall_status = 'CRITICAL'
-                elif (drive_report['usage_status'] == 'WARNING_HIGH_USAGE' and 
-                    overall_status != 'CRITICAL'):
+                elif smart_health['issues'] and overall_status != 'CRITICAL':
                     overall_status = 'WARNING'
                 
                 drives_health['drives'].append(drive_report)