Parallelize SMART health checks across drives with ThreadPoolExecutor

Runs SMART checks concurrently (up to 8 workers) instead of sequentially, significantly reducing check time on multi-drive systems. Results are gathered as each check completes, and the drive reports are then built in the original disk order.

Fixes: #22
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 13:13:50 -05:00
parent 7b36255fb4
commit b02e416117
1 changed files with 22 additions and 7 deletions
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 import os, sys, json, requests, psutil, socket, subprocess, logging, argparse, urllib.request, re, glob, datetime, fcntl, textwrap, shutil
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Dict, Any, List

 # =============================================================================
@@ -2790,6 +2791,20 @@ class SystemHealthMonitor:
                        device_partitions[base_dev] = []
                    device_partitions[base_dev].append(part)

+            # Run SMART checks in parallel across all drives
+            smart_results = {}
+            max_workers = min(8, len(physical_disks))
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                futures = {executor.submit(self._check_smart_health, disk): disk for disk in physical_disks}
+                for future in as_completed(futures):
+                    disk = futures[future]
+                    try:
+                        smart_results[disk] = future.result()
+                    except Exception as e:
+                        logger.error(f"SMART check failed for {disk}: {e}")
+                        smart_results[disk] = {'status': 'ERROR', 'issues': [str(e)], 'temp': None, 'attributes': {}}
+
+            # Build drive reports in original disk order
            overall_status = 'NORMAL'
            for disk in physical_disks:
                drive_report = {
@@ -2798,7 +2813,7 @@ class SystemHealthMonitor:
                    'smart_status': 'UNKNOWN',
                    'usage_percent': 0
                }
-                
+
                # Add partition information if available
                if disk in device_partitions:
                    total_used = 0
@@ -2820,20 +2835,20 @@ class SystemHealthMonitor:
                            drive_report['partitions'].append(part_info)
                        except Exception as e:
                            logger.debug(f"Error getting partition usage for {partition.device}: {e}")
-                    
+
                    # Calculate overall drive usage percentage
                    if total_space > 0:
                        drive_report['usage_percent'] = (total_used / total_space) * 100

-                # Check SMART health
-                smart_health = self._check_smart_health(disk)
+                # Use pre-fetched SMART results
+                smart_health = smart_results.get(disk, {'status': 'ERROR', 'issues': [], 'temp': None, 'attributes': {}})
                drive_report.update({
                    'smart_status': smart_health['status'],
                    'smart_issues': smart_health['issues'],
                    'temperature': smart_health['temp'],
                    'smart_attributes': smart_health['attributes']
                })
-                
+
                # Only report issues for drives that should be monitored
                if smart_health['status'] == 'UNHEALTHY':
                    overall_status = 'CRITICAL'
@@ -2843,9 +2858,9 @@ class SystemHealthMonitor:
                elif smart_health['issues'] and smart_health['status'] not in ['ERROR', 'NOT_SUPPORTED']:
                    if overall_status != 'CRITICAL':
                        overall_status = 'WARNING'
-                
+
                drives_health['drives'].append(drive_report)
-            
+
            drives_health['overall_status'] = overall_status
            
        except Exception as e: