Parallelize SMART health checks across drives with ThreadPoolExecutor

Runs SMART checks concurrently (up to 8 workers) instead of
sequentially, significantly reducing check time on multi-drive systems.
Results are gathered as each check completes, then drive reports are
built in the original disk order.

Fixes: #22

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-10 13:13:50 -05:00
parent 7b36255fb4
commit b02e416117

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os, sys, json, requests, psutil, socket, subprocess, logging, argparse, urllib.request, re, glob, datetime, fcntl, textwrap, shutil import os, sys, json, requests, psutil, socket, subprocess, logging, argparse, urllib.request, re, glob, datetime, fcntl, textwrap, shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, Any, List from typing import Dict, Any, List
# ============================================================================= # =============================================================================
@@ -2790,6 +2791,20 @@ class SystemHealthMonitor:
device_partitions[base_dev] = [] device_partitions[base_dev] = []
device_partitions[base_dev].append(part) device_partitions[base_dev].append(part)
# Run SMART checks in parallel across all drives
smart_results = {}
max_workers = min(8, len(physical_disks))
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = {executor.submit(self._check_smart_health, disk): disk for disk in physical_disks}
for future in as_completed(futures):
disk = futures[future]
try:
smart_results[disk] = future.result()
except Exception as e:
logger.error(f"SMART check failed for {disk}: {e}")
smart_results[disk] = {'status': 'ERROR', 'issues': [str(e)], 'temp': None, 'attributes': {}}
# Build drive reports in original disk order
overall_status = 'NORMAL' overall_status = 'NORMAL'
for disk in physical_disks: for disk in physical_disks:
drive_report = { drive_report = {
@@ -2798,7 +2813,7 @@ class SystemHealthMonitor:
'smart_status': 'UNKNOWN', 'smart_status': 'UNKNOWN',
'usage_percent': 0 'usage_percent': 0
} }
# Add partition information if available # Add partition information if available
if disk in device_partitions: if disk in device_partitions:
total_used = 0 total_used = 0
@@ -2820,20 +2835,20 @@ class SystemHealthMonitor:
drive_report['partitions'].append(part_info) drive_report['partitions'].append(part_info)
except Exception as e: except Exception as e:
logger.debug(f"Error getting partition usage for {partition.device}: {e}") logger.debug(f"Error getting partition usage for {partition.device}: {e}")
# Calculate overall drive usage percentage # Calculate overall drive usage percentage
if total_space > 0: if total_space > 0:
drive_report['usage_percent'] = (total_used / total_space) * 100 drive_report['usage_percent'] = (total_used / total_space) * 100
# Check SMART health # Use pre-fetched SMART results
smart_health = self._check_smart_health(disk) smart_health = smart_results.get(disk, {'status': 'ERROR', 'issues': [], 'temp': None, 'attributes': {}})
drive_report.update({ drive_report.update({
'smart_status': smart_health['status'], 'smart_status': smart_health['status'],
'smart_issues': smart_health['issues'], 'smart_issues': smart_health['issues'],
'temperature': smart_health['temp'], 'temperature': smart_health['temp'],
'smart_attributes': smart_health['attributes'] 'smart_attributes': smart_health['attributes']
}) })
# Only report issues for drives that should be monitored # Only report issues for drives that should be monitored
if smart_health['status'] == 'UNHEALTHY': if smart_health['status'] == 'UNHEALTHY':
overall_status = 'CRITICAL' overall_status = 'CRITICAL'
@@ -2843,9 +2858,9 @@ class SystemHealthMonitor:
elif smart_health['issues'] and smart_health['status'] not in ['ERROR', 'NOT_SUPPORTED']: elif smart_health['issues'] and smart_health['status'] not in ['ERROR', 'NOT_SUPPORTED']:
if overall_status != 'CRITICAL': if overall_status != 'CRITICAL':
overall_status = 'WARNING' overall_status = 'WARNING'
drives_health['drives'].append(drive_report) drives_health['drives'].append(drive_report)
drives_health['overall_status'] = overall_status drives_health['overall_status'] = overall_status
except Exception as e: except Exception as e: