Parallelize SMART health checks across drives with ThreadPoolExecutor

Runs SMART checks concurrently (up to 8 workers) instead of
sequentially, significantly reducing check time on multi-drive systems.
Results are gathered as each check finishes (completion order), then
drive reports are built in the original disk order.

Fixes: #22

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-10 13:13:50 -05:00
parent 7b36255fb4
commit b02e416117

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python3
import os, sys, json, requests, psutil, socket, subprocess, logging, argparse, urllib.request, re, glob, datetime, fcntl, textwrap, shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, Any, List
# =============================================================================
@@ -2790,6 +2791,20 @@ class SystemHealthMonitor:
device_partitions[base_dev] = []
device_partitions[base_dev].append(part)
# Run SMART checks in parallel across all drives
smart_results = {}
max_workers = min(8, len(physical_disks))
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = {executor.submit(self._check_smart_health, disk): disk for disk in physical_disks}
for future in as_completed(futures):
disk = futures[future]
try:
smart_results[disk] = future.result()
except Exception as e:
logger.error(f"SMART check failed for {disk}: {e}")
smart_results[disk] = {'status': 'ERROR', 'issues': [str(e)], 'temp': None, 'attributes': {}}
# Build drive reports in original disk order
overall_status = 'NORMAL'
for disk in physical_disks:
drive_report = {
@@ -2798,7 +2813,7 @@ class SystemHealthMonitor:
'smart_status': 'UNKNOWN',
'usage_percent': 0
}
# Add partition information if available
if disk in device_partitions:
total_used = 0
@@ -2820,20 +2835,20 @@ class SystemHealthMonitor:
drive_report['partitions'].append(part_info)
except Exception as e:
logger.debug(f"Error getting partition usage for {partition.device}: {e}")
# Calculate overall drive usage percentage
if total_space > 0:
drive_report['usage_percent'] = (total_used / total_space) * 100
# Check SMART health
smart_health = self._check_smart_health(disk)
# Use pre-fetched SMART results
smart_health = smart_results.get(disk, {'status': 'ERROR', 'issues': [], 'temp': None, 'attributes': {}})
drive_report.update({
'smart_status': smart_health['status'],
'smart_issues': smart_health['issues'],
'temperature': smart_health['temp'],
'smart_attributes': smart_health['attributes']
})
# Only report issues for drives that should be monitored
if smart_health['status'] == 'UNHEALTHY':
overall_status = 'CRITICAL'
@@ -2843,9 +2858,9 @@ class SystemHealthMonitor:
elif smart_health['issues'] and smart_health['status'] not in ['ERROR', 'NOT_SUPPORTED']:
if overall_status != 'CRITICAL':
overall_status = 'WARNING'
drives_health['drives'].append(drive_report)
drives_health['overall_status'] = overall_status
except Exception as e: