Parallelize SMART health checks across drives with ThreadPoolExecutor
Runs SMART checks concurrently (up to 8 workers) instead of sequentially, significantly reducing check time on multi-drive systems. Results are collected as the checks complete and then processed in the original disk order.
Fixes: #22
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import os, sys, json, requests, psutil, socket, subprocess, logging, argparse, urllib.request, re, glob, datetime, fcntl, textwrap, shutil
|
import os, sys, json, requests, psutil, socket, subprocess, logging, argparse, urllib.request, re, glob, datetime, fcntl, textwrap, shutil
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -2790,6 +2791,20 @@ class SystemHealthMonitor:
|
|||||||
device_partitions[base_dev] = []
|
device_partitions[base_dev] = []
|
||||||
device_partitions[base_dev].append(part)
|
device_partitions[base_dev].append(part)
|
||||||
|
|
||||||
|
# Run SMART checks in parallel across all drives
|
||||||
|
smart_results = {}
|
||||||
|
max_workers = max(1, min(8, len(physical_disks)))  # ThreadPoolExecutor raises ValueError for max_workers=0 (no disks found)
|
||||||
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||||
|
futures = {executor.submit(self._check_smart_health, disk): disk for disk in physical_disks}
|
||||||
|
for future in as_completed(futures):
|
||||||
|
disk = futures[future]
|
||||||
|
try:
|
||||||
|
smart_results[disk] = future.result()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"SMART check failed for {disk}: {e}")
|
||||||
|
smart_results[disk] = {'status': 'ERROR', 'issues': [str(e)], 'temp': None, 'attributes': {}}
|
||||||
|
|
||||||
|
# Build drive reports in original disk order
|
||||||
overall_status = 'NORMAL'
|
overall_status = 'NORMAL'
|
||||||
for disk in physical_disks:
|
for disk in physical_disks:
|
||||||
drive_report = {
|
drive_report = {
|
||||||
@@ -2798,7 +2813,7 @@ class SystemHealthMonitor:
|
|||||||
'smart_status': 'UNKNOWN',
|
'smart_status': 'UNKNOWN',
|
||||||
'usage_percent': 0
|
'usage_percent': 0
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add partition information if available
|
# Add partition information if available
|
||||||
if disk in device_partitions:
|
if disk in device_partitions:
|
||||||
total_used = 0
|
total_used = 0
|
||||||
@@ -2820,20 +2835,20 @@ class SystemHealthMonitor:
|
|||||||
drive_report['partitions'].append(part_info)
|
drive_report['partitions'].append(part_info)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Error getting partition usage for {partition.device}: {e}")
|
logger.debug(f"Error getting partition usage for {partition.device}: {e}")
|
||||||
|
|
||||||
# Calculate overall drive usage percentage
|
# Calculate overall drive usage percentage
|
||||||
if total_space > 0:
|
if total_space > 0:
|
||||||
drive_report['usage_percent'] = (total_used / total_space) * 100
|
drive_report['usage_percent'] = (total_used / total_space) * 100
|
||||||
|
|
||||||
# Check SMART health
|
# Use pre-fetched SMART results
|
||||||
smart_health = self._check_smart_health(disk)
|
smart_health = smart_results.get(disk, {'status': 'ERROR', 'issues': [], 'temp': None, 'attributes': {}})
|
||||||
drive_report.update({
|
drive_report.update({
|
||||||
'smart_status': smart_health['status'],
|
'smart_status': smart_health['status'],
|
||||||
'smart_issues': smart_health['issues'],
|
'smart_issues': smart_health['issues'],
|
||||||
'temperature': smart_health['temp'],
|
'temperature': smart_health['temp'],
|
||||||
'smart_attributes': smart_health['attributes']
|
'smart_attributes': smart_health['attributes']
|
||||||
})
|
})
|
||||||
|
|
||||||
# Only report issues for drives that should be monitored
|
# Only report issues for drives that should be monitored
|
||||||
if smart_health['status'] == 'UNHEALTHY':
|
if smart_health['status'] == 'UNHEALTHY':
|
||||||
overall_status = 'CRITICAL'
|
overall_status = 'CRITICAL'
|
||||||
@@ -2843,9 +2858,9 @@ class SystemHealthMonitor:
|
|||||||
elif smart_health['issues'] and smart_health['status'] not in ['ERROR', 'NOT_SUPPORTED']:
|
elif smart_health['issues'] and smart_health['status'] not in ['ERROR', 'NOT_SUPPORTED']:
|
||||||
if overall_status != 'CRITICAL':
|
if overall_status != 'CRITICAL':
|
||||||
overall_status = 'WARNING'
|
overall_status = 'WARNING'
|
||||||
|
|
||||||
drives_health['drives'].append(drive_report)
|
drives_health['drives'].append(drive_report)
|
||||||
|
|
||||||
drives_health['overall_status'] = overall_status
|
drives_health['overall_status'] = overall_status
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user