Fewer partitions, more disks

This commit is contained in:
2025-03-03 17:57:07 -05:00
parent 79509642c6
commit 0507203140

View File

@ -263,18 +263,12 @@ class SystemHealthMonitor:
issues = []
# Check for drive-related issues
for partition in health_report.get('drives_health', {}).get('drives', []):
if partition.get('usage_status') == 'CRITICAL_HIGH_USAGE':
issues.append(
f"Disk {partition['mountpoint']} is {partition['usage_percent']}% full"
)
elif partition.get('usage_status') == 'WARNING_HIGH_USAGE':
issues.append(
f"Disk {partition['mountpoint']} is {partition['usage_percent']}% full (Warning)"
)
if partition.get('smart_status') == 'UNHEALTHY':
issues.append(f"Disk {partition['mountpoint']} has an unhealthy SMART status")
for drive in health_report.get('drives_health', {}).get('drives', []):
if drive.get('smart_issues'):
issues.append(f"Drive {drive['device']} has SMART issues: {', '.join(drive['smart_issues'])}")
if drive.get('temperature') and drive['temperature'] > self.CONFIG['THRESHOLDS']['TEMPERATURE_WARNING']:
issues.append(f"Drive {drive['device']} temperature is high: {drive['temperature']}°C")
# Check for ECC memory errors
memory_health = health_report.get('memory_health', {})
if memory_health.get('has_ecc') and memory_health.get('ecc_errors'):
@ -581,11 +575,10 @@ class SystemHealthMonitor:
drives_health = {'overall_status': 'NORMAL', 'drives': []}
try:
physical_disks = self._get_all_disks()
logger.debug(f"Found physical disks: {physical_disks}")
# Filter out RBD devices
physical_disks = [disk for disk in physical_disks if not disk.startswith('/dev/rbd')]
# Get physical disks only (exclude RBD devices)
physical_disks = [disk for disk in self._get_all_disks()
if disk.startswith(('/dev/sd', '/dev/nvme'))]
logger.debug(f"Checking physical disks: {physical_disks}")
overall_status = 'NORMAL'
for disk in physical_disks:
@ -596,30 +589,11 @@ class SystemHealthMonitor:
'usage_percent': 0,
'total_space': '0B',
'used_space': '0B',
'free_space': '0B'
'free_space': '0B',
'smart_status': 'UNKNOWN'
}
# Check if disk is mounted
try:
partitions = [p for p in psutil.disk_partitions()
if p.device.startswith(disk)]
if partitions:
partition = partitions[0] # Use first partition for stats
usage = psutil.disk_usage(partition.mountpoint)
drive_report.update({
'mountpoint': partition.mountpoint,
'total_space': self._convert_bytes(usage.total),
'used_space': self._convert_bytes(usage.used),
'free_space': self._convert_bytes(usage.free),
'usage_percent': usage.percent,
'usage_status': ('CRITICAL_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_CRITICAL']
else 'WARNING_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_WARNING']
else 'NORMAL')
})
except Exception as e:
logger.debug(f"Could not get usage stats for {disk}: {e}")
# Check SMART health
# Check SMART health first
smart_health = self._check_smart_health(disk)
drive_report.update({
'smart_status': smart_health['status'],
@ -628,12 +602,10 @@ class SystemHealthMonitor:
'smart_attributes': smart_health['attributes']
})
# Update overall status
if (smart_health['status'] == 'UNHEALTHY' or
drive_report['usage_status'] == 'CRITICAL_HIGH_USAGE'):
# Update overall status based on SMART health
if smart_health['status'] == 'UNHEALTHY':
overall_status = 'CRITICAL'
elif (drive_report['usage_status'] == 'WARNING_HIGH_USAGE' and
overall_status != 'CRITICAL'):
elif smart_health['issues'] and overall_status != 'CRITICAL':
overall_status = 'WARNING'
drives_health['drives'].append(drive_report)