Fewer partitions, more disks
This commit is contained in:
@ -263,18 +263,12 @@ class SystemHealthMonitor:
|
|||||||
issues = []
|
issues = []
|
||||||
|
|
||||||
# Check for drive-related issues
|
# Check for drive-related issues
|
||||||
for partition in health_report.get('drives_health', {}).get('drives', []):
|
for drive in health_report.get('drives_health', {}).get('drives', []):
|
||||||
if partition.get('usage_status') == 'CRITICAL_HIGH_USAGE':
|
if drive.get('smart_issues'):
|
||||||
issues.append(
|
issues.append(f"Drive {drive['device']} has SMART issues: {', '.join(drive['smart_issues'])}")
|
||||||
f"Disk {partition['mountpoint']} is {partition['usage_percent']}% full"
|
if drive.get('temperature') and drive['temperature'] > self.CONFIG['THRESHOLDS']['TEMPERATURE_WARNING']:
|
||||||
)
|
issues.append(f"Drive {drive['device']} temperature is high: {drive['temperature']}°C")
|
||||||
elif partition.get('usage_status') == 'WARNING_HIGH_USAGE':
|
|
||||||
issues.append(
|
|
||||||
f"Disk {partition['mountpoint']} is {partition['usage_percent']}% full (Warning)"
|
|
||||||
)
|
|
||||||
if partition.get('smart_status') == 'UNHEALTHY':
|
|
||||||
issues.append(f"Disk {partition['mountpoint']} has an unhealthy SMART status")
|
|
||||||
|
|
||||||
# Check for ECC memory errors
|
# Check for ECC memory errors
|
||||||
memory_health = health_report.get('memory_health', {})
|
memory_health = health_report.get('memory_health', {})
|
||||||
if memory_health.get('has_ecc') and memory_health.get('ecc_errors'):
|
if memory_health.get('has_ecc') and memory_health.get('ecc_errors'):
|
||||||
@ -581,11 +575,10 @@ class SystemHealthMonitor:
|
|||||||
drives_health = {'overall_status': 'NORMAL', 'drives': []}
|
drives_health = {'overall_status': 'NORMAL', 'drives': []}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
physical_disks = self._get_all_disks()
|
# Get physical disks only (exclude RBD devices)
|
||||||
logger.debug(f"Found physical disks: {physical_disks}")
|
physical_disks = [disk for disk in self._get_all_disks()
|
||||||
|
if disk.startswith(('/dev/sd', '/dev/nvme'))]
|
||||||
# Filter out RBD devices
|
logger.debug(f"Checking physical disks: {physical_disks}")
|
||||||
physical_disks = [disk for disk in physical_disks if not disk.startswith('/dev/rbd')]
|
|
||||||
|
|
||||||
overall_status = 'NORMAL'
|
overall_status = 'NORMAL'
|
||||||
for disk in physical_disks:
|
for disk in physical_disks:
|
||||||
@ -596,30 +589,11 @@ class SystemHealthMonitor:
|
|||||||
'usage_percent': 0,
|
'usage_percent': 0,
|
||||||
'total_space': '0B',
|
'total_space': '0B',
|
||||||
'used_space': '0B',
|
'used_space': '0B',
|
||||||
'free_space': '0B'
|
'free_space': '0B',
|
||||||
|
'smart_status': 'UNKNOWN'
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check if disk is mounted
|
# Check SMART health first
|
||||||
try:
|
|
||||||
partitions = [p for p in psutil.disk_partitions()
|
|
||||||
if p.device.startswith(disk)]
|
|
||||||
if partitions:
|
|
||||||
partition = partitions[0] # Use first partition for stats
|
|
||||||
usage = psutil.disk_usage(partition.mountpoint)
|
|
||||||
drive_report.update({
|
|
||||||
'mountpoint': partition.mountpoint,
|
|
||||||
'total_space': self._convert_bytes(usage.total),
|
|
||||||
'used_space': self._convert_bytes(usage.used),
|
|
||||||
'free_space': self._convert_bytes(usage.free),
|
|
||||||
'usage_percent': usage.percent,
|
|
||||||
'usage_status': ('CRITICAL_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_CRITICAL']
|
|
||||||
else 'WARNING_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_WARNING']
|
|
||||||
else 'NORMAL')
|
|
||||||
})
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Could not get usage stats for {disk}: {e}")
|
|
||||||
|
|
||||||
# Check SMART health
|
|
||||||
smart_health = self._check_smart_health(disk)
|
smart_health = self._check_smart_health(disk)
|
||||||
drive_report.update({
|
drive_report.update({
|
||||||
'smart_status': smart_health['status'],
|
'smart_status': smart_health['status'],
|
||||||
@ -628,12 +602,10 @@ class SystemHealthMonitor:
|
|||||||
'smart_attributes': smart_health['attributes']
|
'smart_attributes': smart_health['attributes']
|
||||||
})
|
})
|
||||||
|
|
||||||
# Update overall status
|
# Update overall status based on SMART health
|
||||||
if (smart_health['status'] == 'UNHEALTHY' or
|
if smart_health['status'] == 'UNHEALTHY':
|
||||||
drive_report['usage_status'] == 'CRITICAL_HIGH_USAGE'):
|
|
||||||
overall_status = 'CRITICAL'
|
overall_status = 'CRITICAL'
|
||||||
elif (drive_report['usage_status'] == 'WARNING_HIGH_USAGE' and
|
elif smart_health['issues'] and overall_status != 'CRITICAL':
|
||||||
overall_status != 'CRITICAL'):
|
|
||||||
overall_status = 'WARNING'
|
overall_status = 'WARNING'
|
||||||
|
|
||||||
drives_health['drives'].append(drive_report)
|
drives_health['drives'].append(drive_report)
|
||||||
|
|||||||
Reference in New Issue
Block a user