Exclude RBD devices from SMART check

This commit is contained in:
2025-03-03 17:45:46 -05:00
parent 7856f9e9bd
commit 79509642c6

View File

@ -577,57 +577,50 @@ class SystemHealthMonitor:
def _check_drives_health(self) -> Dict[str, Any]:
"""
Check overall health of physical SATA and NVMe drives including disk usage and SMART status.
:return: Combined health report of all drives and their status.
"""
drives_health = {'overall_status': 'NORMAL', 'drives': []}
physical_disks = self._get_all_disks()
logger.debug(f"Found physical disks: {physical_disks}")
try:
all_partitions = psutil.disk_partitions()
logger.debug(f"All disk partitions found: {[p.device for p in all_partitions]}")
physical_disks = self._get_all_disks()
logger.debug(f"Found physical disks: {physical_disks}")
# Log each partition evaluation
partitions = []
for p in all_partitions:
is_physical = self._is_physical_disk(p.device)
logger.debug(f"Evaluating partition: {p.device}")
logger.debug(f" Mountpoint: {p.mountpoint}")
logger.debug(f" FStype: {p.fstype}")
logger.debug(f" Opts: {p.opts}")
logger.debug(f" Is Physical: {is_physical}")
if is_physical:
partitions.append(p)
# Filter out RBD devices
physical_disks = [disk for disk in physical_disks if not disk.startswith('/dev/rbd')]
logger.debug(f"Final physical partitions selected: {[p.device for p in partitions]}")
overall_status = 'NORMAL'
for partition in partitions:
for disk in physical_disks:
drive_report = {
'device': partition.device,
'mountpoint': partition.mountpoint
'device': disk,
'mountpoint': None,
'usage_status': 'UNMOUNTED',
'usage_percent': 0,
'total_space': '0B',
'used_space': '0B',
'free_space': '0B'
}
# Check disk usage
usage = psutil.disk_usage(partition.mountpoint)
disk_usage_status = 'NORMAL'
if usage.percent > self.CONFIG['THRESHOLDS']['DISK_CRITICAL']:
disk_usage_status = 'CRITICAL_HIGH_USAGE'
elif usage.percent > self.CONFIG['THRESHOLDS']['DISK_WARNING']:
disk_usage_status = 'WARNING_HIGH_USAGE'
drive_report.update({
'total_space': self._convert_bytes(usage.total),
'used_space': self._convert_bytes(usage.used),
'free_space': self._convert_bytes(usage.free),
'usage_percent': usage.percent,
'usage_status': disk_usage_status
})
# Check if disk is mounted
try:
partitions = [p for p in psutil.disk_partitions()
if p.device.startswith(disk)]
if partitions:
partition = partitions[0] # Use first partition for stats
usage = psutil.disk_usage(partition.mountpoint)
drive_report.update({
'mountpoint': partition.mountpoint,
'total_space': self._convert_bytes(usage.total),
'used_space': self._convert_bytes(usage.used),
'free_space': self._convert_bytes(usage.free),
'usage_percent': usage.percent,
'usage_status': ('CRITICAL_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_CRITICAL']
else 'WARNING_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_WARNING']
else 'NORMAL')
})
except Exception as e:
logger.debug(f"Could not get usage stats for {disk}: {e}")
# Check SMART health
smart_health = self._check_smart_health(partition.device)
smart_health = self._check_smart_health(disk)
drive_report.update({
'smart_status': smart_health['status'],
'smart_issues': smart_health['issues'],
@ -636,36 +629,15 @@ class SystemHealthMonitor:
})
# Update overall status
if smart_health['status'] == 'UNHEALTHY' or disk_usage_status == 'CRITICAL_HIGH_USAGE':
if (smart_health['status'] == 'UNHEALTHY' or
drive_report['usage_status'] == 'CRITICAL_HIGH_USAGE'):
overall_status = 'CRITICAL'
elif disk_usage_status == 'WARNING_HIGH_USAGE' and overall_status != 'CRITICAL':
elif (drive_report['usage_status'] == 'WARNING_HIGH_USAGE' and
overall_status != 'CRITICAL'):
overall_status = 'WARNING'
drives_health['drives'].append(drive_report)
# Add non-mounted physical disks
for disk in physical_disks:
if not any(d['device'] == disk for d in drives_health['drives']):
drive_report = {
'device': disk,
'mountpoint': None,
'usage_status': 'UNMOUNTED'
}
# Check SMART health for unmounted disks
smart_health = self._check_smart_health(disk)
drive_report.update({
'smart_status': smart_health['status'],
'smart_issues': smart_health['issues'],
'temperature': smart_health['temp'],
'smart_attributes': smart_health['attributes']
})
if smart_health['status'] == 'UNHEALTHY':
overall_status = 'CRITICAL'
drives_health['drives'].append(drive_report)
drives_health['overall_status'] = overall_status
except Exception as e: