Remove RBD devices from SMART check
This commit is contained in:
@ -577,82 +577,49 @@ class SystemHealthMonitor:
|
||||
def _check_drives_health(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Check overall health of physical SATA and NVMe drives including disk usage and SMART status.
|
||||
|
||||
:return: Combined health report of all drives and their status.
|
||||
"""
|
||||
drives_health = {'overall_status': 'NORMAL', 'drives': []}
|
||||
|
||||
try:
|
||||
physical_disks = self._get_all_disks()
|
||||
logger.debug(f"Found physical disks: {physical_disks}")
|
||||
|
||||
try:
|
||||
all_partitions = psutil.disk_partitions()
|
||||
logger.debug(f"All disk partitions found: {[p.device for p in all_partitions]}")
|
||||
|
||||
# Log each partition evaluation
|
||||
partitions = []
|
||||
for p in all_partitions:
|
||||
is_physical = self._is_physical_disk(p.device)
|
||||
logger.debug(f"Evaluating partition: {p.device}")
|
||||
logger.debug(f" Mountpoint: {p.mountpoint}")
|
||||
logger.debug(f" FStype: {p.fstype}")
|
||||
logger.debug(f" Opts: {p.opts}")
|
||||
logger.debug(f" Is Physical: {is_physical}")
|
||||
if is_physical:
|
||||
partitions.append(p)
|
||||
|
||||
logger.debug(f"Final physical partitions selected: {[p.device for p in partitions]}")
|
||||
# Filter out RBD devices
|
||||
physical_disks = [disk for disk in physical_disks if not disk.startswith('/dev/rbd')]
|
||||
|
||||
overall_status = 'NORMAL'
|
||||
for partition in partitions:
|
||||
for disk in physical_disks:
|
||||
drive_report = {
|
||||
'device': partition.device,
|
||||
'mountpoint': partition.mountpoint
|
||||
'device': disk,
|
||||
'mountpoint': None,
|
||||
'usage_status': 'UNMOUNTED',
|
||||
'usage_percent': 0,
|
||||
'total_space': '0B',
|
||||
'used_space': '0B',
|
||||
'free_space': '0B'
|
||||
}
|
||||
|
||||
# Check disk usage
|
||||
# Check if disk is mounted
|
||||
try:
|
||||
partitions = [p for p in psutil.disk_partitions()
|
||||
if p.device.startswith(disk)]
|
||||
if partitions:
|
||||
partition = partitions[0] # Use first partition for stats
|
||||
usage = psutil.disk_usage(partition.mountpoint)
|
||||
disk_usage_status = 'NORMAL'
|
||||
if usage.percent > self.CONFIG['THRESHOLDS']['DISK_CRITICAL']:
|
||||
disk_usage_status = 'CRITICAL_HIGH_USAGE'
|
||||
elif usage.percent > self.CONFIG['THRESHOLDS']['DISK_WARNING']:
|
||||
disk_usage_status = 'WARNING_HIGH_USAGE'
|
||||
|
||||
drive_report.update({
|
||||
'mountpoint': partition.mountpoint,
|
||||
'total_space': self._convert_bytes(usage.total),
|
||||
'used_space': self._convert_bytes(usage.used),
|
||||
'free_space': self._convert_bytes(usage.free),
|
||||
'usage_percent': usage.percent,
|
||||
'usage_status': disk_usage_status
|
||||
'usage_status': ('CRITICAL_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_CRITICAL']
|
||||
else 'WARNING_HIGH_USAGE' if usage.percent > self.CONFIG['THRESHOLDS']['DISK_WARNING']
|
||||
else 'NORMAL')
|
||||
})
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get usage stats for {disk}: {e}")
|
||||
|
||||
# Check SMART health
|
||||
smart_health = self._check_smart_health(partition.device)
|
||||
drive_report.update({
|
||||
'smart_status': smart_health['status'],
|
||||
'smart_issues': smart_health['issues'],
|
||||
'temperature': smart_health['temp'],
|
||||
'smart_attributes': smart_health['attributes']
|
||||
})
|
||||
|
||||
# Update overall status
|
||||
if smart_health['status'] == 'UNHEALTHY' or disk_usage_status == 'CRITICAL_HIGH_USAGE':
|
||||
overall_status = 'CRITICAL'
|
||||
elif disk_usage_status == 'WARNING_HIGH_USAGE' and overall_status != 'CRITICAL':
|
||||
overall_status = 'WARNING'
|
||||
|
||||
drives_health['drives'].append(drive_report)
|
||||
|
||||
# Add non-mounted physical disks
|
||||
for disk in physical_disks:
|
||||
if not any(d['device'] == disk for d in drives_health['drives']):
|
||||
drive_report = {
|
||||
'device': disk,
|
||||
'mountpoint': None,
|
||||
'usage_status': 'UNMOUNTED'
|
||||
}
|
||||
|
||||
# Check SMART health for unmounted disks
|
||||
smart_health = self._check_smart_health(disk)
|
||||
drive_report.update({
|
||||
'smart_status': smart_health['status'],
|
||||
@ -661,8 +628,13 @@ class SystemHealthMonitor:
|
||||
'smart_attributes': smart_health['attributes']
|
||||
})
|
||||
|
||||
if smart_health['status'] == 'UNHEALTHY':
|
||||
# Update overall status
|
||||
if (smart_health['status'] == 'UNHEALTHY' or
|
||||
drive_report['usage_status'] == 'CRITICAL_HIGH_USAGE'):
|
||||
overall_status = 'CRITICAL'
|
||||
elif (drive_report['usage_status'] == 'WARNING_HIGH_USAGE' and
|
||||
overall_status != 'CRITICAL'):
|
||||
overall_status = 'WARNING'
|
||||
|
||||
drives_health['drives'].append(drive_report)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user