From 586f7df0c7b65a5b1f002209b88f593f792d2d48 Mon Sep 17 00:00:00 2001 From: Jared Vititoe Date: Fri, 13 Dec 2024 18:43:10 -0500 Subject: [PATCH] Did not have memory check within class --- hwmonDaemon.py | 86 +++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/hwmonDaemon.py b/hwmonDaemon.py index 5ec050e..785638d 100644 --- a/hwmonDaemon.py +++ b/hwmonDaemon.py @@ -431,50 +431,50 @@ class SystemHealthMonitor: bytes_value /= 1024.0 return f"{bytes_value:.1f}Y{suffix}" -def _check_memory_usage(self) -> Dict[str, Any]: - """ - Check for ECC memory errors if ECC memory is present. - """ - memory_health = { - 'has_ecc': False, - 'ecc_errors': [], - 'status': 'OK', - 'total_memory': self._convert_bytes(psutil.virtual_memory().total), - 'used_memory': self._convert_bytes(psutil.virtual_memory().used), - 'memory_percent': psutil.virtual_memory().percent - } - - try: - # More robust ECC detection - check both directory existence and content - edac_path = '/sys/devices/system/edac/mc' - if os.path.exists(edac_path) and os.listdir(edac_path): - # Verify actual ECC controller presence - for mc_dir in glob.glob('/sys/devices/system/edac/mc/mc[0-9]*'): - if os.path.exists(f"{mc_dir}/csrow0"): - memory_health['has_ecc'] = True - - # Check uncorrectable errors - ue_count = self._read_ecc_count(f"{mc_dir}/csrow0/ue_count") - if ue_count > 0: - memory_health['status'] = 'CRITICAL' - memory_health['ecc_errors'].append( - f"Uncorrectable ECC errors detected in {os.path.basename(mc_dir)}: {ue_count}" - ) - - # Check correctable errors - ce_count = self._read_ecc_count(f"{mc_dir}/csrow0/ce_count") - if ce_count > 0: - if memory_health['status'] != 'CRITICAL': - memory_health['status'] = 'WARNING' - memory_health['ecc_errors'].append( - f"Correctable ECC errors detected in {os.path.basename(mc_dir)}: {ce_count}" - ) + def _check_memory_usage(self) -> Dict[str, Any]: + """ + Check for ECC memory errors if ECC memory is present. + """ + memory_health = { + 'has_ecc': False, + 'ecc_errors': [], + 'status': 'OK', + 'total_memory': self._convert_bytes(psutil.virtual_memory().total), + 'used_memory': self._convert_bytes(psutil.virtual_memory().used), + 'memory_percent': psutil.virtual_memory().percent + } + + try: + # More robust ECC detection - check both directory existence and content + edac_path = '/sys/devices/system/edac/mc' + if os.path.exists(edac_path) and os.listdir(edac_path): + # Verify actual ECC controller presence + for mc_dir in glob.glob('/sys/devices/system/edac/mc/mc[0-9]*'): + if os.path.exists(f"{mc_dir}/csrow0"): + memory_health['has_ecc'] = True - except Exception as e: - memory_health['status'] = 'ERROR' - memory_health['ecc_errors'].append(f"Error checking ECC status: {str(e)}") - - return memory_health + # Check uncorrectable errors + ue_count = self._read_ecc_count(f"{mc_dir}/csrow0/ue_count") + if ue_count > 0: + memory_health['status'] = 'CRITICAL' + memory_health['ecc_errors'].append( + f"Uncorrectable ECC errors detected in {os.path.basename(mc_dir)}: {ue_count}" + ) + + # Check correctable errors + ce_count = self._read_ecc_count(f"{mc_dir}/csrow0/ce_count") + if ce_count > 0: + if memory_health['status'] != 'CRITICAL': + memory_health['status'] = 'WARNING' + memory_health['ecc_errors'].append( + f"Correctable ECC errors detected in {os.path.basename(mc_dir)}: {ce_count}" + ) + + except Exception as e: + memory_health['status'] = 'ERROR' + memory_health['ecc_errors'].append(f"Error checking ECC status: {str(e)}") + + return memory_health def _read_ecc_count(self, filepath: str) -> int: """