diff --git a/hwmonDaemon.py b/hwmonDaemon.py index 68caf6d..d9e96a7 100644 --- a/hwmonDaemon.py +++ b/hwmonDaemon.py @@ -445,15 +445,29 @@ class SystemHealthMonitor: } try: - # More robust ECC detection - check both directory existence and content - edac_path = '/sys/devices/system/edac/mc' - if os.path.exists(edac_path) and os.listdir(edac_path): - # Verify actual ECC controller presence + # First check using dmidecode + result = subprocess.run( + ['dmidecode', '--type', 'memory'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + if 'Error Correction Type: Multi-bit ECC' in result.stdout: + memory_health['has_ecc'] = True + + # If dmidecode didn't find ECC, try the edac method as backup + if not memory_health['has_ecc']: + edac_path = '/sys/devices/system/edac/mc' + if os.path.exists(edac_path) and os.listdir(edac_path): + for mc_dir in glob.glob('/sys/devices/system/edac/mc/mc[0-9]*'): + if os.path.exists(f"{mc_dir}/csrow0"): + memory_health['has_ecc'] = True + break + + # If ECC is present, check for errors + if memory_health['has_ecc']: for mc_dir in glob.glob('/sys/devices/system/edac/mc/mc[0-9]*'): if os.path.exists(f"{mc_dir}/csrow0"): - memory_health['has_ecc'] = True - - # Check uncorrectable errors ue_count = self._read_ecc_count(f"{mc_dir}/csrow0/ue_count") if ue_count > 0: memory_health['status'] = 'CRITICAL' @@ -461,7 +475,6 @@ class SystemHealthMonitor: f"Uncorrectable ECC errors detected in {os.path.basename(mc_dir)}: {ue_count}" ) - # Check correctable errors ce_count = self._read_ecc_count(f"{mc_dir}/csrow0/ce_count") if ce_count > 0: if memory_health['status'] != 'CRITICAL':