diff --git a/hwmonDaemon.py b/hwmonDaemon.py
index 9a0582e..64b4826 100644
--- a/hwmonDaemon.py
+++ b/hwmonDaemon.py
@@ -1104,6 +1104,42 @@ class SystemHealthMonitor:
             bytes_value /= 1024.0
         return f"{bytes_value:.1f}Y{suffix}"
 
+    def _convert_size_to_bytes(self, size_str: str) -> float:
+        """
+        Convert a human-readable size string to a number of bytes.
+
+        The final character selects a power-of-1024 multiplier
+        (B, K, M, G, T or P, case-insensitive). A value without a unit
+        suffix, such as a bare '0', is taken to already be in bytes.
+
+        Args:
+            size_str: Size text such as '7.8G', '512M' or '0'.
+
+        Returns:
+            The size in bytes.
+
+        Raises:
+            ValueError: If the string is empty or its numeric part
+                cannot be parsed.
+        """
+        units = {'B': 1, 'K': 1024, 'M': 1024**2, 'G': 1024**3,
+                 'T': 1024**4, 'P': 1024**5}
+        text = size_str.strip().upper()
+        if not text:
+            raise ValueError("Empty size string")
+
+        # Only strip the last character when it really is a unit letter;
+        # otherwise a unit-less value like '512' would lose its last digit.
+        if text[-1] in units:
+            number, multiplier = text[:-1], units[text[-1]]
+        else:
+            number, multiplier = text, 1
+
+        try:
+            return float(number) * multiplier
+        except ValueError:
+            raise ValueError(f"Unrecognised size string: {size_str!r}") from None
+
     def _check_memory_usage(self) -> Dict[str, Any]:
         """
         Check for ECC memory errors if ECC memory is present.
@@ -1266,7 +1302,6 @@ class SystemHealthMonitor:
         }
 
         try:
-            # Get list of running LXC containers
             if self.dry_run:
                 logger.debug("=== LXC Storage Check (Dry Run) ===")
                 logger.debug("Executing: pct list")
@@ -1281,12 +1316,15 @@ class SystemHealthMonitor:
             if self.dry_run:
                 logger.debug(f"Raw pct list output:\n{result.stdout}")
 
-            # Skip header line and process each container
             for line in result.stdout.split('\n')[1:]:
                 if not line.strip():
                     continue
 
-                vmid, status, *_ = line.split()
+                parts = line.split()
+                if len(parts) < 2:
+                    continue
+
+                vmid, status = parts[0], parts[1]
                 if self.dry_run:
                     logger.debug(f"Processing container VMID: {vmid}, Status: {status}")
 
@@ -1296,7 +1334,6 @@ class SystemHealthMonitor:
                         logger.debug(f"Checking disk usage for container {vmid}")
                         logger.debug(f"Executing: pct df {vmid}")
 
-                    # Get container disk usage
                     disk_info = subprocess.run(
                         ['pct', 'df', vmid],
                         stdout=subprocess.PIPE,
@@ -1312,43 +1349,58 @@ class SystemHealthMonitor:
                         'filesystems': []
                     }
 
-                    # Skip header and process each filesystem
                     for fs_line in disk_info.stdout.split('\n')[1:]:
-                        if not fs_line.strip():
+                        parts = fs_line.split()
+                        # Skip blank lines, short rows and a repeated 'MP' header row
+                        if len(parts) < 6 or parts[0] == 'MP':
                             continue
 
-                        fs = fs_line.split()
-                        if len(fs) >= 6:
-                            usage_percent = int(fs[4].rstrip('%'))
-                            filesystem = {
-                                'mountpoint': fs[5],
-                                'total': fs[1],
-                                'used': fs[2],
-                                'available': fs[3],
-                                'usage_percent': usage_percent
-                            }
-
-                            if self.dry_run:
-                                logger.debug(f"Container {vmid} filesystem details:")
-                                logger.debug(f"  Mountpoint: {filesystem['mountpoint']}")
-                                logger.debug(f"  Usage: {filesystem['used']}/{filesystem['total']} ({usage_percent}%)")
-
-                            # Check thresholds
-                            if usage_percent >= self.CONFIG['THRESHOLDS']['DISK_CRITICAL']:
-                                lxc_health['status'] = 'CRITICAL'
-                                issue = f"LXC {vmid} critical storage usage: {usage_percent}% on {fs[5]}"
-                                lxc_health['issues'].append(issue)
-                                if self.dry_run:
-                                    logger.debug(f"Critical issue detected: {issue}")
-                            elif usage_percent >= self.CONFIG['THRESHOLDS']['DISK_WARNING']:
-                                if lxc_health['status'] != 'CRITICAL':
-                                    lxc_health['status'] = 'WARNING'
-                                issue = f"LXC {vmid} high storage usage: {usage_percent}% on {fs[5]}"
-                                lxc_health['issues'].append(issue)
-                                if self.dry_run:
-                                    logger.debug(f"Warning issue detected: {issue}")
-
-                            container_info['filesystems'].append(filesystem)
+                        # Columns used: [2]=size, [3]=used, [4]=avail. The mount
+                        # path is read from the final column, which stays correct
+                        # whether or not a usage-percentage column precedes it.
+                        total, used, avail = parts[2], parts[3], parts[4]
+                        mountpoint = parts[-1]
+
+                        try:
+                            total_bytes = self._convert_size_to_bytes(total)
+                            used_bytes = self._convert_size_to_bytes(used)
+                        except ValueError as e:
+                            # Surface unparseable rows instead of dropping them silently
+                            logger.warning(f"Skipping unparseable filesystem line for container {vmid}: {e}")
+                            continue
+
+                        # A zero-sized filesystem has no meaningful usage; report 0%
+                        # rather than dividing by zero.
+                        usage_percent = int((used_bytes / total_bytes) * 100) if total_bytes > 0 else 0
+
+                        filesystem = {
+                            'mountpoint': mountpoint,
+                            'total': total,
+                            'used': used,
+                            'available': avail,
+                            'usage_percent': usage_percent
+                        }
+
+                        if self.dry_run:
+                            logger.debug(f"Container {vmid} filesystem details:")
+                            logger.debug(f"  Mountpoint: {mountpoint}")
+                            logger.debug(f"  Usage: {used}/{total} ({usage_percent}%)")
+
+                        if usage_percent >= self.CONFIG['THRESHOLDS']['DISK_CRITICAL']:
+                            lxc_health['status'] = 'CRITICAL'
+                            issue = f"LXC {vmid} critical storage usage: {usage_percent}% on {mountpoint}"
+                            lxc_health['issues'].append(issue)
+                            if self.dry_run:
+                                logger.debug(f"Critical issue detected: {issue}")
+                        elif usage_percent >= self.CONFIG['THRESHOLDS']['DISK_WARNING']:
+                            if lxc_health['status'] != 'CRITICAL':
+                                lxc_health['status'] = 'WARNING'
+                            issue = f"LXC {vmid} high storage usage: {usage_percent}% on {mountpoint}"
+                            lxc_health['issues'].append(issue)
+                            if self.dry_run:
+                                logger.debug(f"Warning issue detected: {issue}")
+
+                        container_info['filesystems'].append(filesystem)
 
                     lxc_health['containers'].append(container_info)
 