diff --git a/hwmonDaemon.py b/hwmonDaemon.py index 01f1f77..f4322e9 100644 --- a/hwmonDaemon.py +++ b/hwmonDaemon.py @@ -1320,6 +1320,7 @@ class SystemHealthMonitor: """ Check storage utilization for all running LXC containers """ + logger.debug("Starting LXC storage check") lxc_health = { 'status': 'OK', 'containers': [], @@ -1327,16 +1328,14 @@ class SystemHealthMonitor: } try: - if self.dry_run: - logger.debug("=== LXC Storage Check (Dry Run) ===") - logger.debug("Executing: pct list") - + logger.debug("Executing 'pct list' command") result = subprocess.run( ['pct', 'list'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True ) + logger.debug(f"pct list output:\n{result.stdout}") for line in result.stdout.split('\n')[1:]: if not line.strip(): @@ -1344,17 +1343,21 @@ class SystemHealthMonitor: parts = line.split() if len(parts) < 2: + logger.debug(f"Skipping invalid line: {line}") continue vmid, status = parts[0], parts[1] + logger.debug(f"Processing container VMID: {vmid}, Status: {status}") if status.lower() == 'running': + logger.debug(f"Checking disk usage for running container {vmid}") disk_info = subprocess.run( ['pct', 'df', vmid], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True ) + logger.debug(f"pct df {vmid} output:\n{disk_info.stdout}") container_info = { 'vmid': vmid, @@ -1362,9 +1365,11 @@ class SystemHealthMonitor: } for fs_line in disk_info.stdout.split('\n')[1:]: - if not fs_line.strip() or 'MP' in fs_line: # Skip empty lines and header + if not fs_line.strip() or 'MP' in fs_line: + logger.debug(f"Skipping line: {fs_line}") continue - + + logger.debug(f"Processing filesystem line: {fs_line}") try: parts = fs_line.split() if len(parts) >= 6: @@ -1377,22 +1382,27 @@ class SystemHealthMonitor: 'usage_percent': 0 } - - # Skip pool name entries that start with 'appPool:' if parts[0].startswith('appPool:'): + logger.debug(f"Skipping appPool entry: {parts[0]}") continue - # Skip mediafs mount entries if '/mnt/pve/mediaf' in parts[0]: + logger.debug(f"Skipping mediafs mount: {parts[0]}") continue mountpoint = parts[5] + logger.debug(f"Processing mountpoint: {mountpoint}") if parts[1][-1] in 'BKMGT' and parts[2][-1] in 'BKMGT': total_size = self._convert_size_to_bytes(parts[1]) used_size = self._convert_size_to_bytes(parts[2]) usage_percent = float(parts[4].rstrip('%')) + logger.debug(f"Storage metrics for {mountpoint}:") + logger.debug(f" Total: {parts[1]}") + logger.debug(f" Used: {parts[2]}") + logger.debug(f" Usage: {usage_percent}%") + filesystem.update({ 'mountpoint': mountpoint, 'total': parts[1], @@ -1402,10 +1412,12 @@ class SystemHealthMonitor: }) if usage_percent >= self.CONFIG['THRESHOLDS']['DISK_CRITICAL']: + logger.debug(f"CRITICAL: Storage usage {usage_percent}% exceeds critical threshold") lxc_health['status'] = 'CRITICAL' issue = f"LXC {vmid} critical storage usage: {usage_percent}% on {mountpoint}" lxc_health['issues'].append(issue) elif usage_percent >= self.CONFIG['THRESHOLDS']['DISK_WARNING']: + logger.debug(f"WARNING: Storage usage {usage_percent}% exceeds warning threshold") if lxc_health['status'] != 'CRITICAL': lxc_health['status'] = 'WARNING' issue = f"LXC {vmid} high storage usage: {usage_percent}% on {mountpoint}" @@ -1413,29 +1425,26 @@ class SystemHealthMonitor: container_info['filesystems'].append(filesystem) except Exception as e: - if self.dry_run: - logger.debug(f"Error processing filesystem line for container {vmid}: {str(e)}") + logger.debug(f"Error processing filesystem line for container {vmid}: {str(e)}") continue lxc_health['containers'].append(container_info) + logger.debug(f"Added container info for VMID {vmid}") - if self.dry_run: - logger.debug("=== LXC Storage Check Summary ===") - logger.debug(f"Status: {lxc_health['status']}") - logger.debug(f"Total containers checked: {len(lxc_health['containers'])}") - logger.debug(f"Issues found: {len(lxc_health['issues'])}") - logger.debug("=== End LXC Storage Check ===\n") + logger.debug("=== LXC Storage Check Summary ===") + logger.debug(f"Status: {lxc_health['status']}") + logger.debug(f"Total containers checked: {len(lxc_health['containers'])}") + logger.debug(f"Issues found: {len(lxc_health['issues'])}") + logger.debug("=== End LXC Storage Check ===") except Exception as e: + logger.debug(f"Critical error during LXC storage check: {str(e)}") lxc_health['status'] = 'ERROR' error_msg = f"Error checking LXC storage: {str(e)}" lxc_health['issues'].append(error_msg) - if self.dry_run: - logger.debug(f"Error during LXC storage check: {error_msg}") return lxc_health - def main(): parser = argparse.ArgumentParser(description="System Health Monitor") parser.add_argument(