Debug to Verify Fix

This commit is contained in:
2025-03-09 19:26:07 -04:00
parent a33b897edd
commit bd658f51ff

View File

@@ -1320,6 +1320,7 @@ class SystemHealthMonitor:
"""
Check storage utilization for all running LXC containers
"""
logger.debug("Starting LXC storage check")
lxc_health = {
'status': 'OK',
'containers': [],
@@ -1327,16 +1328,14 @@ class SystemHealthMonitor:
}
try:
if self.dry_run:
logger.debug("=== LXC Storage Check (Dry Run) ===")
logger.debug("Executing: pct list")
logger.debug("Executing 'pct list' command")
result = subprocess.run(
['pct', 'list'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
logger.debug(f"pct list output:\n{result.stdout}")
for line in result.stdout.split('\n')[1:]:
if not line.strip():
@@ -1344,17 +1343,21 @@ class SystemHealthMonitor:
parts = line.split()
if len(parts) < 2:
logger.debug(f"Skipping invalid line: {line}")
continue
vmid, status = parts[0], parts[1]
logger.debug(f"Processing container VMID: {vmid}, Status: {status}")
if status.lower() == 'running':
logger.debug(f"Checking disk usage for running container {vmid}")
disk_info = subprocess.run(
['pct', 'df', vmid],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
logger.debug(f"pct df {vmid} output:\n{disk_info.stdout}")
container_info = {
'vmid': vmid,
@@ -1362,9 +1365,11 @@ class SystemHealthMonitor:
}
for fs_line in disk_info.stdout.split('\n')[1:]:
if not fs_line.strip() or 'MP' in fs_line: # Skip empty lines and header
if not fs_line.strip() or 'MP' in fs_line:
logger.debug(f"Skipping line: {fs_line}")
continue
logger.debug(f"Processing filesystem line: {fs_line}")
try:
parts = fs_line.split()
if len(parts) >= 6:
@@ -1377,22 +1382,27 @@ class SystemHealthMonitor:
'usage_percent': 0
}
# Skip pool name entries that start with 'appPool:'
if parts[0].startswith('appPool:'):
logger.debug(f"Skipping appPool entry: {parts[0]}")
continue
# Skip mediafs mount entries
if '/mnt/pve/mediaf' in parts[0]:
logger.debug(f"Skipping mediafs mount: {parts[0]}")
continue
mountpoint = parts[5]
logger.debug(f"Processing mountpoint: {mountpoint}")
if parts[1][-1] in 'BKMGT' and parts[2][-1] in 'BKMGT':
total_size = self._convert_size_to_bytes(parts[1])
used_size = self._convert_size_to_bytes(parts[2])
usage_percent = float(parts[4].rstrip('%'))
logger.debug(f"Storage metrics for {mountpoint}:")
logger.debug(f" Total: {parts[1]}")
logger.debug(f" Used: {parts[2]}")
logger.debug(f" Usage: {usage_percent}%")
filesystem.update({
'mountpoint': mountpoint,
'total': parts[1],
@@ -1402,10 +1412,12 @@ class SystemHealthMonitor:
})
if usage_percent >= self.CONFIG['THRESHOLDS']['DISK_CRITICAL']:
logger.debug(f"CRITICAL: Storage usage {usage_percent}% exceeds critical threshold")
lxc_health['status'] = 'CRITICAL'
issue = f"LXC {vmid} critical storage usage: {usage_percent}% on {mountpoint}"
lxc_health['issues'].append(issue)
elif usage_percent >= self.CONFIG['THRESHOLDS']['DISK_WARNING']:
logger.debug(f"WARNING: Storage usage {usage_percent}% exceeds warning threshold")
if lxc_health['status'] != 'CRITICAL':
lxc_health['status'] = 'WARNING'
issue = f"LXC {vmid} high storage usage: {usage_percent}% on {mountpoint}"
@@ -1413,29 +1425,26 @@ class SystemHealthMonitor:
container_info['filesystems'].append(filesystem)
except Exception as e:
if self.dry_run:
logger.debug(f"Error processing filesystem line for container {vmid}: {str(e)}")
continue
lxc_health['containers'].append(container_info)
logger.debug(f"Added container info for VMID {vmid}")
if self.dry_run:
logger.debug("=== LXC Storage Check Summary ===")
logger.debug(f"Status: {lxc_health['status']}")
logger.debug(f"Total containers checked: {len(lxc_health['containers'])}")
logger.debug(f"Issues found: {len(lxc_health['issues'])}")
logger.debug("=== End LXC Storage Check ===\n")
logger.debug("=== End LXC Storage Check ===")
except Exception as e:
logger.debug(f"Critical error during LXC storage check: {str(e)}")
lxc_health['status'] = 'ERROR'
error_msg = f"Error checking LXC storage: {str(e)}"
lxc_health['issues'].append(error_msg)
if self.dry_run:
logger.debug(f"Error during LXC storage check: {error_msg}")
return lxc_health
def main():
parser = argparse.ArgumentParser(description="System Health Monitor")
parser.add_argument(