Debug to Verify Fix

This commit is contained in:
2025-03-09 19:26:07 -04:00
parent a33b897edd
commit bd658f51ff

View File

@ -1320,6 +1320,7 @@ class SystemHealthMonitor:
""" """
Check storage utilization for all running LXC containers Check storage utilization for all running LXC containers
""" """
logger.debug("Starting LXC storage check")
lxc_health = { lxc_health = {
'status': 'OK', 'status': 'OK',
'containers': [], 'containers': [],
@ -1327,16 +1328,14 @@ class SystemHealthMonitor:
} }
try: try:
if self.dry_run: logger.debug("Executing 'pct list' command")
logger.debug("=== LXC Storage Check (Dry Run) ===")
logger.debug("Executing: pct list")
result = subprocess.run( result = subprocess.run(
['pct', 'list'], ['pct', 'list'],
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
text=True text=True
) )
logger.debug(f"pct list output:\n{result.stdout}")
for line in result.stdout.split('\n')[1:]: for line in result.stdout.split('\n')[1:]:
if not line.strip(): if not line.strip():
@ -1344,17 +1343,21 @@ class SystemHealthMonitor:
parts = line.split() parts = line.split()
if len(parts) < 2: if len(parts) < 2:
logger.debug(f"Skipping invalid line: {line}")
continue continue
vmid, status = parts[0], parts[1] vmid, status = parts[0], parts[1]
logger.debug(f"Processing container VMID: {vmid}, Status: {status}")
if status.lower() == 'running': if status.lower() == 'running':
logger.debug(f"Checking disk usage for running container {vmid}")
disk_info = subprocess.run( disk_info = subprocess.run(
['pct', 'df', vmid], ['pct', 'df', vmid],
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
text=True text=True
) )
logger.debug(f"pct df {vmid} output:\n{disk_info.stdout}")
container_info = { container_info = {
'vmid': vmid, 'vmid': vmid,
@ -1362,9 +1365,11 @@ class SystemHealthMonitor:
} }
for fs_line in disk_info.stdout.split('\n')[1:]: for fs_line in disk_info.stdout.split('\n')[1:]:
if not fs_line.strip() or 'MP' in fs_line: # Skip empty lines and header if not fs_line.strip() or 'MP' in fs_line:
logger.debug(f"Skipping line: {fs_line}")
continue continue
logger.debug(f"Processing filesystem line: {fs_line}")
try: try:
parts = fs_line.split() parts = fs_line.split()
if len(parts) >= 6: if len(parts) >= 6:
@ -1377,22 +1382,27 @@ class SystemHealthMonitor:
'usage_percent': 0 'usage_percent': 0
} }
# Skip pool name entries that start with 'appPool:'
if parts[0].startswith('appPool:'): if parts[0].startswith('appPool:'):
logger.debug(f"Skipping appPool entry: {parts[0]}")
continue continue
# Skip mediafs mount entries
if '/mnt/pve/mediaf' in parts[0]: if '/mnt/pve/mediaf' in parts[0]:
logger.debug(f"Skipping mediafs mount: {parts[0]}")
continue continue
mountpoint = parts[5] mountpoint = parts[5]
logger.debug(f"Processing mountpoint: {mountpoint}")
if parts[1][-1] in 'BKMGT' and parts[2][-1] in 'BKMGT': if parts[1][-1] in 'BKMGT' and parts[2][-1] in 'BKMGT':
total_size = self._convert_size_to_bytes(parts[1]) total_size = self._convert_size_to_bytes(parts[1])
used_size = self._convert_size_to_bytes(parts[2]) used_size = self._convert_size_to_bytes(parts[2])
usage_percent = float(parts[4].rstrip('%')) usage_percent = float(parts[4].rstrip('%'))
logger.debug(f"Storage metrics for {mountpoint}:")
logger.debug(f" Total: {parts[1]}")
logger.debug(f" Used: {parts[2]}")
logger.debug(f" Usage: {usage_percent}%")
filesystem.update({ filesystem.update({
'mountpoint': mountpoint, 'mountpoint': mountpoint,
'total': parts[1], 'total': parts[1],
@ -1402,10 +1412,12 @@ class SystemHealthMonitor:
}) })
if usage_percent >= self.CONFIG['THRESHOLDS']['DISK_CRITICAL']: if usage_percent >= self.CONFIG['THRESHOLDS']['DISK_CRITICAL']:
logger.debug(f"CRITICAL: Storage usage {usage_percent}% exceeds critical threshold")
lxc_health['status'] = 'CRITICAL' lxc_health['status'] = 'CRITICAL'
issue = f"LXC {vmid} critical storage usage: {usage_percent}% on {mountpoint}" issue = f"LXC {vmid} critical storage usage: {usage_percent}% on {mountpoint}"
lxc_health['issues'].append(issue) lxc_health['issues'].append(issue)
elif usage_percent >= self.CONFIG['THRESHOLDS']['DISK_WARNING']: elif usage_percent >= self.CONFIG['THRESHOLDS']['DISK_WARNING']:
logger.debug(f"WARNING: Storage usage {usage_percent}% exceeds warning threshold")
if lxc_health['status'] != 'CRITICAL': if lxc_health['status'] != 'CRITICAL':
lxc_health['status'] = 'WARNING' lxc_health['status'] = 'WARNING'
issue = f"LXC {vmid} high storage usage: {usage_percent}% on {mountpoint}" issue = f"LXC {vmid} high storage usage: {usage_percent}% on {mountpoint}"
@ -1413,29 +1425,26 @@ class SystemHealthMonitor:
container_info['filesystems'].append(filesystem) container_info['filesystems'].append(filesystem)
except Exception as e: except Exception as e:
if self.dry_run:
logger.debug(f"Error processing filesystem line for container {vmid}: {str(e)}") logger.debug(f"Error processing filesystem line for container {vmid}: {str(e)}")
continue continue
lxc_health['containers'].append(container_info) lxc_health['containers'].append(container_info)
logger.debug(f"Added container info for VMID {vmid}")
if self.dry_run:
logger.debug("=== LXC Storage Check Summary ===") logger.debug("=== LXC Storage Check Summary ===")
logger.debug(f"Status: {lxc_health['status']}") logger.debug(f"Status: {lxc_health['status']}")
logger.debug(f"Total containers checked: {len(lxc_health['containers'])}") logger.debug(f"Total containers checked: {len(lxc_health['containers'])}")
logger.debug(f"Issues found: {len(lxc_health['issues'])}") logger.debug(f"Issues found: {len(lxc_health['issues'])}")
logger.debug("=== End LXC Storage Check ===\n") logger.debug("=== End LXC Storage Check ===")
except Exception as e: except Exception as e:
logger.debug(f"Critical error during LXC storage check: {str(e)}")
lxc_health['status'] = 'ERROR' lxc_health['status'] = 'ERROR'
error_msg = f"Error checking LXC storage: {str(e)}" error_msg = f"Error checking LXC storage: {str(e)}"
lxc_health['issues'].append(error_msg) lxc_health['issues'].append(error_msg)
if self.dry_run:
logger.debug(f"Error during LXC storage check: {error_msg}")
return lxc_health return lxc_health
def main(): def main():
parser = argparse.ArgumentParser(description="System Health Monitor") parser = argparse.ArgumentParser(description="System Health Monitor")
parser.add_argument( parser.add_argument(