Debug to Verify Fix
This commit is contained in:
@ -1320,6 +1320,7 @@ class SystemHealthMonitor:
|
||||
"""
|
||||
Check storage utilization for all running LXC containers
|
||||
"""
|
||||
logger.debug("Starting LXC storage check")
|
||||
lxc_health = {
|
||||
'status': 'OK',
|
||||
'containers': [],
|
||||
@ -1327,16 +1328,14 @@ class SystemHealthMonitor:
|
||||
}
|
||||
|
||||
try:
|
||||
if self.dry_run:
|
||||
logger.debug("=== LXC Storage Check (Dry Run) ===")
|
||||
logger.debug("Executing: pct list")
|
||||
|
||||
logger.debug("Executing 'pct list' command")
|
||||
result = subprocess.run(
|
||||
['pct', 'list'],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True
|
||||
)
|
||||
logger.debug(f"pct list output:\n{result.stdout}")
|
||||
|
||||
for line in result.stdout.split('\n')[1:]:
|
||||
if not line.strip():
|
||||
@ -1344,17 +1343,21 @@ class SystemHealthMonitor:
|
||||
|
||||
parts = line.split()
|
||||
if len(parts) < 2:
|
||||
logger.debug(f"Skipping invalid line: {line}")
|
||||
continue
|
||||
|
||||
vmid, status = parts[0], parts[1]
|
||||
logger.debug(f"Processing container VMID: {vmid}, Status: {status}")
|
||||
|
||||
if status.lower() == 'running':
|
||||
logger.debug(f"Checking disk usage for running container {vmid}")
|
||||
disk_info = subprocess.run(
|
||||
['pct', 'df', vmid],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True
|
||||
)
|
||||
logger.debug(f"pct df {vmid} output:\n{disk_info.stdout}")
|
||||
|
||||
container_info = {
|
||||
'vmid': vmid,
|
||||
@ -1362,9 +1365,11 @@ class SystemHealthMonitor:
|
||||
}
|
||||
|
||||
for fs_line in disk_info.stdout.split('\n')[1:]:
|
||||
if not fs_line.strip() or 'MP' in fs_line: # Skip empty lines and header
|
||||
if not fs_line.strip() or 'MP' in fs_line:
|
||||
logger.debug(f"Skipping line: {fs_line}")
|
||||
continue
|
||||
|
||||
logger.debug(f"Processing filesystem line: {fs_line}")
|
||||
try:
|
||||
parts = fs_line.split()
|
||||
if len(parts) >= 6:
|
||||
@ -1377,22 +1382,27 @@ class SystemHealthMonitor:
|
||||
'usage_percent': 0
|
||||
}
|
||||
|
||||
|
||||
# Skip pool name entries that start with 'appPool:'
|
||||
if parts[0].startswith('appPool:'):
|
||||
logger.debug(f"Skipping appPool entry: {parts[0]}")
|
||||
continue
|
||||
|
||||
# Skip mediafs mount entries
|
||||
if '/mnt/pve/mediaf' in parts[0]:
|
||||
logger.debug(f"Skipping mediafs mount: {parts[0]}")
|
||||
continue
|
||||
|
||||
mountpoint = parts[5]
|
||||
logger.debug(f"Processing mountpoint: {mountpoint}")
|
||||
|
||||
if parts[1][-1] in 'BKMGT' and parts[2][-1] in 'BKMGT':
|
||||
total_size = self._convert_size_to_bytes(parts[1])
|
||||
used_size = self._convert_size_to_bytes(parts[2])
|
||||
usage_percent = float(parts[4].rstrip('%'))
|
||||
|
||||
logger.debug(f"Storage metrics for {mountpoint}:")
|
||||
logger.debug(f" Total: {parts[1]}")
|
||||
logger.debug(f" Used: {parts[2]}")
|
||||
logger.debug(f" Usage: {usage_percent}%")
|
||||
|
||||
filesystem.update({
|
||||
'mountpoint': mountpoint,
|
||||
'total': parts[1],
|
||||
@ -1402,10 +1412,12 @@ class SystemHealthMonitor:
|
||||
})
|
||||
|
||||
if usage_percent >= self.CONFIG['THRESHOLDS']['DISK_CRITICAL']:
|
||||
logger.debug(f"CRITICAL: Storage usage {usage_percent}% exceeds critical threshold")
|
||||
lxc_health['status'] = 'CRITICAL'
|
||||
issue = f"LXC {vmid} critical storage usage: {usage_percent}% on {mountpoint}"
|
||||
lxc_health['issues'].append(issue)
|
||||
elif usage_percent >= self.CONFIG['THRESHOLDS']['DISK_WARNING']:
|
||||
logger.debug(f"WARNING: Storage usage {usage_percent}% exceeds warning threshold")
|
||||
if lxc_health['status'] != 'CRITICAL':
|
||||
lxc_health['status'] = 'WARNING'
|
||||
issue = f"LXC {vmid} high storage usage: {usage_percent}% on {mountpoint}"
|
||||
@ -1413,29 +1425,26 @@ class SystemHealthMonitor:
|
||||
|
||||
container_info['filesystems'].append(filesystem)
|
||||
except Exception as e:
|
||||
if self.dry_run:
|
||||
logger.debug(f"Error processing filesystem line for container {vmid}: {str(e)}")
|
||||
logger.debug(f"Error processing filesystem line for container {vmid}: {str(e)}")
|
||||
continue
|
||||
|
||||
lxc_health['containers'].append(container_info)
|
||||
logger.debug(f"Added container info for VMID {vmid}")
|
||||
|
||||
if self.dry_run:
|
||||
logger.debug("=== LXC Storage Check Summary ===")
|
||||
logger.debug(f"Status: {lxc_health['status']}")
|
||||
logger.debug(f"Total containers checked: {len(lxc_health['containers'])}")
|
||||
logger.debug(f"Issues found: {len(lxc_health['issues'])}")
|
||||
logger.debug("=== End LXC Storage Check ===\n")
|
||||
logger.debug("=== LXC Storage Check Summary ===")
|
||||
logger.debug(f"Status: {lxc_health['status']}")
|
||||
logger.debug(f"Total containers checked: {len(lxc_health['containers'])}")
|
||||
logger.debug(f"Issues found: {len(lxc_health['issues'])}")
|
||||
logger.debug("=== End LXC Storage Check ===")
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Critical error during LXC storage check: {str(e)}")
|
||||
lxc_health['status'] = 'ERROR'
|
||||
error_msg = f"Error checking LXC storage: {str(e)}"
|
||||
lxc_health['issues'].append(error_msg)
|
||||
if self.dry_run:
|
||||
logger.debug(f"Error during LXC storage check: {error_msg}")
|
||||
|
||||
return lxc_health
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="System Health Monitor")
|
||||
parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user