Debug to Verify Fix
This commit is contained in:
@ -1320,6 +1320,7 @@ class SystemHealthMonitor:
|
|||||||
"""
|
"""
|
||||||
Check storage utilization for all running LXC containers
|
Check storage utilization for all running LXC containers
|
||||||
"""
|
"""
|
||||||
|
logger.debug("Starting LXC storage check")
|
||||||
lxc_health = {
|
lxc_health = {
|
||||||
'status': 'OK',
|
'status': 'OK',
|
||||||
'containers': [],
|
'containers': [],
|
||||||
@ -1327,16 +1328,14 @@ class SystemHealthMonitor:
|
|||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if self.dry_run:
|
logger.debug("Executing 'pct list' command")
|
||||||
logger.debug("=== LXC Storage Check (Dry Run) ===")
|
|
||||||
logger.debug("Executing: pct list")
|
|
||||||
|
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
['pct', 'list'],
|
['pct', 'list'],
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
text=True
|
text=True
|
||||||
)
|
)
|
||||||
|
logger.debug(f"pct list output:\n{result.stdout}")
|
||||||
|
|
||||||
for line in result.stdout.split('\n')[1:]:
|
for line in result.stdout.split('\n')[1:]:
|
||||||
if not line.strip():
|
if not line.strip():
|
||||||
@ -1344,17 +1343,21 @@ class SystemHealthMonitor:
|
|||||||
|
|
||||||
parts = line.split()
|
parts = line.split()
|
||||||
if len(parts) < 2:
|
if len(parts) < 2:
|
||||||
|
logger.debug(f"Skipping invalid line: {line}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
vmid, status = parts[0], parts[1]
|
vmid, status = parts[0], parts[1]
|
||||||
|
logger.debug(f"Processing container VMID: {vmid}, Status: {status}")
|
||||||
|
|
||||||
if status.lower() == 'running':
|
if status.lower() == 'running':
|
||||||
|
logger.debug(f"Checking disk usage for running container {vmid}")
|
||||||
disk_info = subprocess.run(
|
disk_info = subprocess.run(
|
||||||
['pct', 'df', vmid],
|
['pct', 'df', vmid],
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
text=True
|
text=True
|
||||||
)
|
)
|
||||||
|
logger.debug(f"pct df {vmid} output:\n{disk_info.stdout}")
|
||||||
|
|
||||||
container_info = {
|
container_info = {
|
||||||
'vmid': vmid,
|
'vmid': vmid,
|
||||||
@ -1362,9 +1365,11 @@ class SystemHealthMonitor:
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fs_line in disk_info.stdout.split('\n')[1:]:
|
for fs_line in disk_info.stdout.split('\n')[1:]:
|
||||||
if not fs_line.strip() or 'MP' in fs_line: # Skip empty lines and header
|
if not fs_line.strip() or 'MP' in fs_line:
|
||||||
|
logger.debug(f"Skipping line: {fs_line}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
logger.debug(f"Processing filesystem line: {fs_line}")
|
||||||
try:
|
try:
|
||||||
parts = fs_line.split()
|
parts = fs_line.split()
|
||||||
if len(parts) >= 6:
|
if len(parts) >= 6:
|
||||||
@ -1377,22 +1382,27 @@ class SystemHealthMonitor:
|
|||||||
'usage_percent': 0
|
'usage_percent': 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Skip pool name entries that start with 'appPool:'
|
|
||||||
if parts[0].startswith('appPool:'):
|
if parts[0].startswith('appPool:'):
|
||||||
|
logger.debug(f"Skipping appPool entry: {parts[0]}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Skip mediafs mount entries
|
|
||||||
if '/mnt/pve/mediaf' in parts[0]:
|
if '/mnt/pve/mediaf' in parts[0]:
|
||||||
|
logger.debug(f"Skipping mediafs mount: {parts[0]}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
mountpoint = parts[5]
|
mountpoint = parts[5]
|
||||||
|
logger.debug(f"Processing mountpoint: {mountpoint}")
|
||||||
|
|
||||||
if parts[1][-1] in 'BKMGT' and parts[2][-1] in 'BKMGT':
|
if parts[1][-1] in 'BKMGT' and parts[2][-1] in 'BKMGT':
|
||||||
total_size = self._convert_size_to_bytes(parts[1])
|
total_size = self._convert_size_to_bytes(parts[1])
|
||||||
used_size = self._convert_size_to_bytes(parts[2])
|
used_size = self._convert_size_to_bytes(parts[2])
|
||||||
usage_percent = float(parts[4].rstrip('%'))
|
usage_percent = float(parts[4].rstrip('%'))
|
||||||
|
|
||||||
|
logger.debug(f"Storage metrics for {mountpoint}:")
|
||||||
|
logger.debug(f" Total: {parts[1]}")
|
||||||
|
logger.debug(f" Used: {parts[2]}")
|
||||||
|
logger.debug(f" Usage: {usage_percent}%")
|
||||||
|
|
||||||
filesystem.update({
|
filesystem.update({
|
||||||
'mountpoint': mountpoint,
|
'mountpoint': mountpoint,
|
||||||
'total': parts[1],
|
'total': parts[1],
|
||||||
@ -1402,10 +1412,12 @@ class SystemHealthMonitor:
|
|||||||
})
|
})
|
||||||
|
|
||||||
if usage_percent >= self.CONFIG['THRESHOLDS']['DISK_CRITICAL']:
|
if usage_percent >= self.CONFIG['THRESHOLDS']['DISK_CRITICAL']:
|
||||||
|
logger.debug(f"CRITICAL: Storage usage {usage_percent}% exceeds critical threshold")
|
||||||
lxc_health['status'] = 'CRITICAL'
|
lxc_health['status'] = 'CRITICAL'
|
||||||
issue = f"LXC {vmid} critical storage usage: {usage_percent}% on {mountpoint}"
|
issue = f"LXC {vmid} critical storage usage: {usage_percent}% on {mountpoint}"
|
||||||
lxc_health['issues'].append(issue)
|
lxc_health['issues'].append(issue)
|
||||||
elif usage_percent >= self.CONFIG['THRESHOLDS']['DISK_WARNING']:
|
elif usage_percent >= self.CONFIG['THRESHOLDS']['DISK_WARNING']:
|
||||||
|
logger.debug(f"WARNING: Storage usage {usage_percent}% exceeds warning threshold")
|
||||||
if lxc_health['status'] != 'CRITICAL':
|
if lxc_health['status'] != 'CRITICAL':
|
||||||
lxc_health['status'] = 'WARNING'
|
lxc_health['status'] = 'WARNING'
|
||||||
issue = f"LXC {vmid} high storage usage: {usage_percent}% on {mountpoint}"
|
issue = f"LXC {vmid} high storage usage: {usage_percent}% on {mountpoint}"
|
||||||
@ -1413,29 +1425,26 @@ class SystemHealthMonitor:
|
|||||||
|
|
||||||
container_info['filesystems'].append(filesystem)
|
container_info['filesystems'].append(filesystem)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.dry_run:
|
|
||||||
logger.debug(f"Error processing filesystem line for container {vmid}: {str(e)}")
|
logger.debug(f"Error processing filesystem line for container {vmid}: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
lxc_health['containers'].append(container_info)
|
lxc_health['containers'].append(container_info)
|
||||||
|
logger.debug(f"Added container info for VMID {vmid}")
|
||||||
|
|
||||||
if self.dry_run:
|
|
||||||
logger.debug("=== LXC Storage Check Summary ===")
|
logger.debug("=== LXC Storage Check Summary ===")
|
||||||
logger.debug(f"Status: {lxc_health['status']}")
|
logger.debug(f"Status: {lxc_health['status']}")
|
||||||
logger.debug(f"Total containers checked: {len(lxc_health['containers'])}")
|
logger.debug(f"Total containers checked: {len(lxc_health['containers'])}")
|
||||||
logger.debug(f"Issues found: {len(lxc_health['issues'])}")
|
logger.debug(f"Issues found: {len(lxc_health['issues'])}")
|
||||||
logger.debug("=== End LXC Storage Check ===\n")
|
logger.debug("=== End LXC Storage Check ===")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.debug(f"Critical error during LXC storage check: {str(e)}")
|
||||||
lxc_health['status'] = 'ERROR'
|
lxc_health['status'] = 'ERROR'
|
||||||
error_msg = f"Error checking LXC storage: {str(e)}"
|
error_msg = f"Error checking LXC storage: {str(e)}"
|
||||||
lxc_health['issues'].append(error_msg)
|
lxc_health['issues'].append(error_msg)
|
||||||
if self.dry_run:
|
|
||||||
logger.debug(f"Error during LXC storage check: {error_msg}")
|
|
||||||
|
|
||||||
return lxc_health
|
return lxc_health
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="System Health Monitor")
|
parser = argparse.ArgumentParser(description="System Health Monitor")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|||||||
Reference in New Issue
Block a user