Security and reliability fixes: input validation, logging, job cleanup

- C5: Validate host_ip (IPv4 check) and iface (allowlist regex) before they are passed to the SSH command builder
- H6: Upgrade Pulse failure logging from debug to error so operators see outages
- M6: Replace per-request O(n) purge with a background daemon thread (runs every 2 min)
- M7: Background thread marks jobs stuck in 'running' for more than 5 min as errored

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-12 17:30:50 -04:00
parent b1dd5f9cad
commit 0335845101
2 changed files with 34 additions and 11 deletions

View File

@@ -261,7 +261,7 @@ class PulseClient:
execution_id = resp.json()['execution_id']
self.last_execution_id = execution_id
except Exception as e:
logger.debug(f'Pulse command submit failed: {e}')
logger.error(f'Pulse command submit failed: {e}')
return None
deadline = time.time() + self.timeout
@@ -284,7 +284,7 @@ class PulseClient:
if status == 'failed':
return None
except Exception as e:
logger.debug(f'Pulse poll failed: {e}')
logger.error(f'Pulse poll failed: {e}')
logger.warning(f'Pulse command timed out after {self.timeout}s')
return None
@@ -340,7 +340,7 @@ class LinkStatsCollector:
)
output = self.pulse.run_command(ssh_cmd)
if output is None:
logger.debug(f'Pulse ethtool collection returned None for {ip}')
logger.error(f'Pulse ethtool collection returned None for {ip}')
return {}
return self._parse_ssh_output(output)