From 38297e616f945e86bf6096ccad43ecde6d27ff54 Mon Sep 17 00:00:00 2001 From: Jared Vititoe Date: Sun, 10 May 2026 23:58:16 -0400 Subject: [PATCH] arch+security: route all server contact through Pulse, harden SSH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture: - Remove direct subprocess ping from Gandalf; add PulseClient.ping() which runs the ping via the Pulse worker instead - Remove standalone ping() function and subprocess import from monitor.py - Add self.pulse alias to NetworkMonitor for convenience - Both _process_ping_hosts() and snapshot builder now use self.pulse.ping() Security: - Change StrictHostKeyChecking=no → accept-new in both SSH command builders (monitor.py _ssh_batch, diagnose.py build_ssh_command). The Pulse worker's known_hosts is now authoritative; host keys are recorded on first connection and verified on all subsequent ones. MITM attacks after initial key exchange are now detectable. Co-Authored-By: Claude Sonnet 4.6 --- diagnose.py | 2 +- monitor.py | 29 ++++++++++++----------------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/diagnose.py b/diagnose.py index e2140c3..77552d0 100644 --- a/diagnose.py +++ b/diagnose.py @@ -75,7 +75,7 @@ class DiagnosticsRunner: ) return ( - f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 ' + f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 ' f'-o BatchMode=yes -o LogLevel=ERROR ' f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 ' f'root@{ip_q} \'{remote_cmd}\'' diff --git a/monitor.py b/monitor.py index b91895a..9d287ec 100644 --- a/monitor.py +++ b/monitor.py @@ -11,7 +11,6 @@ import json import logging import re import shlex -import subprocess import time from datetime import datetime from typing import Dict, List, Optional @@ -315,6 +314,14 @@ class PulseClient: return self.run_command(command, _retry=False) return None + def ping(self, ip: str, count: int = 3, timeout: int = 2) -> bool: + """Ping *ip* via the Pulse worker. Returns True if host responds.""" + ip_q = shlex.quote(ip) + output = self.run_command( + f'ping -c {count} -W {timeout} {ip_q} >/dev/null 2>&1 && echo REACHABLE || echo UNREACHABLE' + ) + return output is not None and output.strip() == 'REACHABLE' + # -------------------------------------------------------------------------- # Link stats collector (ethtool + Prometheus traffic metrics) @@ -363,7 +370,7 @@ class LinkStatsCollector: shell_cmd = ' '.join(parts) ssh_cmd = ( - f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 ' + f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 ' f'-o BatchMode=yes -o LogLevel=ERROR ' f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 ' f'root@{ip} "{shell_cmd}"' @@ -638,19 +645,6 @@ class LinkStatsCollector: # -------------------------------------------------------------------------- # Helpers # -------------------------------------------------------------------------- -def ping(ip: str, count: int = 3, timeout: int = 2) -> bool: - try: - r = subprocess.run( - ['ping', '-c', str(count), '-W', str(timeout), ip], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=30, - ) - return r.returncode == 0 - except Exception: - return False - - def _now_utc() -> str: return datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC') @@ -671,6 +665,7 @@ class NetworkMonitor: self.unifi = UnifiClient(self.cfg['unifi']) self.tickets = TicketClient(self.cfg.get('ticket_api', {})) self.link_stats = LinkStatsCollector(self.cfg, self.prom, self.unifi) + self.pulse = self.link_stats.pulse # convenience alias mon = self.cfg.get('monitor', {}) self.poll_interval = mon.get('poll_interval', 120) @@ -838,7 +833,7 @@ class NetworkMonitor: def _process_ping_hosts(self, suppressions: list) -> None: for h in self.cfg.get('monitor', {}).get('ping_hosts', []): name, ip = h['name'], h['ip'] - reachable = ping(ip) + reachable = self.pulse.ping(ip) if not reachable: sup = db.check_suppressed(suppressions, 'host', name) @@ -908,7 +903,7 @@ class NetworkMonitor: for h in self.cfg.get('monitor', {}).get('ping_hosts', []): name, ip = h['name'], h['ip'] - reachable = ping(ip, count=1, timeout=2) + reachable = self.pulse.ping(ip, count=1, timeout=2) hosts[name] = { 'ip': ip, 'interfaces': {},