arch+security: route all server contact through Pulse, harden SSH
Lint / Python (flake8) (push) Failing after 43s
Lint / JS (eslint) (push) Successful in 8s
Security / Python Security (bandit) (push) Successful in 1m4s
Test / Python Tests (pytest) (push) Failing after 1m5s
Lint / Notify on failure (push) Successful in 2s
Lint / Deploy (push) Has been skipped
Lint / Python (flake8) (push) Failing after 43s
Lint / JS (eslint) (push) Successful in 8s
Security / Python Security (bandit) (push) Successful in 1m4s
Test / Python Tests (pytest) (push) Failing after 1m5s
Lint / Notify on failure (push) Successful in 2s
Lint / Deploy (push) Has been skipped
Architecture:
- Remove direct subprocess ping from Gandalf; add PulseClient.ping(), which runs the ping via the Pulse worker instead
- Remove standalone ping() function and subprocess import from monitor.py
- Add self.pulse alias to NetworkMonitor for convenience
- Both _process_ping_hosts() and the snapshot builder now use self.pulse.ping()

Security:
- Change StrictHostKeyChecking=no → accept-new in both SSH command builders (monitor.py _ssh_batch, diagnose.py build_ssh_command). The Pulse worker's known_hosts is now authoritative; host keys are recorded on first connection and verified on all subsequent ones. MITM attacks after initial key exchange are now detectable.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -75,7 +75,7 @@ class DiagnosticsRunner:
|
||||
)
|
||||
|
||||
return (
|
||||
f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 '
|
||||
f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 '
|
||||
f'-o BatchMode=yes -o LogLevel=ERROR '
|
||||
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
||||
f'root@{ip_q} \'{remote_cmd}\''
|
||||
|
||||
+12
-17
@@ -11,7 +11,6 @@ import json
|
||||
import logging
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
@@ -315,6 +314,14 @@ class PulseClient:
|
||||
return self.run_command(command, _retry=False)
|
||||
return None
|
||||
|
||||
def ping(self, ip: str, count: int = 3, timeout: int = 2) -> bool:
|
||||
"""Ping *ip* via the Pulse worker. Returns True if host responds."""
|
||||
ip_q = shlex.quote(ip)
|
||||
output = self.run_command(
|
||||
f'ping -c {count} -W {timeout} {ip_q} >/dev/null 2>&1 && echo REACHABLE || echo UNREACHABLE'
|
||||
)
|
||||
return output is not None and output.strip() == 'REACHABLE'
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Link stats collector (ethtool + Prometheus traffic metrics)
|
||||
@@ -363,7 +370,7 @@ class LinkStatsCollector:
|
||||
shell_cmd = ' '.join(parts)
|
||||
|
||||
ssh_cmd = (
|
||||
f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 '
|
||||
f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 '
|
||||
f'-o BatchMode=yes -o LogLevel=ERROR '
|
||||
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
||||
f'root@{ip} "{shell_cmd}"'
|
||||
@@ -638,19 +645,6 @@ class LinkStatsCollector:
|
||||
# --------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# --------------------------------------------------------------------------
|
||||
def ping(ip: str, count: int = 3, timeout: int = 2) -> bool:
|
||||
try:
|
||||
r = subprocess.run(
|
||||
['ping', '-c', str(count), '-W', str(timeout), ip],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
timeout=30,
|
||||
)
|
||||
return r.returncode == 0
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _now_utc() -> str:
|
||||
return datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')
|
||||
|
||||
@@ -671,6 +665,7 @@ class NetworkMonitor:
|
||||
self.unifi = UnifiClient(self.cfg['unifi'])
|
||||
self.tickets = TicketClient(self.cfg.get('ticket_api', {}))
|
||||
self.link_stats = LinkStatsCollector(self.cfg, self.prom, self.unifi)
|
||||
self.pulse = self.link_stats.pulse # convenience alias
|
||||
|
||||
mon = self.cfg.get('monitor', {})
|
||||
self.poll_interval = mon.get('poll_interval', 120)
|
||||
@@ -838,7 +833,7 @@ class NetworkMonitor:
|
||||
def _process_ping_hosts(self, suppressions: list) -> None:
|
||||
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
||||
name, ip = h['name'], h['ip']
|
||||
reachable = ping(ip)
|
||||
reachable = self.pulse.ping(ip)
|
||||
|
||||
if not reachable:
|
||||
sup = db.check_suppressed(suppressions, 'host', name)
|
||||
@@ -908,7 +903,7 @@ class NetworkMonitor:
|
||||
|
||||
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
||||
name, ip = h['name'], h['ip']
|
||||
reachable = ping(ip, count=1, timeout=2)
|
||||
reachable = self.pulse.ping(ip, count=1, timeout=2)
|
||||
hosts[name] = {
|
||||
'ip': ip,
|
||||
'interfaces': {},
|
||||
|
||||
Reference in New Issue
Block a user