arch+security: route all server contact through Pulse, harden SSH
Lint / Python (flake8) (push) Failing after 43s
Lint / JS (eslint) (push) Successful in 8s
Security / Python Security (bandit) (push) Successful in 1m4s
Test / Python Tests (pytest) (push) Failing after 1m5s
Lint / Notify on failure (push) Successful in 2s
Lint / Deploy (push) Has been skipped
Lint / Python (flake8) (push) Failing after 43s
Lint / JS (eslint) (push) Successful in 8s
Security / Python Security (bandit) (push) Successful in 1m4s
Test / Python Tests (pytest) (push) Failing after 1m5s
Lint / Notify on failure (push) Successful in 2s
Lint / Deploy (push) Has been skipped
Architecture:
- Remove the direct subprocess ping from Gandalf; add PulseClient.ping(), which runs the ping via the Pulse worker instead.
- Remove the standalone ping() function and the subprocess import from monitor.py.
- Add a self.pulse alias to NetworkMonitor for convenience.
- Both _process_ping_hosts() and the snapshot builder now use self.pulse.ping().

Security:
- Change StrictHostKeyChecking=no → accept-new in both SSH command builders (monitor.py _ssh_batch, diagnose.py build_ssh_command). The Pulse worker's known_hosts is now authoritative: host keys are recorded on first connection and verified on all subsequent ones. A host whose key later changes is refused, so MITM attacks after the initial key exchange are now detectable (the first connection itself is still trust-on-first-use).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -75,7 +75,7 @@ class DiagnosticsRunner:
|
|||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 '
|
f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 '
|
||||||
f'-o BatchMode=yes -o LogLevel=ERROR '
|
f'-o BatchMode=yes -o LogLevel=ERROR '
|
||||||
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
||||||
f'root@{ip_q} \'{remote_cmd}\''
|
f'root@{ip_q} \'{remote_cmd}\''
|
||||||
|
|||||||
+12
-17
@@ -11,7 +11,6 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import subprocess
|
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List, Optional
|
||||||
@@ -315,6 +314,14 @@ class PulseClient:
|
|||||||
return self.run_command(command, _retry=False)
|
return self.run_command(command, _retry=False)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def ping(self, ip: str, count: int = 3, timeout: int = 2) -> bool:
|
||||||
|
"""Ping *ip* via the Pulse worker. Returns True if host responds."""
|
||||||
|
ip_q = shlex.quote(ip)
|
||||||
|
output = self.run_command(
|
||||||
|
f'ping -c {count} -W {timeout} {ip_q} >/dev/null 2>&1 && echo REACHABLE || echo UNREACHABLE'
|
||||||
|
)
|
||||||
|
return output is not None and output.strip() == 'REACHABLE'
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Link stats collector (ethtool + Prometheus traffic metrics)
|
# Link stats collector (ethtool + Prometheus traffic metrics)
|
||||||
@@ -363,7 +370,7 @@ class LinkStatsCollector:
|
|||||||
shell_cmd = ' '.join(parts)
|
shell_cmd = ' '.join(parts)
|
||||||
|
|
||||||
ssh_cmd = (
|
ssh_cmd = (
|
||||||
f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 '
|
f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 '
|
||||||
f'-o BatchMode=yes -o LogLevel=ERROR '
|
f'-o BatchMode=yes -o LogLevel=ERROR '
|
||||||
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
||||||
f'root@{ip} "{shell_cmd}"'
|
f'root@{ip} "{shell_cmd}"'
|
||||||
@@ -638,19 +645,6 @@ class LinkStatsCollector:
|
|||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Helpers
|
# Helpers
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
def ping(ip: str, count: int = 3, timeout: int = 2) -> bool:
|
|
||||||
try:
|
|
||||||
r = subprocess.run(
|
|
||||||
['ping', '-c', str(count), '-W', str(timeout), ip],
|
|
||||||
stdout=subprocess.DEVNULL,
|
|
||||||
stderr=subprocess.DEVNULL,
|
|
||||||
timeout=30,
|
|
||||||
)
|
|
||||||
return r.returncode == 0
|
|
||||||
except Exception:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _now_utc() -> str:
|
def _now_utc() -> str:
|
||||||
return datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')
|
return datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')
|
||||||
|
|
||||||
@@ -671,6 +665,7 @@ class NetworkMonitor:
|
|||||||
self.unifi = UnifiClient(self.cfg['unifi'])
|
self.unifi = UnifiClient(self.cfg['unifi'])
|
||||||
self.tickets = TicketClient(self.cfg.get('ticket_api', {}))
|
self.tickets = TicketClient(self.cfg.get('ticket_api', {}))
|
||||||
self.link_stats = LinkStatsCollector(self.cfg, self.prom, self.unifi)
|
self.link_stats = LinkStatsCollector(self.cfg, self.prom, self.unifi)
|
||||||
|
self.pulse = self.link_stats.pulse # convenience alias
|
||||||
|
|
||||||
mon = self.cfg.get('monitor', {})
|
mon = self.cfg.get('monitor', {})
|
||||||
self.poll_interval = mon.get('poll_interval', 120)
|
self.poll_interval = mon.get('poll_interval', 120)
|
||||||
@@ -838,7 +833,7 @@ class NetworkMonitor:
|
|||||||
def _process_ping_hosts(self, suppressions: list) -> None:
|
def _process_ping_hosts(self, suppressions: list) -> None:
|
||||||
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
||||||
name, ip = h['name'], h['ip']
|
name, ip = h['name'], h['ip']
|
||||||
reachable = ping(ip)
|
reachable = self.pulse.ping(ip)
|
||||||
|
|
||||||
if not reachable:
|
if not reachable:
|
||||||
sup = db.check_suppressed(suppressions, 'host', name)
|
sup = db.check_suppressed(suppressions, 'host', name)
|
||||||
@@ -908,7 +903,7 @@ class NetworkMonitor:
|
|||||||
|
|
||||||
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
||||||
name, ip = h['name'], h['ip']
|
name, ip = h['name'], h['ip']
|
||||||
reachable = ping(ip, count=1, timeout=2)
|
reachable = self.pulse.ping(ip, count=1, timeout=2)
|
||||||
hosts[name] = {
|
hosts[name] = {
|
||||||
'ip': ip,
|
'ip': ip,
|
||||||
'interfaces': {},
|
'interfaces': {},
|
||||||
|
|||||||
Reference in New Issue
Block a user