Fix ping-only hosts polled twice per cycle with inconsistent parameters
Lint / Python (flake8) (push) Successful in 57s
Lint / JS (eslint) (push) Successful in 28s
Security / Python Security (bandit) (push) Successful in 1m14s
Lint / Notify on failure (push) Has been skipped
Lint / Deploy (push) Successful in 7s
Test / Python Tests (pytest) (push) Failing after 13m52s

_collect_snapshot called pulse.ping(count=1) independently of
_process_ping_hosts, which called pulse.ping(count=3). Each ping-only
host was therefore probed twice per cycle with different parameters,
which added network load and could show a host as 'up' in the dashboard
while simultaneously firing an 'unreachable' alert, or vice versa.

Now ping_states is computed once in run() using the alert-quality
parameters (count=3) and shared by both snapshot and alert processing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 23:13:43 -04:00
parent 31747c4bd3
commit a34898b8e8
+13 -6
View File
@@ -837,10 +837,10 @@ class NetworkMonitor:
# ------------------------------------------------------------------
# Ping-only hosts (no node_exporter)
# ------------------------------------------------------------------
def _process_ping_hosts(self, suppressions: list) -> None:
def _process_ping_hosts(self, suppressions: list, ping_states: Dict[str, bool]) -> None:
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
name, ip = h['name'], h['ip']
reachable = self.pulse.ping(ip)
reachable = ping_states.get(name, False)
if not reachable:
sup = db.check_suppressed(suppressions, 'host', name)
@@ -882,6 +882,7 @@ class NetworkMonitor:
def _collect_snapshot(
self, iface_states: Dict[str, Dict[str, bool]],
unifi_devices: Optional[List[dict]] = None,
ping_states: Optional[Dict[str, bool]] = None,
) -> dict:
# Accept pre-fetched devices; fall back to empty list if unavailable
display_unifi = unifi_devices if unifi_devices is not None else []
@@ -910,7 +911,7 @@ class NetworkMonitor:
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
name, ip = h['name'], h['ip']
reachable = self.pulse.ping(ip, count=1, timeout=2)
reachable = (ping_states or {}).get(name, False)
hosts[name] = {
'ip': ip,
'interfaces': {},
@@ -942,8 +943,14 @@ class NetworkMonitor:
# 2. Fetch UniFi devices once — used by both snapshot and alert processing
unifi_devices = self.unifi.get_devices()
# 3. Collect and store snapshot for dashboard
snapshot = self._collect_snapshot(iface_states, unifi_devices)
# 3a. Ping-only hosts once — shared by snapshot and alert processing
ping_states: Dict[str, bool] = {
h['name']: self.pulse.ping(h['ip'])
for h in self.cfg.get('monitor', {}).get('ping_hosts', [])
}
# 3b. Collect and store snapshot for dashboard
snapshot = self._collect_snapshot(iface_states, unifi_devices, ping_states)
db.set_state('network_snapshot', snapshot)
db.set_state('last_check', _now_utc())
@@ -959,7 +966,7 @@ class NetworkMonitor:
self._process_interfaces(iface_states, suppressions)
self._process_unifi(unifi_devices, suppressions)
self._process_ping_hosts(suppressions)
self._process_ping_hosts(suppressions, ping_states)
# Housekeeping: deactivate expired suppressions and purge old resolved events
db.cleanup_expired_suppressions()