Fix ping-only hosts polled twice per cycle with inconsistent parameters
Lint / Python (flake8) (push) Successful in 57s
Lint / JS (eslint) (push) Successful in 28s
Security / Python Security (bandit) (push) Successful in 1m14s
Lint / Notify on failure (push) Has been skipped
Lint / Deploy (push) Successful in 7s
Test / Python Tests (pytest) (push) Failing after 13m52s
Lint / Python (flake8) (push) Successful in 57s
Lint / JS (eslint) (push) Successful in 28s
Security / Python Security (bandit) (push) Successful in 1m14s
Lint / Notify on failure (push) Has been skipped
Lint / Deploy (push) Successful in 7s
Test / Python Tests (pytest) (push) Failing after 13m52s
_collect_snapshot called pulse.ping(count=1) independently of _process_ping_hosts, which called pulse.ping(count=3). This doubled the network load and could show a host as 'up' in the dashboard while simultaneously firing an 'unreachable' alert, or vice versa. Now ping_states is computed once in run() using the alert-quality parameters (count=3) and is shared by both snapshot and alert processing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+13
-6
@@ -837,10 +837,10 @@ class NetworkMonitor:
|
||||
# ------------------------------------------------------------------
|
||||
# Ping-only hosts (no node_exporter)
|
||||
# ------------------------------------------------------------------
|
||||
def _process_ping_hosts(self, suppressions: list) -> None:
|
||||
def _process_ping_hosts(self, suppressions: list, ping_states: Dict[str, bool]) -> None:
|
||||
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
||||
name, ip = h['name'], h['ip']
|
||||
reachable = self.pulse.ping(ip)
|
||||
reachable = ping_states.get(name, False)
|
||||
|
||||
if not reachable:
|
||||
sup = db.check_suppressed(suppressions, 'host', name)
|
||||
@@ -882,6 +882,7 @@ class NetworkMonitor:
|
||||
def _collect_snapshot(
|
||||
self, iface_states: Dict[str, Dict[str, bool]],
|
||||
unifi_devices: Optional[List[dict]] = None,
|
||||
ping_states: Optional[Dict[str, bool]] = None,
|
||||
) -> dict:
|
||||
# Accept pre-fetched devices; fall back to empty list if unavailable
|
||||
display_unifi = unifi_devices if unifi_devices is not None else []
|
||||
@@ -910,7 +911,7 @@ class NetworkMonitor:
|
||||
|
||||
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
||||
name, ip = h['name'], h['ip']
|
||||
reachable = self.pulse.ping(ip, count=1, timeout=2)
|
||||
reachable = (ping_states or {}).get(name, False)
|
||||
hosts[name] = {
|
||||
'ip': ip,
|
||||
'interfaces': {},
|
||||
@@ -942,8 +943,14 @@ class NetworkMonitor:
|
||||
# 2. Fetch UniFi devices once — used by both snapshot and alert processing
|
||||
unifi_devices = self.unifi.get_devices()
|
||||
|
||||
# 3. Collect and store snapshot for dashboard
|
||||
snapshot = self._collect_snapshot(iface_states, unifi_devices)
|
||||
# 3a. Ping-only hosts once — shared by snapshot and alert processing
|
||||
ping_states: Dict[str, bool] = {
|
||||
h['name']: self.pulse.ping(h['ip'])
|
||||
for h in self.cfg.get('monitor', {}).get('ping_hosts', [])
|
||||
}
|
||||
|
||||
# 3b. Collect and store snapshot for dashboard
|
||||
snapshot = self._collect_snapshot(iface_states, unifi_devices, ping_states)
|
||||
db.set_state('network_snapshot', snapshot)
|
||||
db.set_state('last_check', _now_utc())
|
||||
|
||||
@@ -959,7 +966,7 @@ class NetworkMonitor:
|
||||
self._process_interfaces(iface_states, suppressions)
|
||||
self._process_unifi(unifi_devices, suppressions)
|
||||
|
||||
self._process_ping_hosts(suppressions)
|
||||
self._process_ping_hosts(suppressions, ping_states)
|
||||
|
||||
# Housekeeping: deactivate expired suppressions and purge old resolved events
|
||||
db.cleanup_expired_suppressions()
|
||||
|
||||
Reference in New Issue
Block a user