Compare commits

...

5 Commits

Author SHA1 Message Date
jared 222bdb08ab Fix suppression annotation for interface_down not checking host-level rules
Lint / Python (flake8) (push) Successful in 38s
Lint / JS (eslint) (push) Successful in 7s
Security / Python Security (bandit) (push) Successful in 39s
Test / Python Tests (pytest) (push) Successful in 1m5s
Lint / Notify on failure (push) Has been skipped
Lint / Deploy (push) Successful in 4s
monitor.py checks both 'interface' and 'host' suppressions for interface_down
events, but _annotate_suppressions only checked 'interface'. A host-level
suppression would silently suppress tickets but not mark the table row as
suppressed in the UI.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-13 13:14:46 -04:00
jared 8dd744b039 Show suppressed badge on host cards during global maintenance windows
Lint / Python (flake8) (push) Successful in 40s
Lint / JS (eslint) (push) Successful in 7s
Security / Python Security (bandit) (push) Successful in 38s
Test / Python Tests (pytest) (push) Successful in 52s
Lint / Notify on failure (push) Has been skipped
Lint / Deploy (push) Successful in 3s
Global suppressions (target_type='all') have an empty target_name, so
the selectattr filter never matched them, leaving no visual indicator
when a global maintenance window was active. Pre-compute has_global_sup
before the host loop and OR it into the badge condition.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-13 13:12:25 -04:00
jared 9e2be150b5 Use grep -F in dmesg filter to prevent interface name being treated as regex
Lint / Python (flake8) (push) Successful in 38s
Lint / JS (eslint) (push) Failing after 13s
Security / Python Security (bandit) (push) Successful in 42s
Test / Python Tests (pytest) (push) Successful in 50s
Lint / Notify on failure (push) Successful in 2s
Lint / Deploy (push) Has been skipped
grep {iface} treats dots and other special chars as regex metacharacters.
Switch to grep -F -- {iface} for fixed-string matching and to prevent
a leading dash in the interface name from being parsed as a grep flag.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-13 11:12:02 -04:00
jared ed5ba5c59e Remove unused is_new parameter from ticket helper methods
After fixing the is_new guard bug, is_new is no longer used inside
_ticket_interface, _ticket_unifi, or _ticket_unreachable. Drop it from
their signatures and call sites.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-13 11:10:32 -04:00
jared 2be44d8b24 Fix ticket_id never stored when fail_thresh>1; guard sessionStorage JSON.parse
Lint / Python (flake8) (push) Successful in 45s
Lint / JS (eslint) (push) Successful in 8s
Security / Python Security (bandit) (push) Successful in 43s
Test / Python Tests (pytest) (push) Successful in 51s
Lint / Notify on failure (push) Has been skipped
Lint / Deploy (push) Successful in 3s
monitor.py: _ticket_interface/_ticket_unifi/_ticket_unreachable all used
`if tid and is_new` to guard db.set_ticket_id(). Since is_new is True only
on the first upsert (consec=1) but tickets are created at consec>=fail_thresh
(default 2), is_new is always False when the ticket is created (whenever fail_thresh > 1), so the
ticket link never appeared in the UI. Changed to `if tid:`.

links.html: JSON.parse(sessionStorage.getItem(...)) in togglePanel and
restoreCollapseState had no try-catch. Corrupt/stale session storage would
throw an uncaught SyntaxError. Also wrapped all sessionStorage.setItem
calls in try-catch to defend against storage-full / private-browsing errors.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-11 23:45:20 -04:00
5 changed files with 42 additions and 28 deletions
+19 -10
View File
@@ -174,17 +174,26 @@ _PAGE_LIMIT = 200 # max events returned per request
def _annotate_suppressions(events: list, suppressions: list) -> None: def _annotate_suppressions(events: list, suppressions: list) -> None:
"""Annotate each event dict in-place with an is_suppressed bool.""" """Annotate each event dict in-place with an is_suppressed bool.
Mirrors the suppression check order in monitor.py exactly:
interface_down → interface OR host
unifi_device_* → unifi_device
everything else → host
"""
for ev in events: for ev in events:
sup_type = ( etype = ev.get('event_type', '')
'unifi_device' if ev.get('event_type') == 'unifi_device_offline' name = ev.get('target_name', '')
else 'interface' if ev.get('event_type') == 'interface_down' detail = ev.get('target_detail', '') or ''
else 'host' if etype == 'interface_down':
) ev['is_suppressed'] = (
ev['is_suppressed'] = db.check_suppressed( db.check_suppressed(suppressions, 'interface', name, detail) or
suppressions, sup_type, db.check_suppressed(suppressions, 'host', name)
ev.get('target_name', ''), ev.get('target_detail', '') or '', )
) elif etype == 'unifi_device_offline':
ev['is_suppressed'] = db.check_suppressed(suppressions, 'unifi_device', name, detail)
else:
ev['is_suppressed'] = db.check_suppressed(suppressions, 'host', name, detail)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
+1 -1
View File
@@ -68,7 +68,7 @@ class DiagnosticsRunner:
f' echo "=== ip_route ===";' f' echo "=== ip_route ===";'
f' ip route show dev {q} 2>/dev/null;' f' ip route show dev {q} 2>/dev/null;'
f' echo "=== dmesg ===";' f' echo "=== dmesg ===";'
f' dmesg 2>/dev/null | grep {q} | tail -50;' f' dmesg 2>/dev/null | grep -F -- {q} | tail -50;'
f' echo "=== lldpctl ===";' f' echo "=== lldpctl ===";'
f' lldpctl 2>/dev/null || echo "lldpd not running";' f' lldpctl 2>/dev/null || echo "lldpd not running";'
f' echo "=== end ==="' f' echo "=== end ==="'
+9 -9
View File
@@ -734,7 +734,7 @@ class NetworkMonitor:
f'Interface {iface} on {host} went link-down ({_now_utc()})', f'Interface {iface} on {host} went link-down ({_now_utc()})',
) )
if not sup and consec >= self.fail_thresh: if not sup and consec >= self.fail_thresh:
self._ticket_interface(event_id, is_new, host, iface, consec) self._ticket_interface(event_id, host, iface, consec)
if host_has_regression: if host_has_regression:
hosts_with_regression.append(host) hosts_with_regression.append(host)
@@ -771,7 +771,7 @@ class NetworkMonitor:
db.resolve_event('cluster_network_issue', self.cluster_name, '') db.resolve_event('cluster_network_issue', self.cluster_name, '')
def _ticket_interface( def _ticket_interface(
self, event_id: int, is_new: bool, host: str, iface: str, consec: int self, event_id: int, host: str, iface: str, consec: int
) -> None: ) -> None:
title = ( title = (
f'[{host}][auto][production][issue][network][single-node] ' f'[{host}][auto][production][issue][network][single-node] '
@@ -789,7 +789,7 @@ class NetworkMonitor:
f'Please inspect the cable/SFP/switch port for {host}/{iface}.' f'Please inspect the cable/SFP/switch port for {host}/{iface}.'
) )
tid = self.tickets.create(title, desc, priority='2') tid = self.tickets.create(title, desc, priority='2')
if tid and is_new: if tid:
db.set_ticket_id(event_id, tid) db.set_ticket_id(event_id, tid)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -810,11 +810,11 @@ class NetworkMonitor:
f'UniFi {name} ({d.get("ip","")}) offline ({_now_utc()})', f'UniFi {name} ({d.get("ip","")}) offline ({_now_utc()})',
) )
if not sup and consec >= self.fail_thresh: if not sup and consec >= self.fail_thresh:
self._ticket_unifi(event_id, is_new, d) self._ticket_unifi(event_id, d)
else: else:
db.resolve_event('unifi_device_offline', name, d.get('type', '')) db.resolve_event('unifi_device_offline', name, d.get('type', ''))
def _ticket_unifi(self, event_id: int, is_new: bool, device: dict) -> None: def _ticket_unifi(self, event_id: int, device: dict) -> None:
name = device['name'] name = device['name']
title = ( title = (
f'[{name}][auto][production][issue][network][single-node] ' f'[{name}][auto][production][issue][network][single-node] '
@@ -831,7 +831,7 @@ class NetworkMonitor:
f'Please check power and cable connectivity.' f'Please check power and cable connectivity.'
) )
tid = self.tickets.create(title, desc, priority='2') tid = self.tickets.create(title, desc, priority='2')
if tid and is_new: if tid:
db.set_ticket_id(event_id, tid) db.set_ticket_id(event_id, tid)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -850,12 +850,12 @@ class NetworkMonitor:
f'Host {name} ({ip}) unreachable via ping ({_now_utc()})', f'Host {name} ({ip}) unreachable via ping ({_now_utc()})',
) )
if not sup and consec >= self.fail_thresh: if not sup and consec >= self.fail_thresh:
self._ticket_unreachable(event_id, is_new, name, ip, consec) self._ticket_unreachable(event_id, name, ip, consec)
else: else:
db.resolve_event('host_unreachable', name, ip) db.resolve_event('host_unreachable', name, ip)
def _ticket_unreachable( def _ticket_unreachable(
self, event_id: int, is_new: bool, name: str, ip: str, consec: int self, event_id: int, name: str, ip: str, consec: int
) -> None: ) -> None:
title = ( title = (
f'[{name}][auto][production][issue][network][single-node] ' f'[{name}][auto][production][issue][network][single-node] '
@@ -873,7 +873,7 @@ class NetworkMonitor:
f'Please check the host power, management interface, and network connectivity.' f'Please check the host power, management interface, and network connectivity.'
) )
tid = self.tickets.create(title, desc, priority='2') tid = self.tickets.create(title, desc, priority='2')
if tid and is_new: if tid:
db.set_ticket_id(event_id, tid) db.set_ticket_id(event_id, tid)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
+2 -1
View File
@@ -324,6 +324,7 @@
</div> </div>
</div> </div>
<div class="host-grid" id="host-grid"> <div class="host-grid" id="host-grid">
{%- set has_global_sup = suppressions | selectattr('target_type', 'equalto', 'all') | list | length > 0 -%}
{% for name, host in snapshot.hosts.items() %} {% for name, host in snapshot.hosts.items() %}
{% set suppressed = suppressions | selectattr('target_name', 'equalto', name) | list %} {% set suppressed = suppressions | selectattr('target_name', 'equalto', name) | list %}
<div class="host-card host-card-{{ host.status }}" data-host="{{ name }}"> <div class="host-card host-card-{{ host.status }}" data-host="{{ name }}">
@@ -331,7 +332,7 @@
<div class="host-name-row"> <div class="host-name-row">
<span class="host-status-dot dot-{{ host.status }}"></span> <span class="host-status-dot dot-{{ host.status }}"></span>
<span class="host-name">{{ name }}</span> <span class="host-name">{{ name }}</span>
{% if suppressed %} {% if suppressed or has_global_sup %}
<span class="badge-suppressed" title="Suppressed">🔕</span> <span class="badge-suppressed" title="Suppressed">🔕</span>
{% endif %} {% endif %}
</div> </div>
+11 -7
View File
@@ -372,14 +372,16 @@ function togglePanel(panel) {
if (title) title.setAttribute('aria-expanded', isCollapsed ? 'false' : 'true'); if (title) title.setAttribute('aria-expanded', isCollapsed ? 'false' : 'true');
const id = panel.id; const id = panel.id;
if (id) { if (id) {
const collapsed = JSON.parse(sessionStorage.getItem('linksCollapsed') || '{}'); let collapsed = {};
try { collapsed = JSON.parse(sessionStorage.getItem('linksCollapsed') || '{}'); } catch(_) {}
collapsed[id] = panel.classList.contains('collapsed'); collapsed[id] = panel.classList.contains('collapsed');
sessionStorage.setItem('linksCollapsed', JSON.stringify(collapsed)); try { sessionStorage.setItem('linksCollapsed', JSON.stringify(collapsed)); } catch(_) {}
} }
} }
function restoreCollapseState() { function restoreCollapseState() {
const collapsed = JSON.parse(sessionStorage.getItem('linksCollapsed') || '{}'); let collapsed = {};
try { collapsed = JSON.parse(sessionStorage.getItem('linksCollapsed') || '{}'); } catch(_) {}
for (const [id, isCollapsed] of Object.entries(collapsed)) { for (const [id, isCollapsed] of Object.entries(collapsed)) {
const panel = document.getElementById(id); const panel = document.getElementById(id);
if (!panel) continue; if (!panel) continue;
@@ -507,9 +509,11 @@ function collapseAll() {
if (btn) btn.textContent = '[+]'; if (btn) btn.textContent = '[+]';
if (title) title.setAttribute('aria-expanded', 'false'); if (title) title.setAttribute('aria-expanded', 'false');
}); });
sessionStorage.setItem('linksCollapsed', JSON.stringify( try {
Object.fromEntries([...document.querySelectorAll('.link-host-panel')].map(p => [p.id, true])) sessionStorage.setItem('linksCollapsed', JSON.stringify(
)); Object.fromEntries([...document.querySelectorAll('.link-host-panel')].map(p => [p.id, true]))
));
} catch(_) {}
} }
function expandAll() { function expandAll() {
@@ -520,7 +524,7 @@ function expandAll() {
if (btn) btn.textContent = '[]'; if (btn) btn.textContent = '[]';
if (title) title.setAttribute('aria-expanded', 'true'); if (title) title.setAttribute('aria-expanded', 'true');
}); });
sessionStorage.setItem('linksCollapsed', '{}'); try { sessionStorage.setItem('linksCollapsed', '{}'); } catch(_) {}
} }
// ── Stale data warning ──────────────────────────────────────────── // ── Stale data warning ────────────────────────────────────────────