diff --git a/app.py b/app.py
index ef9257d..a4036ad 100644
--- a/app.py
+++ b/app.py
@@ -391,8 +391,49 @@ def api_diagnose_poll(job_id: str):
 
 @app.route('/health')
 def health():
-    """Health check endpoint (no auth)."""
-    return jsonify({'status': 'ok', 'service': 'gandalf'})
+    """Health check endpoint (no auth). Checks DB and monitor freshness.
+
+    Responds 200 when every check passes and 503 when any is degraded.
+    Exception details are logged server-side only; this endpoint is
+    unauthenticated, so raw error text is not echoed to the caller.
+    """
+    from datetime import datetime, timezone
+
+    checks = {}
+    overall = 'ok'
+
+    # DB connectivity (the value read here also feeds the freshness check)
+    last_check = ''
+    try:
+        last_check = db.get_state('last_check', '')
+        checks['db'] = 'ok'
+    except Exception:
+        app.logger.exception('Health check: DB query failed')
+        checks['db'] = 'error'
+        overall = 'degraded'
+
+    # Monitor freshness: fail if last_check is older than 20 minutes
+    if checks['db'] != 'ok':
+        checks['monitor'] = 'unknown (db unavailable)'
+    elif not last_check:
+        checks['monitor'] = 'no data yet'
+    else:
+        try:
+            ts = datetime.strptime(last_check, '%Y-%m-%d %H:%M:%S UTC').replace(tzinfo=timezone.utc)
+            age_s = (datetime.now(timezone.utc) - ts).total_seconds()
+        except (TypeError, ValueError):
+            app.logger.exception('Health check: bad last_check value %r', last_check)
+            checks['monitor'] = 'error: unparseable last_check'
+            overall = 'degraded'
+        else:
+            if age_s > 1200:
+                checks['monitor'] = f'stale ({int(age_s)}s since last check)'
+                overall = 'degraded'
+            else:
+                checks['monitor'] = f'ok ({int(age_s)}s ago)'
+
+    status_code = 200 if overall == 'ok' else 503
+    return jsonify({'status': overall, 'service': 'gandalf', 'checks': checks}), status_code
 
 
 if __name__ == '__main__':
diff --git a/db.py b/db.py
index 0847fed..d814e26 100644
--- a/db.py
+++ b/db.py
@@ -269,6 +269,54 @@ def deactivate_suppression(sup_id: int) -> None:
             )
 
 
+def cleanup_expired_suppressions() -> int:
+    """Mark expired time-limited suppressions as inactive.
+
+    Returns:
+        Count of suppression rules deactivated.
+    """
+    with get_conn() as conn:
+        with conn.cursor() as cur:
+            cur.execute(
+                """UPDATE suppression_rules
+                   SET active=FALSE
+                   WHERE active=TRUE AND expires_at IS NOT NULL AND expires_at <= NOW()"""
+            )
+            n = cur.rowcount
+    if n:
+        logger.info(f'Deactivated {n} expired suppression(s)')
+    return n
+
+
+def purge_old_resolved_events(days: int = 90) -> int:
+    """Delete resolved events older than `days` days.
+
+    Args:
+        days: Retention window in days; must be at least 1.
+
+    Returns:
+        Count of events deleted.
+
+    Raises:
+        ValueError: If `days` is below 1. A zero or negative window would
+            match every resolved event, so it is rejected outright.
+    """
+    if days < 1:
+        raise ValueError(f'days must be >= 1, got {days!r}')
+    with get_conn() as conn:
+        with conn.cursor() as cur:
+            cur.execute(
+                """DELETE FROM network_events
+                   WHERE resolved_at IS NOT NULL
+                     AND resolved_at < DATE_SUB(NOW(), INTERVAL %s DAY)""",
+                (days,),
+            )
+            n = cur.rowcount
+    if n:
+        logger.info(f'Purged {n} old resolved event(s) (>{days}d)')
+    return n
+
+
 def check_suppressed(suppressions: list, target_type: str, target_name: str, target_detail: str = '') -> bool:
     """Check suppression against a pre-loaded list (avoids per-call DB queries)."""
     for s in suppressions:
diff --git a/monitor.py b/monitor.py
index 95c80e3..6961ab7 100644
--- a/monitor.py
+++ b/monitor.py
@@ -916,6 +916,14 @@ class NetworkMonitor:
 
             self._process_ping_hosts(suppressions)
 
+            # Housekeeping: deactivate expired suppressions and purge old resolved events.
+            # Best-effort: a cleanup error is logged and must not abort the check cycle.
+            try:
+                db.cleanup_expired_suppressions()
+                db.purge_old_resolved_events(days=90)
+            except Exception:
+                logger.exception('Housekeeping failed')
+
             logger.info('Network check cycle complete')
 
         except Exception as e:
diff --git a/static/app.js b/static/app.js
index 3c544a4..a91f389 100644
--- a/static/app.js
+++ b/static/app.js
@@ -49,6 +49,41 @@ function updateStatusBar(summary, lastCheck) {
 
     const lc = document.getElementById('last-check');
     if (lc && lastCheck) lc.textContent = lastCheck;
+
+    // Update browser tab title with alert count
+    const critCount = summary.critical || 0;
+    const warnCount = summary.warning || 0;
+    if (critCount) {
+        document.title = `(${critCount} CRIT) GANDALF`;
+    } else if (warnCount) {
+        document.title = `(${warnCount} WARN) GANDALF`;
+    } else {
+        document.title = 'GANDALF';
+    }
+
+    // Stale data banner: warn if last_check is older than 15 minutes
+    let staleBanner = document.getElementById('stale-banner');
+    if (lastCheck) {
+        // last_check format: "2026-03-14 14:14:21 UTC"; rewritten to ISO 8601 for parsing
+        const checkedAt = Date.parse(lastCheck.replace(' UTC', 'Z').replace(' ', 'T'));
+        // NaN when unparseable; NaN > 900 is false, so the banner is not shown
+        const checkAge = (Date.now() - checkedAt) / 1000;
+        if (checkAge > 900) { // 15 minutes
+            if (!staleBanner) {
+                staleBanner = document.createElement('div');
+                staleBanner.id = 'stale-banner';
+                staleBanner.className = 'stale-banner';
+                // Fall back to <body> so a page without .main cannot throw
+                const bannerHost = document.querySelector('.main') || document.body;
+                bannerHost.prepend(staleBanner);
+            }
+            const mins = Math.floor(checkAge / 60);
+            staleBanner.textContent = `⚠ Monitoring data is stale — last check was ${mins} minute${mins !== 1 ? 's' : ''} ago. The monitor daemon may be down.`;
+            staleBanner.style.display = '';
+        } else if (staleBanner) {
+            staleBanner.style.display = 'none';
+        }
+    }
 }
 
 function updateHostGrid(hosts) {
diff --git a/static/style.css b/static/style.css
index 1ad7362..4b9c9db 100644
--- a/static/style.css
+++ b/static/style.css
@@ -1423,3 +1423,16 @@ a:hover { text-decoration: underline; text-shadow: var(--glow-amber); }
   .inspector-panel.open { width:100%; }
   .inspector-panel-inner { width:100%; }
 }
+
+/* ── Stale monitoring banner ──────────────────────────────────────── */
+.stale-banner {
+  background: rgba(255, 160, 0, 0.12);
+  border: 1px solid var(--warning);
+  border-left: 4px solid var(--warning);
+  color: var(--warning);
+  padding: 10px 16px;
+  margin: 12px 16px 0;
+  font-size: 0.88em;
+  font-family: var(--font-mono);
+  border-radius: 2px;
+}