diff --git a/app.py b/app.py index 2c779bc..49ccbdc 100644 --- a/app.py +++ b/app.py @@ -4,6 +4,7 @@ Flask web application serving the monitoring dashboard and suppression management UI. Authentication via Authelia forward-auth headers. All monitoring and alerting is handled by the separate monitor.py daemon. """ +import hashlib import ipaddress import json import logging @@ -11,6 +12,7 @@ import re import threading import time import uuid +from datetime import datetime, timezone from functools import wraps from flask import Flask, jsonify, render_template, request @@ -31,9 +33,10 @@ _AVATAR_COLORS = ['lt-avatar--orange', 'lt-avatar--green', 'lt-avatar--purple', @app.template_filter('avatar_color') def avatar_color_filter(name: str) -> str: - return _AVATAR_COLORS[abs(hash(name)) % len(_AVATAR_COLORS)] + return _AVATAR_COLORS[int(hashlib.md5(name.encode()).hexdigest(), 16) % len(_AVATAR_COLORS)] _cfg = None +_cfg_lock = threading.Lock() @app.context_processor @@ -54,7 +57,6 @@ _diag_jobs: dict = {} _diag_lock = threading.Lock() -_last_event_purge = [0.0] # mutable container so the thread can update it def _purge_old_jobs_loop(): @@ -67,21 +69,12 @@ def _purge_old_jobs_loop(): stale = [jid for jid, j in _diag_jobs.items() if j.get('created_at', 0) < cutoff] for jid in stale: del _diag_jobs[jid] - for jid, j in _diag_jobs.items(): + for jid, j in list(_diag_jobs.items()): if j['status'] == 'running' and j.get('created_at', 0) < stuck_cutoff: j['status'] = 'done' j['result'] = {'status': 'error', 'error': 'Diagnostic timed out (thread crash)'} logger.error(f'Diagnostic job {jid} appeared stuck; marked as errored') - # Purge old resolved events once per day - now = time.time() - if now - _last_event_purge[0] > 86400: - try: - db.purge_old_resolved_events(days=90) - except Exception as e: - logger.error(f'Daily event purge failed: {e}') - _last_event_purge[0] = now - _purge_thread = threading.Thread(target=_purge_old_jobs_loop, daemon=True) _purge_thread.start() @@ -90,11 +83,24 @@ _purge_thread.start() def _config() -> dict: global _cfg if _cfg is None: - with open('config.json') as f: - _cfg = json.load(f) + with _cfg_lock: + if _cfg is None: + with open('config.json') as f: + _cfg = json.load(f) return _cfg +def _daemon_ok(last_check: str) -> bool: + """Return True if monitor last checked within 20 minutes.""" + if not last_check or last_check == 'Never': + return False + try: + ts = datetime.strptime(last_check, '%Y-%m-%d %H:%M:%S UTC').replace(tzinfo=timezone.utc) + return (datetime.now(timezone.utc) - ts).total_seconds() < 1200 + except Exception: + return False + + # --------------------------------------------------------------------------- # Auth helpers # --------------------------------------------------------------------------- @@ -206,11 +212,13 @@ def suppressions_page(): @require_auth def api_status(): active = db.get_active_events(limit=_PAGE_LIMIT) + last_check = db.get_state('last_check', 'Never') return jsonify({ 'summary': db.get_status_summary(), - 'last_check': db.get_state('last_check', 'Never'), + 'last_check': last_check, 'events': active, 'total_active': db.count_active_events(), + 'daemon_ok': _daemon_ok(last_check), }) @@ -453,7 +461,6 @@ def health(): try: last_check = db.get_state('last_check', '') if last_check: - from datetime import datetime, timezone ts = datetime.strptime(last_check, '%Y-%m-%d %H:%M:%S UTC').replace(tzinfo=timezone.utc) age_s = (datetime.now(timezone.utc) - ts).total_seconds() if age_s > 1200: diff --git a/db.py b/db.py index d0d3ba3..08873da 100644 --- a/db.py +++ b/db.py @@ -23,26 +23,39 @@ def _config() -> dict: return _config_cache +def _new_conn(cfg: dict): + return pymysql.connect( + host=cfg['host'], + port=cfg.get('port', 3306), + user=cfg['user'], + password=cfg['password'], + database=cfg['name'], + autocommit=True, + cursorclass=pymysql.cursors.DictCursor, + connect_timeout=10, + charset='utf8mb4', + ) + + @contextmanager def get_conn(): """Yield a per-thread cached database connection, reconnecting as needed.""" cfg = _config() conn = getattr(_local, 'conn', None) if conn is None: - conn = pymysql.connect( - host=cfg['host'], - port=cfg.get('port', 3306), - user=cfg['user'], - password=cfg['password'], - database=cfg['name'], - autocommit=True, - cursorclass=pymysql.cursors.DictCursor, - connect_timeout=10, - charset='utf8mb4', - ) + conn = _new_conn(cfg) _local.conn = conn else: - conn.ping(reconnect=True) + try: + conn.ping(reconnect=True) + except Exception: + try: + conn.close() + except Exception: + pass + _local.conn = None + conn = _new_conn(cfg) + _local.conn = conn yield conn diff --git a/monitor.py b/monitor.py index fde0d84..8373691 100644 --- a/monitor.py +++ b/monitor.py @@ -325,6 +325,7 @@ class LinkStatsCollector: def __init__(self, cfg: dict, prom: 'PrometheusClient', unifi: Optional['UnifiClient'] = None): + self.cfg = cfg self.prom = prom self.pulse = PulseClient(cfg) self.unifi = unifi @@ -876,8 +877,7 @@ class NetworkMonitor: # ------------------------------------------------------------------ # Snapshot collection (for dashboard) # ------------------------------------------------------------------ - def _collect_snapshot(self) -> dict: - iface_states = self.prom.get_interface_states() + def _collect_snapshot(self, iface_states: Dict[str, Dict[str, bool]]) -> dict: unifi_devices = self.unifi.get_devices() or [] hosts = {} @@ -930,23 +930,23 @@ class NetworkMonitor: try: logger.info('Starting network check cycle') - # 1. Collect and store snapshot for dashboard - snapshot = self._collect_snapshot() + # 1. Fetch interface states once — shared by snapshot and alert processing + iface_states = self.prom.get_interface_states() + + # 2. Collect and store snapshot for dashboard + snapshot = self._collect_snapshot(iface_states) db.set_state('network_snapshot', snapshot) db.set_state('last_check', _now_utc()) - # 2. Collect link stats (ethtool + traffic metrics) + # 3. Collect link stats (ethtool + traffic metrics) try: link_data = self.link_stats.collect(self._instance_map) db.set_state('link_stats', link_data) except Exception as e: logger.error(f'Link stats collection failed: {e}', exc_info=True) - # 3. Process alerts (separate Prometheus call for fresh data) - # Load suppressions once per cycle to avoid N*M DB queries + # 4. Process alerts using already-fetched interface states suppressions = db.get_active_suppressions() - - iface_states = self.prom.get_interface_states() self._process_interfaces(iface_states, suppressions) unifi_devices = self.unifi.get_devices() diff --git a/static/app.js b/static/app.js index becba3c..82e71ec 100644 --- a/static/app.js +++ b/static/app.js @@ -7,7 +7,10 @@ const _fetch = window.fetch; window.fetch = async function (...args) { const resp = await _fetch(...args); - if (resp.status === 401) window.location.reload(); + if (resp.status === 401) { + window.location.reload(); + throw new Error('Session expired — reloading'); + } return resp; }; })(); @@ -29,28 +32,41 @@ function _toIso(s) { // ── Dashboard auto-refresh ──────────────────────────────────────────── async function refreshAll() { + const refreshBtn = document.querySelector('[data-action="refresh"]'); + if (refreshBtn) refreshBtn.classList.add('is-loading'); try { - const [net, status] = await Promise.all([ + const [netResult, statusResult] = await Promise.allSettled([ lt.api.get('/api/network'), lt.api.get('/api/status'), ]); - updateHostGrid(net.hosts || {}); - updateUnifiTable(net.unifi || []); - updateEventsTable(status.events || [], status.total_active); - updateStatusBar(status.summary || {}, status.last_check || ''); - updateTopology(net.hosts || {}); - } catch (e) { - console.warn('Refresh failed:', e); + if (netResult.status === 'fulfilled') { + const net = netResult.value; + updateHostGrid(net.hosts || {}); + updateUnifiTable(net.unifi || []); + updateTopology(net.hosts || {}); + } else { + console.warn('Network API failed:', netResult.reason); + } + if (statusResult.status === 'fulfilled') { + const status = statusResult.value; + updateEventsTable(status.events || [], status.total_active); + updateStatusBar(status.summary || {}, status.last_check || '', status.daemon_ok); + } else { + console.warn('Status API failed:', statusResult.reason); + } + } finally { + if (refreshBtn) refreshBtn.classList.remove('is-loading'); } } -function updateStatusBar(summary, lastCheck) { +function updateStatusBar(summary, lastCheck, daemonOk) { const bar = document.querySelector('.status-chips'); if (!bar) return; const chips = []; + if (daemonOk === false) chips.push('⚠ MONITOR OFFLINE'); if (summary.critical) chips.push(`● ${summary.critical} CRITICAL`); if (summary.warning) chips.push(`● ${summary.warning} WARNING`); - if (!summary.critical && !summary.warning) chips.push('✔ ALL SYSTEMS NOMINAL'); + if (!summary.critical && !summary.warning && daemonOk !== false) chips.push('✔ ALL SYSTEMS NOMINAL'); bar.innerHTML = chips.join(''); const lc = document.getElementById('last-check'); diff --git a/static/style.css b/static/style.css index 40d7abc..58f88d9 100644 --- a/static/style.css +++ b/static/style.css @@ -40,6 +40,31 @@ --glow-xl: 0 0 8px var(--accent-green), 0 0 20px rgba(0,255,136,.5); } +/* ── Light theme overrides for dim/glow variables ────────────────── */ +[data-theme="light"] { + --green-dim: rgba(0,160,80,.08); + --green-muted: rgba(0,160,80,.45); + --amber-dim: rgba(180,120,0,.07); + --cyan-dim: rgba(0,140,180,.08); + --red-dim: rgba(200,30,60,.06); + --orange-dim: rgba(180,80,0,.06); + --glow: none; + --glow-amber: none; + --glow-red: none; + --glow-cyan: none; + --glow-xl: none; +} + +/* ── Refresh button loading state ────────────────────────────────── */ +[data-action="refresh"].is-loading { + opacity: .5; + pointer-events: none; + cursor: wait; +} +[data-action="refresh"].is-loading::after { + content: '…'; +} + /* ── Animations used by custom components ─────────────────────────── */ @keyframes pulse-red { 0%,100% { box-shadow: 0 0 0 0 rgba(255,45,85,.5); } @@ -85,6 +110,10 @@ border: 1px solid var(--border-color); padding: 1px 7px; } +.g-section-actions { margin-left: auto; } +.events-filter-bar { display: flex; align-items: center; gap: 8px; flex-wrap: wrap; } +.events-filter-bar .lt-input-sm { width: 220px; } +.sev-pills { display: flex; gap: 4px; } .g-page-header { margin-bottom: 20px; } .g-page-title { font-size: 1em; diff --git a/templates/index.html b/templates/index.html index 43d86d5..d7bc346 100644 --- a/templates/index.html +++ b/templates/index.html @@ -275,6 +275,17 @@ {% if summary.critical or summary.warning %} {{ (summary.critical or 0) + (summary.warning or 0) }} {% endif %} +