New features: stale banner, tab title alerts, health checks, DB housekeeping
static/app.js:
- Browser tab title updates to show the alert count: '(3 CRIT) GANDALF' or '(2 WARN) GANDALF'.
- Stale monitoring banner: injected into .main if last_check is more than 15 min old; warns the operator that the monitor daemon may be down.

static/style.css:
- .stale-banner: amber top-border warning strip.

app.py:
- /health now checks DB connectivity and monitor freshness (last_check age). Returns 503 + degraded status if the DB is unreachable or the monitor is stale (>20 min).

db.py:
- cleanup_expired_suppressions(): marks time-limited suppressions inactive when expires_at <= NOW() (previously they were only filtered out in SELECTs, never marked inactive).
- purge_old_resolved_events(days=90): deletes old resolved events to prevent unbounded table growth.

monitor.py:
- Calls cleanup_expired_suppressions() and purge_old_resolved_events() each cycle.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
34
app.py
34
app.py
@@ -391,8 +391,38 @@ def api_diagnose_poll(job_id: str):
|
||||
|
||||
@app.route('/health')
def health():
    """Health check endpoint (no auth). Checks DB and monitor freshness.

    Two checks are run and reported under the ``checks`` key of the JSON body:

    * ``db`` -- whether ``db.get_state('last_check')`` can be called without
      raising.
    * ``monitor`` -- how long ago the ``last_check`` state value was written;
      more than 20 minutes is reported as stale.

    Returns:
        A ``(response, status_code)`` pair. The JSON body carries ``status``
        (``'ok'`` or ``'degraded'``), ``service`` and ``checks``. The status
        code is 200 when ``status`` is ``'ok'`` and 503 otherwise.
    """
    # Function-scope import: the rest of the file's imports are not visible
    # from here, so the block keeps its own.
    from datetime import datetime, timezone

    stale_after_s = 1200  # 20 minutes
    checks = {}
    overall = 'ok'

    # DB connectivity
    try:
        db.get_state('last_check')
        checks['db'] = 'ok'
    except Exception as e:
        # NOTE(review): the exception text is returned to unauthenticated
        # callers; confirm it cannot leak connection details.
        checks['db'] = f'error: {e}'
        overall = 'degraded'

    # Monitor freshness: fail if last_check is older than 20 minutes
    try:
        last_check = db.get_state('last_check', '')
        if last_check:
            # The stored value is parsed as 'YYYY-MM-DD HH:MM:SS UTC' and made
            # timezone-aware so it can be compared against an aware "now".
            ts = datetime.strptime(
                last_check, '%Y-%m-%d %H:%M:%S UTC'
            ).replace(tzinfo=timezone.utc)
            age_s = (datetime.now(timezone.utc) - ts).total_seconds()
            if age_s > stale_after_s:
                checks['monitor'] = f'stale ({int(age_s)}s since last check)'
                overall = 'degraded'
            else:
                checks['monitor'] = f'ok ({int(age_s)}s ago)'
        else:
            # An empty/missing value is not treated as a failure.
            checks['monitor'] = 'no data yet'
    except Exception as e:
        checks['monitor'] = f'error: {e}'
        overall = 'degraded'

    status_code = 200 if overall == 'ok' else 503
    return jsonify({'status': overall, 'service': 'gandalf', 'checks': checks}), status_code
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user