feat: add health monitoring endpoint
This commit is contained in:
80
apps/health/checks.py
Normal file
80
apps/health/checks.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from django.core.cache import cache
|
||||
from django.db import connection
|
||||
|
||||
BACKUP_MAX_AGE_SECONDS = 48 * 60 * 60
|
||||
|
||||
|
||||
def check_db() -> dict[str, float | str]:
|
||||
started = time.perf_counter()
|
||||
try:
|
||||
with connection.cursor() as cursor:
|
||||
cursor.execute("SELECT 1")
|
||||
except Exception as exc:
|
||||
return {"status": "fail", "detail": str(exc)}
|
||||
return {"status": "ok", "latency_ms": (time.perf_counter() - started) * 1000}
|
||||
|
||||
|
||||
def check_cache() -> dict[str, float | str]:
|
||||
cache_key = f"health:{uuid.uuid4().hex}"
|
||||
probe_value = uuid.uuid4().hex
|
||||
started = time.perf_counter()
|
||||
try:
|
||||
cache.set(cache_key, probe_value, timeout=5)
|
||||
cached_value = cache.get(cache_key)
|
||||
if cached_value != probe_value:
|
||||
return {"status": "fail", "detail": "Cache probe returned unexpected value"}
|
||||
cache.delete(cache_key)
|
||||
except Exception as exc:
|
||||
return {"status": "fail", "detail": str(exc)}
|
||||
return {"status": "ok", "latency_ms": (time.perf_counter() - started) * 1000}
|
||||
|
||||
|
||||
def check_celery() -> dict[str, str]:
|
||||
broker_url = os.environ.get("CELERY_BROKER_URL")
|
||||
if not broker_url:
|
||||
return {"status": "ok", "detail": "Celery not configured: CELERY_BROKER_URL is unset"}
|
||||
|
||||
try:
|
||||
kombu = importlib.import_module("kombu")
|
||||
except ImportError:
|
||||
return {"status": "ok", "detail": "Celery broker check skipped: kombu is not installed"}
|
||||
|
||||
try:
|
||||
with kombu.Connection(broker_url, connect_timeout=3) as broker_connection:
|
||||
broker_connection.ensure_connection(max_retries=1)
|
||||
except Exception as exc:
|
||||
return {"status": "fail", "detail": str(exc)}
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
def check_backup() -> dict[str, str]:
|
||||
backup_status_file = os.environ.get("BACKUP_STATUS_FILE")
|
||||
if not backup_status_file:
|
||||
return {"status": "fail", "detail": "Backup monitoring not configured: BACKUP_STATUS_FILE is unset"}
|
||||
|
||||
try:
|
||||
raw_timestamp = Path(backup_status_file).read_text(encoding="utf-8").strip()
|
||||
except FileNotFoundError:
|
||||
return {"status": "fail", "detail": f"Backup status file not found: {backup_status_file}"}
|
||||
except OSError as exc:
|
||||
return {"status": "fail", "detail": str(exc)}
|
||||
|
||||
try:
|
||||
last_backup_at = float(raw_timestamp)
|
||||
except ValueError:
|
||||
return {"status": "fail", "detail": "Invalid backup status file"}
|
||||
|
||||
age_seconds = time.time() - last_backup_at
|
||||
if age_seconds > BACKUP_MAX_AGE_SECONDS:
|
||||
age_hours = age_seconds / 3600
|
||||
return {"status": "fail", "detail": f"Last backup is {age_hours:.1f} hours old (> 48 h)"}
|
||||
|
||||
return {"status": "ok"}
|
||||
Reference in New Issue
Block a user