Files
main-site/apps/health/checks.py
Codex_B 10e39b8331
All checks were successful
CI / nightly-e2e (pull_request) Has been skipped
CI / deploy (pull_request) Has been skipped
CI / pr-e2e (pull_request) Successful in 1m38s
CI / ci (pull_request) Successful in 1m46s
feat: add health monitoring endpoint
2026-03-06 15:46:08 +00:00

81 lines
2.7 KiB
Python

from __future__ import annotations

import importlib
import math
import os
import time
import uuid
from pathlib import Path

from django.core.cache import cache
from django.db import connection
# Maximum age (in seconds) of the last recorded backup before check_backup()
# reports a failure: 48 hours.
BACKUP_MAX_AGE_SECONDS = 48 * 60 * 60
def check_db() -> dict[str, float | str]:
    """Probe the default database connection with a trivial query.

    Returns:
        ``{"status": "ok", "latency_ms": <float>}`` when ``SELECT 1`` runs
        without raising, where the latency is the wall-clock time of the
        probe in milliseconds; otherwise ``{"status": "fail", "detail":
        <str(exception)>}``.
    """
    t0 = time.perf_counter()
    try:
        with connection.cursor() as db_cursor:
            db_cursor.execute("SELECT 1")
    except Exception as error:
        # Any failure (connect, auth, query) is reported rather than raised so
        # the caller always receives a status payload.
        return {"status": "fail", "detail": str(error)}
    else:
        elapsed_ms = (time.perf_counter() - t0) * 1000
        return {"status": "ok", "latency_ms": elapsed_ms}
def check_cache() -> dict[str, float | str]:
    """Probe the cache with a set / get / delete round trip.

    A random key (prefixed ``health:``) and a random value are written with a
    5-second timeout, read back, compared, and then deleted.

    Returns:
        ``{"status": "ok", "latency_ms": <float>}`` when the round trip
        succeeds (latency covers set, get and delete, in milliseconds);
        ``{"status": "fail", "detail": ...}`` when the value read back differs
        from the one written or when any cache call raises.
    """
    probe_key = f"health:{uuid.uuid4().hex}"
    expected = uuid.uuid4().hex
    t0 = time.perf_counter()
    try:
        cache.set(probe_key, expected, timeout=5)
        round_trip_ok = cache.get(probe_key) == expected
        if not round_trip_ok:
            return {"status": "fail", "detail": "Cache probe returned unexpected value"}
        cache.delete(probe_key)
    except Exception as error:
        # Report backend errors as a failed check instead of propagating them.
        return {"status": "fail", "detail": str(error)}
    elapsed_ms = (time.perf_counter() - t0) * 1000
    return {"status": "ok", "latency_ms": elapsed_ms}
def check_celery() -> dict[str, str]:
    """Check that the Celery broker named by ``CELERY_BROKER_URL`` is reachable.

    The check is treated as passing (with an explanatory ``detail``) when the
    environment variable is unset/empty or when ``kombu`` cannot be imported.
    Otherwise a connection to the broker is attempted with a 3-second connect
    timeout and a single retry.

    Returns:
        ``{"status": "ok"}`` (optionally with ``detail``) on success or skip;
        ``{"status": "fail", "detail": <str(exception)>}`` when connecting
        to the broker raises.
    """
    url = os.getenv("CELERY_BROKER_URL")
    if not url:
        return {"status": "ok", "detail": "Celery not configured: CELERY_BROKER_URL is unset"}

    # kombu is imported lazily so the module loads even when it is absent.
    try:
        kombu_module = importlib.import_module("kombu")
    except ImportError:
        return {"status": "ok", "detail": "Celery broker check skipped: kombu is not installed"}

    try:
        with kombu_module.Connection(url, connect_timeout=3) as conn:
            conn.ensure_connection(max_retries=1)
    except Exception as error:
        return {"status": "fail", "detail": str(error)}
    else:
        return {"status": "ok"}
def check_backup() -> dict[str, str]:
    """Report whether the most recently recorded backup is recent enough.

    The path of the status file is taken from the ``BACKUP_STATUS_FILE``
    environment variable. The file's content (surrounding whitespace
    stripped) is parsed with ``float`` and compared against ``time.time()``,
    i.e. it is treated as a Unix timestamp in seconds.

    Returns:
        ``{"status": "ok"}`` when the recorded timestamp is at most
        ``BACKUP_MAX_AGE_SECONDS`` old; otherwise
        ``{"status": "fail", "detail": ...}`` describing the problem: the
        variable is unset, the file is missing or unreadable, its content is
        not a finite number, or the backup is too old.
    """
    backup_status_file = os.environ.get("BACKUP_STATUS_FILE")
    if not backup_status_file:
        return {"status": "fail", "detail": "Backup monitoring not configured: BACKUP_STATUS_FILE is unset"}

    try:
        raw_timestamp = Path(backup_status_file).read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        return {"status": "fail", "detail": f"Backup status file not found: {backup_status_file}"}
    except UnicodeDecodeError:
        # A non-UTF-8 file raises UnicodeDecodeError, which is a ValueError and
        # not an OSError; without this clause it would escape the health check.
        return {"status": "fail", "detail": "Invalid backup status file"}
    except OSError as exc:
        return {"status": "fail", "detail": str(exc)}

    try:
        last_backup_at = float(raw_timestamp)
    except ValueError:
        return {"status": "fail", "detail": "Invalid backup status file"}

    # float() also accepts "nan" and "inf". A NaN age compares False against
    # the threshold (and +inf yields a negative-infinite age), which would
    # wrongly report a healthy backup, so non-finite values are rejected.
    if not math.isfinite(last_backup_at):
        return {"status": "fail", "detail": "Invalid backup status file"}

    age_seconds = time.time() - last_backup_at
    if age_seconds > BACKUP_MAX_AGE_SECONDS:
        age_hours = age_seconds / 3600
        # Derive the limit shown in the message from the constant so the two
        # cannot drift apart (renders as "48" for the current value).
        max_age_hours = BACKUP_MAX_AGE_SECONDS / 3600
        return {
            "status": "fail",
            "detail": f"Last backup is {age_hours:.1f} hours old (> {max_age_hours:g} h)",
        }
    return {"status": "ok"}