From 10e39b83315b769b7f8efa5cbd1860dd5af1f422 Mon Sep 17 00:00:00 2001
From: Codex_B
Date: Fri, 6 Mar 2026 15:46:08 +0000
Subject: [PATCH 1/2] feat: add health monitoring endpoint

---
 Dockerfile                       |   5 +
 apps/health/__init__.py          |   1 +
 apps/health/apps.py              |   6 +
 apps/health/checks.py            |  80 ++++++++++++
 apps/health/tests/__init__.py    |   1 +
 apps/health/tests/test_checks.py | 205 +++++++++++++++++++++++++++++++
 apps/health/tests/test_views.py  | 103 ++++++++++++++++
 apps/health/urls.py              |   7 ++
 apps/health/views.py             |  42 +++++++
 config/settings/base.py          |   1 +
 config/urls.py                   |   1 +
 deploy/deploy.sh                 |   6 +-
 docker-compose.prod.yml          |   6 +-
 13 files changed, 462 insertions(+), 2 deletions(-)
 create mode 100644 apps/health/__init__.py
 create mode 100644 apps/health/apps.py
 create mode 100644 apps/health/checks.py
 create mode 100644 apps/health/tests/__init__.py
 create mode 100644 apps/health/tests/test_checks.py
 create mode 100644 apps/health/tests/test_views.py
 create mode 100644 apps/health/urls.py
 create mode 100644 apps/health/views.py

diff --git a/Dockerfile b/Dockerfile
index 65b0cb7..ed2cd3a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -50,4 +50,9 @@ RUN pip install --upgrade pip && pip install -r requirements/base.txt
 
 COPY . /app
 
+ARG GIT_SHA=unknown
+ARG BUILD_ID=unknown
+ENV GIT_SHA=${GIT_SHA} \
+    BUILD_ID=${BUILD_ID}
+
 CMD ["python", "manage.py", "runserver", "0.0.0.0:8000"]
diff --git a/apps/health/__init__.py b/apps/health/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/apps/health/__init__.py
@@ -0,0 +1 @@
+
diff --git a/apps/health/apps.py b/apps/health/apps.py
new file mode 100644
index 0000000..72bd69e
--- /dev/null
+++ b/apps/health/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class HealthConfig(AppConfig):
+    default_auto_field = "django.db.models.BigAutoField"
+    name = "apps.health"
diff --git a/apps/health/checks.py b/apps/health/checks.py
new file mode 100644
index 0000000..e3c30c2
--- /dev/null
+++ b/apps/health/checks.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import importlib
+import os
+import time
+import uuid
+from pathlib import Path
+
+from django.core.cache import cache
+from django.db import connection
+
+BACKUP_MAX_AGE_SECONDS = 48 * 60 * 60
+
+
+def check_db() -> dict[str, float | str]:
+    started = time.perf_counter()
+    try:
+        with connection.cursor() as cursor:
+            cursor.execute("SELECT 1")
+    except Exception as exc:
+        return {"status": "fail", "detail": str(exc)}
+    return {"status": "ok", "latency_ms": (time.perf_counter() - started) * 1000}
+
+
+def check_cache() -> dict[str, float | str]:
+    cache_key = f"health:{uuid.uuid4().hex}"
+    probe_value = uuid.uuid4().hex
+    started = time.perf_counter()
+    try:
+        cache.set(cache_key, probe_value, timeout=5)
+        cached_value = cache.get(cache_key)
+        if cached_value != probe_value:
+            return {"status": "fail", "detail": "Cache probe returned unexpected value"}
+        cache.delete(cache_key)
+    except Exception as exc:
+        return {"status": "fail", "detail": str(exc)}
+    return {"status": "ok", "latency_ms": (time.perf_counter() - started) * 1000}
+
+
+def check_celery() -> dict[str, str]:
+    broker_url = os.environ.get("CELERY_BROKER_URL")
+    if not broker_url:
+        return {"status": "ok", "detail": "Celery not configured: CELERY_BROKER_URL is unset"}
+
+    try:
+        kombu = importlib.import_module("kombu")
+    except ImportError:
+        return {"status": "ok", "detail": "Celery broker check skipped: kombu is not installed"}
+
+    try:
+        with kombu.Connection(broker_url, connect_timeout=3) as broker_connection:
+            broker_connection.ensure_connection(max_retries=1)
+    except Exception as exc:
+        return {"status": "fail", "detail": str(exc)}
+    return {"status": "ok"}
+
+
+def check_backup() -> dict[str, str]:
+    backup_status_file = os.environ.get("BACKUP_STATUS_FILE")
+    if not backup_status_file:
+        return {"status": "fail", "detail": "Backup monitoring not configured: BACKUP_STATUS_FILE is unset"}
+
+    try:
+        raw_timestamp = Path(backup_status_file).read_text(encoding="utf-8").strip()
+    except FileNotFoundError:
+        return {"status": "fail", "detail": f"Backup status file not found: {backup_status_file}"}
+    except OSError as exc:
+        return {"status": "fail", "detail": str(exc)}
+
+    try:
+        last_backup_at = float(raw_timestamp)
+    except ValueError:
+        return {"status": "fail", "detail": "Invalid backup status file"}
+
+    age_seconds = time.time() - last_backup_at
+    if age_seconds > BACKUP_MAX_AGE_SECONDS:
+        age_hours = age_seconds / 3600
+        return {"status": "fail", "detail": f"Last backup is {age_hours:.1f} hours old (> 48 h)"}
+
+    return {"status": "ok"}
diff --git a/apps/health/tests/__init__.py b/apps/health/tests/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/apps/health/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/apps/health/tests/test_checks.py b/apps/health/tests/test_checks.py
new file mode 100644
index 0000000..2abd3bb
--- /dev/null
+++ b/apps/health/tests/test_checks.py
@@ -0,0 +1,205 @@
+from __future__ import annotations
+
+import importlib
+import time
+from types import SimpleNamespace
+
+import pytest
+from django.db.utils import OperationalError
+
+from apps.health import checks
+
+
+class SuccessfulCursor:
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        return False
+
+    def execute(self, query):
+        self.query = query
+
+
+class FailingCursor:
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        return False
+
+    def execute(self, query):
+        raise OperationalError("database unavailable")
+
+
+class FakeCache:
+    def __init__(self, value_to_return=None):
+        self.value_to_return = value_to_return
+        self.stored = {}
+
+    def set(self, key, value, timeout=None):
+        self.stored[key] = value
+
+    def get(self, key):
+        if self.value_to_return is not None:
+            return self.value_to_return
+        return self.stored.get(key)
+
+    def delete(self, key):
+        self.stored.pop(key, None)
+
+
+@pytest.mark.django_db
+def test_db_ok(monkeypatch):
+    monkeypatch.setattr(checks.connection, "cursor", lambda: SuccessfulCursor())
+
+    result = checks.check_db()
+
+    assert result["status"] == "ok"
+    assert "latency_ms" in result
+
+
+@pytest.mark.django_db
+def test_db_fail(monkeypatch):
+    monkeypatch.setattr(checks.connection, "cursor", lambda: FailingCursor())
+
+    result = checks.check_db()
+
+    assert result == {"status": "fail", "detail": "database unavailable"}
+
+
+@pytest.mark.django_db
+def test_cache_ok(monkeypatch):
+    monkeypatch.setattr(checks, "cache", FakeCache())
+
+    result = checks.check_cache()
+
+    assert result["status"] == "ok"
+    assert "latency_ms" in result
+
+
+@pytest.mark.django_db
+def test_cache_fail(monkeypatch):
+    monkeypatch.setattr(checks, "cache", FakeCache(value_to_return="wrong-value"))
+
+    result = checks.check_cache()
+
+    assert result == {"status": "fail", "detail": "Cache probe returned unexpected value"}
+
+
+def test_celery_no_broker(monkeypatch):
+    monkeypatch.delenv("CELERY_BROKER_URL", raising=False)
+
+    result = checks.check_celery()
+
+    assert result["status"] == "ok"
+    assert "CELERY_BROKER_URL is unset" in result["detail"]
+
+
+def test_celery_no_kombu(monkeypatch):
+    monkeypatch.setenv("CELERY_BROKER_URL", "redis://broker")
+
+    def raise_import_error(name):
+        raise ImportError(name)
+
+    monkeypatch.setattr(importlib, "import_module", raise_import_error)
+
+    result = checks.check_celery()
+
+    assert result["status"] == "ok"
+    assert "kombu is not installed" in result["detail"]
+
+
+def test_celery_ok(monkeypatch):
+    monkeypatch.setenv("CELERY_BROKER_URL", "redis://broker")
+
+    class FakeBrokerConnection:
+        def __init__(self, url, connect_timeout):
+            self.url = url
+            self.connect_timeout = connect_timeout
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc, tb):
+            return False
+
+        def ensure_connection(self, max_retries):
+            self.max_retries = max_retries
+
+    monkeypatch.setattr(importlib, "import_module", lambda name: SimpleNamespace(Connection=FakeBrokerConnection))
+
+    result = checks.check_celery()
+
+    assert result == {"status": "ok"}
+
+
+def test_celery_fail(monkeypatch):
+    monkeypatch.setenv("CELERY_BROKER_URL", "redis://broker")
+
+    class BrokenBrokerConnection:
+        def __init__(self, url, connect_timeout):
+            self.url = url
+            self.connect_timeout = connect_timeout
+
+        def __enter__(self):
+            raise OSError("broker down")
+
+        def __exit__(self, exc_type, exc, tb):
+            return False
+
+    monkeypatch.setattr(importlib, "import_module", lambda name: SimpleNamespace(Connection=BrokenBrokerConnection))
+
+    result = checks.check_celery()
+
+    assert result == {"status": "fail", "detail": "broker down"}
+
+
+def test_backup_no_env(monkeypatch):
+    monkeypatch.delenv("BACKUP_STATUS_FILE", raising=False)
+
+    result = checks.check_backup()
+
+    assert result["status"] == "fail"
+    assert "BACKUP_STATUS_FILE is unset" in result["detail"]
+
+
+def test_backup_missing_file(monkeypatch, tmp_path):
+    status_file = tmp_path / "missing-backup-status"
+    monkeypatch.setenv("BACKUP_STATUS_FILE", str(status_file))
+
+    result = checks.check_backup()
+
+    assert result == {"status": "fail", "detail": f"Backup status file not found: {status_file}"}
+
+
+def test_backup_fresh(monkeypatch, tmp_path):
+    status_file = tmp_path / "backup-status"
+    status_file.write_text(str(time.time() - 60), encoding="utf-8")
+    monkeypatch.setenv("BACKUP_STATUS_FILE", str(status_file))
+
+    result = checks.check_backup()
+
+    assert result == {"status": "ok"}
+
+
+def test_backup_stale(monkeypatch, tmp_path):
+    status_file = tmp_path / "backup-status"
+    stale_timestamp = time.time() - (checks.BACKUP_MAX_AGE_SECONDS + 1)
+    status_file.write_text(str(stale_timestamp), encoding="utf-8")
+    monkeypatch.setenv("BACKUP_STATUS_FILE", str(status_file))
+
+    result = checks.check_backup()
+
+    assert result["status"] == "fail"
+    assert "Last backup is" in result["detail"]
+
+
+def test_backup_invalid(monkeypatch, tmp_path):
+    status_file = tmp_path / "backup-status"
+    status_file.write_text("not-a-timestamp", encoding="utf-8")
+    monkeypatch.setenv("BACKUP_STATUS_FILE", str(status_file))
+
+    result = checks.check_backup()
+
+    assert result == {"status": "fail", "detail": "Invalid backup status file"}
diff --git a/apps/health/tests/test_views.py b/apps/health/tests/test_views.py
new file mode 100644
index 0000000..6add17d
--- /dev/null
+++ b/apps/health/tests/test_views.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+import re
+
+import pytest
+
+
+def _mock_checks(monkeypatch, **overrides):
+    payloads = {
+        "db": {"status": "ok", "latency_ms": 1.0},
+        "cache": {"status": "ok", "latency_ms": 1.0},
+        "celery": {"status": "ok"},
+        "backup": {"status": "ok"},
+    }
+    payloads.update(overrides)
+
+    monkeypatch.setattr("apps.health.views.check_db", lambda: payloads["db"])
+    monkeypatch.setattr("apps.health.views.check_cache", lambda: payloads["cache"])
+    monkeypatch.setattr("apps.health.views.check_celery", lambda: payloads["celery"])
+    monkeypatch.setattr("apps.health.views.check_backup", lambda: payloads["backup"])
+
+
+@pytest.mark.django_db
+def test_healthy(client, monkeypatch):
+    _mock_checks(monkeypatch)
+
+    response = client.get("/health/")
+
+    assert response.status_code == 200
+    assert response.json()["status"] == "ok"
+
+
+@pytest.mark.django_db
+def test_degraded_celery(client, monkeypatch):
+    _mock_checks(monkeypatch, celery={"status": "fail", "detail": "broker down"})
+
+    response = client.get("/health/")
+
+    assert response.status_code == 200
+    assert response.json()["status"] == "degraded"
+
+
+@pytest.mark.django_db
+def test_degraded_backup(client, monkeypatch):
+    _mock_checks(monkeypatch, backup={"status": "fail", "detail": "backup missing"})
+
+    response = client.get("/health/")
+
+    assert response.status_code == 200
+    assert response.json()["status"] == "degraded"
+
+
+@pytest.mark.django_db
+def test_unhealthy_db(client, monkeypatch):
+    _mock_checks(monkeypatch, db={"status": "fail", "detail": "db down"})
+
+    response = client.get("/health/")
+
+    assert response.status_code == 503
+    assert response.json()["status"] == "unhealthy"
+
+
+@pytest.mark.django_db
+def test_unhealthy_cache(client, monkeypatch):
+    _mock_checks(monkeypatch, cache={"status": "fail", "detail": "cache down"})
+
+    response = client.get("/health/")
+
+    assert response.status_code == 503
+    assert response.json()["status"] == "unhealthy"
+
+
+@pytest.mark.django_db
+def test_response_shape(client, monkeypatch):
+    _mock_checks(monkeypatch)
+
+    payload = client.get("/health/").json()
+
+    assert set(payload) == {"status", "version", "checks", "timestamp"}
+    assert set(payload["version"]) == {"git_sha", "build"}
+    assert set(payload["checks"]) == {"db", "cache", "celery", "backup"}
+    assert re.fullmatch(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z", payload["timestamp"])
+
+
+@pytest.mark.django_db
+def test_version_fields(client, monkeypatch):
+    _mock_checks(monkeypatch)
+    monkeypatch.setenv("GIT_SHA", "59cc1c4")
+    monkeypatch.setenv("BUILD_ID", "build-20260306-59cc1c4")
+
+    payload = client.get("/health/").json()
+
+    assert payload["version"]["git_sha"]
+    assert payload["version"]["build"]
+
+
+@pytest.mark.django_db
+def test_no_cache_headers(client, monkeypatch):
+    _mock_checks(monkeypatch)
+
+    response = client.get("/health/")
+
+    assert "no-cache" in response["Cache-Control"]
diff --git a/apps/health/urls.py b/apps/health/urls.py
new file mode 100644
index 0000000..abc28d0
--- /dev/null
+++ b/apps/health/urls.py
@@ -0,0 +1,7 @@
+from django.urls import path
+
+from apps.health.views import health_view
+
+urlpatterns = [
+    path("", health_view, name="health"),
+]
diff --git a/apps/health/views.py b/apps/health/views.py
new file mode 100644
index 0000000..3d504c2
--- /dev/null
+++ b/apps/health/views.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+import os
+from collections.abc import Mapping
+from datetime import UTC, datetime
+from typing import cast
+
+from django.http import JsonResponse
+from django.views.decorators.cache import never_cache
+
+from apps.health.checks import check_backup, check_cache, check_celery, check_db
+
+CRITICAL_CHECKS = {"db", "cache"}
+
+
+@never_cache
+def health_view(request):
+    checks: dict[str, Mapping[str, object]] = {
+        "db": check_db(),
+        "cache": check_cache(),
+        "celery": check_celery(),
+        "backup": check_backup(),
+    }
+
+    if any(cast(str, checks[name]["status"]) == "fail" for name in CRITICAL_CHECKS):
+        overall_status = "unhealthy"
+    elif any(cast(str, check["status"]) == "fail" for check in checks.values()):
+        overall_status = "degraded"
+    else:
+        overall_status = "ok"
+
+    payload = {
+        "status": overall_status,
+        "version": {
+            "git_sha": os.environ.get("GIT_SHA", "unknown"),
+            "build": os.environ.get("BUILD_ID", "unknown"),
+        },
+        "checks": checks,
+        "timestamp": datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ"),
+    }
+    response_status = 503 if overall_status == "unhealthy" else 200
+    return JsonResponse(payload, status=response_status)
diff --git a/config/settings/base.py b/config/settings/base.py
index 30e3146..2b745d8 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -49,6 +49,7 @@ INSTALLED_APPS = [
     "tailwind",
     "theme",
     "django_htmx",
+    "apps.health",
     "apps.core",
     "apps.blog",
     "apps.authors",
diff --git a/config/urls.py b/config/urls.py
index 883a54f..3d13806 100644
--- a/config/urls.py
+++ b/config/urls.py
@@ -15,6 +15,7 @@ urlpatterns = [
     path("cms/", include("wagtail.admin.urls")),
     path("documents/", include("wagtail.documents.urls")),
     path("comments/", include("apps.comments.urls")),
+    path("health/", include("apps.health.urls")),
     path("newsletter/", include("apps.newsletter.urls")),
     path("consent/", consent_view, name="consent"),
     path("robots.txt", robots_txt, name="robots_txt"),
diff --git a/deploy/deploy.sh b/deploy/deploy.sh
index 256053d..09c77a3 100755
--- a/deploy/deploy.sh
+++ b/deploy/deploy.sh
@@ -11,6 +11,10 @@ cd "${SITE_DIR}"
 echo "==> Pulling latest code"
 git -C "${APP_DIR}" pull origin main
 
+GIT_SHA=$(git -C "${APP_DIR}" rev-parse --short HEAD)
+BUILD_ID="build-$(date +%Y%m%d)-${GIT_SHA}"
+export GIT_SHA BUILD_ID
+
 echo "==> Updating compose file"
 cp "${APP_DIR}/docker-compose.prod.yml" "${SITE_DIR}/docker-compose.prod.yml"
 
@@ -22,7 +26,7 @@ docker compose -f "${SITE_DIR}/docker-compose.prod.yml" up -d --no-deps --build
 
 echo "==> Waiting for health check"
 for i in $(seq 1 30); do
-  if curl -fsS -H "Host: nohypeai.net" http://localhost:8001/ >/dev/null 2>&1; then
+  if curl -fsS -H "Host: nohypeai.net" http://localhost:8001/health/ >/dev/null 2>&1; then
     echo "==> Site is up"
     exit 0
   fi
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index a4a309a..8277077 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -1,6 +1,10 @@
 services:
   web:
-    build: app
+    build:
+      context: app
+      args:
+        GIT_SHA: ${GIT_SHA:-unknown}
+        BUILD_ID: ${BUILD_ID:-unknown}
     working_dir: /app
     command: /app/deploy/entrypoint.prod.sh
     env_file: .env
-- 
2.49.1

From a450e7409fcef07dc29b4b990fcd768bac67c7b9 Mon Sep 17 00:00:00 2001
From: Codex_B
Date: Fri, 6 Mar 2026 16:08:52 +0000
Subject: [PATCH 2/2] fix: address health endpoint review feedback

Reject non-finite backup timestamps: float() parses "nan", "inf" and
"-inf", and a NaN or negative-infinite age never exceeds
BACKUP_MAX_AGE_SECONDS, so a corrupt status file was reported as "ok".

---
Review notes (ignored by git am):
  * The checks.py and test_checks.py hunks below were added during review;
    they carry no "index" line because the resulting blob ids were not
    computed.
  * NOTE(review): check_db, check_cache, check_celery and check_backup put
    str(exc) into the JSON served by /health/. Confirm those messages cannot
    carry credentials (for example a broker URL) before exposing the
    endpoint publicly.

 apps/health/checks.py            |  5 +++++
 apps/health/tests/test_checks.py | 11 +++++++++++
 apps/health/tests/test_views.py  |  4 ++--
 docker-compose.prod.yml          |  2 ++
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/apps/health/checks.py b/apps/health/checks.py
--- a/apps/health/checks.py
+++ b/apps/health/checks.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import importlib
+import math
 import os
 import time
 import uuid
@@ -71,6 +72,10 @@ def check_backup() -> dict[str, str]:
         last_backup_at = float(raw_timestamp)
     except ValueError:
         return {"status": "fail", "detail": "Invalid backup status file"}
+    # float() also parses "nan" and "inf"; a NaN or negative-infinite age never
+    # exceeds the limit, so such a file would otherwise be reported as "ok".
+    if not math.isfinite(last_backup_at):
+        return {"status": "fail", "detail": "Invalid backup status file"}
 
     age_seconds = time.time() - last_backup_at
     if age_seconds > BACKUP_MAX_AGE_SECONDS:
diff --git a/apps/health/tests/test_checks.py b/apps/health/tests/test_checks.py
--- a/apps/health/tests/test_checks.py
+++ b/apps/health/tests/test_checks.py
@@ -203,3 +203,14 @@ def test_backup_invalid(monkeypatch, tmp_path):
     result = checks.check_backup()
 
     assert result == {"status": "fail", "detail": "Invalid backup status file"}
+
+
+@pytest.mark.parametrize("raw_timestamp", ["nan", "inf", "-inf"])
+def test_backup_non_finite(monkeypatch, tmp_path, raw_timestamp):
+    status_file = tmp_path / "backup-status"
+    status_file.write_text(raw_timestamp, encoding="utf-8")
+    monkeypatch.setenv("BACKUP_STATUS_FILE", str(status_file))
+
+    result = checks.check_backup()
+
+    assert result == {"status": "fail", "detail": "Invalid backup status file"}
diff --git a/apps/health/tests/test_views.py b/apps/health/tests/test_views.py
index 6add17d..1e8cbfc 100644
--- a/apps/health/tests/test_views.py
+++ b/apps/health/tests/test_views.py
@@ -90,8 +90,8 @@ def test_version_fields(client, monkeypatch):
 
     payload = client.get("/health/").json()
 
-    assert payload["version"]["git_sha"]
-    assert payload["version"]["build"]
+    assert payload["version"]["git_sha"] == "59cc1c4"
+    assert payload["version"]["build"] == "build-20260306-59cc1c4"
 
 
 @pytest.mark.django_db
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 8277077..bc6d353 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -9,8 +9,10 @@ services:
     command: /app/deploy/entrypoint.prod.sh
     env_file: .env
     environment:
+      BACKUP_STATUS_FILE: /srv/sum/nohype/backup_status
       DJANGO_SETTINGS_MODULE: config.settings.production
     volumes:
+      - /srv/sum/nohype:/srv/sum/nohype:ro
       - /srv/sum/nohype/static:/app/staticfiles
       - /srv/sum/nohype/media:/app/media
     ports:
-- 
2.49.1