feat: implement article search with PostgreSQL full-text search

- Configure Wagtail database search backend with English search config
- Add django.contrib.postgres to INSTALLED_APPS for full PG FTS support
- Expand ArticlePage.search_fields: body_text (excl. code blocks), AutocompleteField(title), RelatedFields(tags), FilterFields
- Add search view at /search/?q= with query guards (strip, max 200 chars, empty/whitespace handling) and pagination preserving query param
- Replace nav Subscribe CTA with compact search box (desktop + mobile)
- Add search box to article index page alongside category/tag filters
- Create search results template reusing article_card component
- Add update_index to deploy entrypoint for automated reindexing
- Update existing tests for nav change, add comprehensive search tests

Closes #41

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-03-03 21:25:11 +00:00
parent eebd5c9978
commit 906206d4cd
11 changed files with 299 additions and 8 deletions
--- a/apps/blog/models.py
+++ b/apps/blog/models.py
@@ -235,8 +235,27 @@ class ArticlePage(SeoMixin, Page):

+    # Search index configuration: the base Page fields plus the
+    # article-specific fields listed below.
    search_fields = Page.search_fields + [
        index.SearchField("summary"),
+        # body_text is a computed property on this class (prose only, code
+        # blocks skipped), not a database column.
+        # NOTE(review): es_extra looks Elasticsearch-specific — confirm it has
+        # any effect with the database search backend this commit configures.
+        index.SearchField("body_text", es_extra={"analyzer": "english"}),
+        index.AutocompleteField("title"),
+        # Index the name of each related tag alongside the article.
+        index.RelatedFields("tags", [
+            index.SearchField("name"),
+        ]),
+        index.FilterField("category"),
+        index.FilterField("published_date"),
    ]

+    @property
+    def body_text(self) -> str:
+        """Return the article body as one searchable string, without code.
+
+        Blocks whose ``block_type`` is ``"code"`` are left out. For every
+        other block the value's ``source`` attribute is used when it exists;
+        otherwise the value is converted with ``str()``. The resulting pieces
+        are joined with single spaces.
+        """
+
+        def _as_text(value):
+            # Prefer the stored source when the value exposes one.
+            return value.source if hasattr(value, "source") else str(value)
+
+        return " ".join(
+            _as_text(child.value)
+            for child in self.body
+            if child.block_type != "code"
+        )
+
    def save(self, *args: Any, **kwargs: Any) -> None:
        if not self.category_id:
            self.category, _ = Category.objects.get_or_create(
--- a/apps/blog/tests/test_search.py
+++ b/apps/blog/tests/test_search.py
@@ -0,0 +1,140 @@
+import pytest
+
+from apps.blog.models import ArticleIndexPage, ArticlePage
+from apps.blog.tests.factories import AuthorFactory
+from apps.blog.views import MAX_QUERY_LENGTH
+
+
+@pytest.fixture
+def search_articles(home_page):
+    """Publish three articles under a fresh article index and return them.
+
+    The index page is added beneath ``home_page``; every article shares one
+    author and carries a single rich-text body block derived from its summary.
+    """
+    article_index = ArticleIndexPage(title="Articles", slug="articles")
+    home_page.add_child(instance=article_index)
+    author = AuthorFactory()
+
+    def _publish(title, summary):
+        # Create the page under the index, then publish its first revision.
+        page = ArticlePage(
+            title=title,
+            slug=title.lower().replace(" ", "-"),
+            author=author,
+            summary=summary,
+            body=[("rich_text", f"<p>{summary} in detail.</p>")],
+        )
+        article_index.add_child(instance=page)
+        page.save_revision().publish()
+        return page
+
+    seed_data = (
+        ("Understanding LLM Benchmarks", "A deep dive into how language models are evaluated"),
+        ("Local Models on Apple Silicon", "Running open-source models on your MacBook"),
+        ("Agent Frameworks Compared", "Comparing LangChain, CrewAI, and AutoGen"),
+    )
+    return [_publish(title, summary) for title, summary in seed_data]
+
+
+@pytest.mark.django_db
+class TestSearchView:
+    """Request-level checks for the ``/search/`` view."""
+
+    def test_empty_query_returns_no_results(self, client, home_page):
+        """Without a ``q`` parameter the query is blank and results are None."""
+        response = client.get("/search/")
+        assert response.status_code == 200
+        context = response.context
+        assert context["query"] == ""
+        assert context["results"] is None
+
+    def test_whitespace_query_returns_no_results(self, client, home_page):
+        """A whitespace-only query is treated like an empty one."""
+        response = client.get("/search/?q=   ")
+        assert response.status_code == 200
+        context = response.context
+        assert context["query"] == ""
+        assert context["results"] is None
+
+    def test_search_returns_matching_articles(self, client, search_articles):
+        """A real query is echoed back and yields a results object."""
+        response = client.get("/search/?q=benchmarks")
+        assert response.status_code == 200
+        context = response.context
+        assert context["query"] == "benchmarks"
+        assert context["results"] is not None
+
+    def test_search_no_match_returns_empty_page(self, client, search_articles):
+        """A query matching nothing gives either no results object or an empty one."""
+        response = client.get("/search/?q=zzzznonexistent")
+        assert response.status_code == 200
+        assert response.context["query"] == "zzzznonexistent"
+        found = response.context["results"]
+        # Either None or empty page object
+        assert found is None or len(list(found)) == 0
+
+    def test_query_is_truncated_to_max_length(self, client, home_page):
+        """Over-long queries are cut down to at most MAX_QUERY_LENGTH characters."""
+        long_query = "a" * 500
+        response = client.get(f"/search/?q={long_query}")
+        assert response.status_code == 200
+        echoed_query = response.context["query"]
+        assert len(echoed_query) <= MAX_QUERY_LENGTH
+
+    def test_query_preserved_in_template(self, client, search_articles):
+        """The submitted query is pre-filled in the rendered search input."""
+        page_html = client.get("/search/?q=LLM").content.decode()
+        assert 'value="LLM"' in page_html
+
+    def test_search_results_page_renders(self, client, search_articles):
+        """The results page renders successfully and mentions "Search"."""
+        response = client.get("/search/?q=models")
+        assert response.status_code == 200
+        page_html = response.content.decode()
+        assert "Search" in page_html
+
+    def test_search_url_resolves(self, client, home_page):
+        """The ``search`` URL name reverses to ``/search/``."""
+        from django.urls import reverse
+
+        resolved = reverse("search")
+        assert resolved == "/search/"
+
+
+@pytest.mark.django_db
+class TestSearchFields:
+    """Checks on ``ArticlePage.search_fields`` and the ``body_text`` property."""
+
+    @staticmethod
+    def _declared_field_names():
+        # Collect field_name from every search field entry that has one.
+        return [
+            entry.field_name
+            for entry in ArticlePage.search_fields
+            if hasattr(entry, "field_name")
+        ]
+
+    def test_search_fields_include_summary(self):
+        """``summary`` is among the declared search fields."""
+        assert "summary" in self._declared_field_names()
+
+    def test_search_fields_include_body_text(self):
+        """``body_text`` is among the declared search fields."""
+        assert "body_text" in self._declared_field_names()
+
+    def test_search_fields_include_autocomplete_title(self):
+        """``title`` is declared as an AutocompleteField."""
+        from wagtail.search.index import AutocompleteField
+
+        assert any(
+            isinstance(entry, AutocompleteField) and entry.field_name == "title"
+            for entry in ArticlePage.search_fields
+        )
+
+    def test_search_fields_include_related_tags(self):
+        """``tags`` is declared through RelatedFields."""
+        from wagtail.search.index import RelatedFields
+
+        assert any(
+            isinstance(entry, RelatedFields) and entry.field_name == "tags"
+            for entry in ArticlePage.search_fields
+        )
+
+    def test_body_text_excludes_code_blocks(self):
+        """Prose blocks appear in ``body_text``; code block content does not."""
+        stream_data = [
+            ("rich_text", "<p>prose content here</p>"),
+            ("code", {"language": "python", "filename": "", "raw_code": "def secret(): pass"}),
+        ]
+        article = ArticlePage(
+            title="Test",
+            slug="test",
+            author=AuthorFactory(),
+            summary="summary",
+            body=stream_data,
+        )
+        extracted = article.body_text
+        assert "prose content here" in extracted
+        assert "secret" not in extracted
+
+
+@pytest.mark.django_db
+class TestSearchNavIntegration:
+    """Checks that the search form is rendered on site pages."""
+
+    def test_nav_contains_search_form(self, client, home_page):
+        """The home page markup contains the search form and its input."""
+        page_html = client.get("/").content.decode()
+        expected_fragments = (
+            'role="search"',
+            'name="q"',
+            'placeholder="Search articles..."',
+        )
+        for fragment in expected_fragments:
+            assert fragment in page_html
+
+    def test_article_index_contains_search_form(self, client, home_page):
+        """The article index page markup contains a ``q`` input."""
+        article_index = ArticleIndexPage(title="Articles", slug="articles")
+        home_page.add_child(instance=article_index)
+        page_html = client.get("/articles/").content.decode()
+        assert 'name="q"' in page_html
--- a/apps/blog/tests/test_views.py
+++ b/apps/blog/tests/test_views.py
@@ -69,8 +69,9 @@ def test_newsletter_forms_render_in_nav_and_footer(client, home_page):
    resp = client.get("/")
    html = resp.content.decode()
    assert resp.status_code == 200
-    # Nav has a Subscribe CTA link (no inline form — wireframe spec)
-    assert 'href="#newsletter"' in html
+    # Nav has a search form instead of Subscribe CTA
+    assert 'role="search"' in html
+    assert 'name="q"' in html
    # Footer has Connect section with social/RSS links (no newsletter form)
    assert "Connect" in html
    assert 'name="source" value="nav"' not in html
--- a/apps/blog/views.py
+++ b/apps/blog/views.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator
+from django.http import HttpRequest, HttpResponse
+from django.template.response import TemplateResponse
+
+from apps.blog.models import ArticlePage
+
+# Page size handed to the Paginator in the search view.
+RESULTS_PER_PAGE = 12
+# Maximum number of characters kept from the ``q`` parameter; longer input is
+# truncated by the search view.
+MAX_QUERY_LENGTH = 200
+
+
+def search(request: HttpRequest) -> HttpResponse:
+    """Render the article search results page.
+
+    The ``q`` GET parameter is cleaned first: NUL bytes are removed,
+    surrounding whitespace is stripped and the text is truncated to
+    ``MAX_QUERY_LENGTH`` characters. A non-empty query is run against live,
+    public ``ArticlePage`` objects and paginated ``RESULTS_PER_PAGE`` per page
+    using the ``page`` GET parameter; a non-integer page falls back to page 1
+    and an out-of-range page to the last page.
+
+    Args:
+        request: The incoming HTTP request.
+
+    Returns:
+        A ``TemplateResponse`` for ``blog/search_results.html`` whose context
+        holds ``query`` (the cleaned string), ``results`` (the current page,
+        or ``None`` when the query is empty) and ``paginator`` (or ``None``).
+    """
+    # PostgreSQL does not accept NUL characters in string parameters, so a
+    # request such as ``?q=%00`` would otherwise fail inside the search
+    # backend instead of rendering a normal page.
+    raw_query = request.GET.get("q", "").replace("\x00", "")
+    # Strip again after truncating: the cut can land right after a space and
+    # leave trailing whitespace in the query echoed back to the template.
+    query = raw_query.strip()[:MAX_QUERY_LENGTH].strip()
+    results_page = None
+    paginator = None
+
+    if query:
+        results = (
+            ArticlePage.objects.live()
+            .public()
+            .select_related("author", "category")
+            .prefetch_related("tags__metadata")
+            .search(query)
+        )
+        paginator = Paginator(results, RESULTS_PER_PAGE)
+        page_num = request.GET.get("page")
+        try:
+            results_page = paginator.page(page_num)
+        except PageNotAnInteger:
+            # Missing or non-numeric ``page`` value: show the first page.
+            results_page = paginator.page(1)
+        except EmptyPage:
+            # Page number out of range: clamp to the last available page.
+            results_page = paginator.page(paginator.num_pages)
+
+    return TemplateResponse(
+        request,
+        "blog/search_results.html",
+        {
+            "query": query,
+            "results": results_page,
+            "paginator": paginator,
+        },
+    )