feat: implement article search with PostgreSQL full-text search
Some checks failed
CI / nightly-e2e (pull_request) Has been skipped
CI / deploy (pull_request) Has been skipped
CI / ci (pull_request) Failing after 1m21s
CI / pr-e2e (pull_request) Successful in 1m33s

- Configure Wagtail database search backend with English search config
- Add django.contrib.postgres to INSTALLED_APPS for full PG FTS support
- Expand ArticlePage.search_fields: body_text (excl. code blocks),
  AutocompleteField(title), RelatedFields(tags), FilterFields
- Add search view at /search/?q= with query guards (strip, max 200 chars,
  empty/whitespace handling) and pagination preserving query param
- Replace nav Subscribe CTA with compact search box (desktop + mobile)
- Add search box to article index page alongside category/tag filters
- Create search results template reusing article_card component
- Add update_index to deploy entrypoint for automated reindexing
- Update existing tests for nav change, add comprehensive search tests

Closes #41

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mark
2026-03-03 21:25:11 +00:00
parent eebd5c9978
commit 906206d4cd
11 changed files with 299 additions and 8 deletions

View File

@@ -235,8 +235,27 @@ class ArticlePage(SeoMixin, Page):
search_fields = Page.search_fields + [
index.SearchField("summary"),
index.SearchField("body_text", es_extra={"analyzer": "english"}),
index.AutocompleteField("title"),
index.RelatedFields("tags", [
index.SearchField("name"),
]),
index.FilterField("category"),
index.FilterField("published_date"),
]
@property
def body_text(self) -> str:
"""Extract prose text from body StreamField, excluding code blocks."""
parts: list[str] = []
for block in self.body:
if block.block_type == "code":
continue
value = block.value
text = value.source if hasattr(value, "source") else str(value)
parts.append(text)
return " ".join(parts)
def save(self, *args: Any, **kwargs: Any) -> None:
if not self.category_id:
self.category, _ = Category.objects.get_or_create(

View File

@@ -0,0 +1,140 @@
import pytest
from apps.blog.models import ArticleIndexPage, ArticlePage
from apps.blog.tests.factories import AuthorFactory
from apps.blog.views import MAX_QUERY_LENGTH
@pytest.fixture
def search_articles(home_page):
"""Create an article index with searchable articles."""
index = ArticleIndexPage(title="Articles", slug="articles")
home_page.add_child(instance=index)
author = AuthorFactory()
articles = []
for title, summary in [
("Understanding LLM Benchmarks", "A deep dive into how language models are evaluated"),
("Local Models on Apple Silicon", "Running open-source models on your MacBook"),
("Agent Frameworks Compared", "Comparing LangChain, CrewAI, and AutoGen"),
]:
a = ArticlePage(
title=title,
slug=title.lower().replace(" ", "-"),
author=author,
summary=summary,
body=[("rich_text", f"<p>{summary} in detail.</p>")],
)
index.add_child(instance=a)
a.save_revision().publish()
articles.append(a)
return articles
@pytest.mark.django_db
class TestSearchView:
def test_empty_query_returns_no_results(self, client, home_page):
resp = client.get("/search/")
assert resp.status_code == 200
assert resp.context["query"] == ""
assert resp.context["results"] is None
def test_whitespace_query_returns_no_results(self, client, home_page):
resp = client.get("/search/?q= ")
assert resp.status_code == 200
assert resp.context["query"] == ""
assert resp.context["results"] is None
def test_search_returns_matching_articles(self, client, search_articles):
resp = client.get("/search/?q=benchmarks")
assert resp.status_code == 200
assert resp.context["query"] == "benchmarks"
assert resp.context["results"] is not None
def test_search_no_match_returns_empty_page(self, client, search_articles):
resp = client.get("/search/?q=zzzznonexistent")
assert resp.status_code == 200
assert resp.context["query"] == "zzzznonexistent"
# Either None or empty page object
results = resp.context["results"]
if results is not None:
assert len(list(results)) == 0
def test_query_is_truncated_to_max_length(self, client, home_page):
long_query = "a" * 500
resp = client.get(f"/search/?q={long_query}")
assert resp.status_code == 200
assert len(resp.context["query"]) <= MAX_QUERY_LENGTH
def test_query_preserved_in_template(self, client, search_articles):
resp = client.get("/search/?q=LLM")
html = resp.content.decode()
assert 'value="LLM"' in html
def test_search_results_page_renders(self, client, search_articles):
resp = client.get("/search/?q=models")
assert resp.status_code == 200
html = resp.content.decode()
assert "Search" in html
def test_search_url_resolves(self, client, home_page):
from django.urls import reverse
assert reverse("search") == "/search/"
@pytest.mark.django_db
class TestSearchFields:
def test_search_fields_include_summary(self):
field_names = [
f.field_name for f in ArticlePage.search_fields if hasattr(f, "field_name")
]
assert "summary" in field_names
def test_search_fields_include_body_text(self):
field_names = [
f.field_name for f in ArticlePage.search_fields if hasattr(f, "field_name")
]
assert "body_text" in field_names
def test_search_fields_include_autocomplete_title(self):
from wagtail.search.index import AutocompleteField
autocomplete_fields = [
f for f in ArticlePage.search_fields if isinstance(f, AutocompleteField)
]
assert any(f.field_name == "title" for f in autocomplete_fields)
def test_search_fields_include_related_tags(self):
from wagtail.search.index import RelatedFields
related = [f for f in ArticlePage.search_fields if isinstance(f, RelatedFields)]
assert any(f.field_name == "tags" for f in related)
def test_body_text_excludes_code_blocks(self):
author = AuthorFactory()
article = ArticlePage(
title="Test",
slug="test",
author=author,
summary="summary",
body=[
("rich_text", "<p>prose content here</p>"),
("code", {"language": "python", "filename": "", "raw_code": "def secret(): pass"}),
],
)
assert "prose content here" in article.body_text
assert "secret" not in article.body_text
@pytest.mark.django_db
class TestSearchNavIntegration:
def test_nav_contains_search_form(self, client, home_page):
resp = client.get("/")
html = resp.content.decode()
assert 'role="search"' in html
assert 'name="q"' in html
assert 'placeholder="Search articles..."' in html
def test_article_index_contains_search_form(self, client, home_page):
index = ArticleIndexPage(title="Articles", slug="articles")
home_page.add_child(instance=index)
resp = client.get("/articles/")
html = resp.content.decode()
assert 'name="q"' in html

View File

@@ -69,8 +69,9 @@ def test_newsletter_forms_render_in_nav_and_footer(client, home_page):
resp = client.get("/")
html = resp.content.decode()
assert resp.status_code == 200
# Nav has a Subscribe CTA link (no inline form — wireframe spec)
assert 'href="#newsletter"' in html
# Nav has a search form instead of Subscribe CTA
assert 'role="search"' in html
assert 'name="q"' in html
# Footer has Connect section with social/RSS links (no newsletter form)
assert "Connect" in html
assert 'name="source" value="nav"' not in html

43
apps/blog/views.py Normal file
View File

@@ -0,0 +1,43 @@
from __future__ import annotations
from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator
from django.http import HttpRequest, HttpResponse
from django.template.response import TemplateResponse
from apps.blog.models import ArticlePage
RESULTS_PER_PAGE = 12
MAX_QUERY_LENGTH = 200
def search(request: HttpRequest) -> HttpResponse:
query = request.GET.get("q", "").strip()[:MAX_QUERY_LENGTH]
results_page = None
paginator = None
if query:
results = (
ArticlePage.objects.live()
.public()
.select_related("author", "category")
.prefetch_related("tags__metadata")
.search(query)
)
paginator = Paginator(results, RESULTS_PER_PAGE)
page_num = request.GET.get("page")
try:
results_page = paginator.page(page_num)
except PageNotAnInteger:
results_page = paginator.page(1)
except EmptyPage:
results_page = paginator.page(paginator.num_pages)
return TemplateResponse(
request,
"blog/search_results.html",
{
"query": query,
"results": results_page,
"paginator": paginator,
},
)