Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions blog/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ class EntryAdmin(admin.ModelAdmin):
"pub_date",
"is_active",
"is_published",
"is_searchable",
"author",
)
list_filter = ("is_active", "is_searchable")
list_filter = ("is_active",)
exclude = ("summary_html", "body_html")
prepopulated_fields = {"slug": ("headline",)}
raw_id_fields = ["social_media_card"]
Expand Down
3 changes: 0 additions & 3 deletions blog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@ def published(self):
def active(self):
return self.filter(is_active=True)

def searchable(self):
return self.filter(is_searchable=True)


class ContentFormat(models.TextChoices):
REST = "reST", "reStructuredText"
Expand Down
20 changes: 0 additions & 20 deletions blog/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,26 +70,6 @@ def test_manager_published(self):
transform=lambda entry: entry.headline,
)

def test_manager_searchable(self):
"""
Make sure that the Entry manager's `searchable` method works
"""
Entry.objects.create(
pub_date=self.yesterday,
is_searchable=False,
headline="not searchable",
slug="a",
)
Entry.objects.create(
pub_date=self.yesterday, is_searchable=True, headline="searchable", slug="b"
)

self.assertQuerySetEqual(
Entry.objects.searchable(),
["searchable"],
transform=lambda entry: entry.headline,
)

def test_docutils_safe(self):
"""
Make sure docutils' file inclusion directives are disabled by default.
Expand Down
2 changes: 1 addition & 1 deletion djangoproject/scss/_style.scss
Original file line number Diff line number Diff line change
Expand Up @@ -2650,7 +2650,7 @@ search.filters {
position: relative;

a {
padding: 10px 20px;
padding: 10px 15px;
text-decoration: none;
border-bottom: 3px solid transparent;
transition: color 0.3s ease, border-bottom 0.3s ease;
Expand Down
76 changes: 76 additions & 0 deletions djangoproject/sitemaps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from dataclasses import dataclass

from django.contrib import sitemaps
from django_hosts.resolvers import reverse


@dataclass
class URLObject:
    """A URL pattern name paired with the host it is reversed against."""

    # URL pattern name that is later handed to ``reverse()``.
    name: str
    # Host used when reversing the name; "www" unless stated otherwise.
    host: str = "www"


class LocationAbsoluteUrlMixin:
    """
    Sitemap mixin that emits the value of ``location()`` verbatim.

    The host class must provide ``items()`` and ``location(item)``.
    ``changefreq`` and ``priority`` are optional attributes; when the host
    class does not define them they are reported as ``None``.
    """

    def get_urls(self, site=None, **kwargs):
        """
        Prevent the Django sitemap framework from prefixing the domain.
        Use the absolute URL returned by location().

        ``site`` and any extra keyword arguments are accepted for signature
        compatibility and are not used. Returns a list with one dict per
        item, with the keys ``location``, ``lastmod`` (always ``None``),
        ``changefreq`` and ``priority``.
        """
        # Both attributes are optional on a sitemap class, so fall back to
        # None instead of raising AttributeError when one is not defined.
        changefreq = getattr(self, "changefreq", None)
        priority = getattr(self, "priority", None)
        return [
            {
                "location": self.location(item),
                "lastmod": None,
                "changefreq": changefreq,
                "priority": priority,
            }
            for item in self.items()
        ]


class StaticViewSitemap(LocationAbsoluteUrlMixin, sitemaps.Sitemap):
    """Sitemap for a fixed list of named views, grouped by app below."""

    changefreq = "monthly"
    priority = 0.5

    def location(self, item):
        """Reverse the item's URL name against the item's host."""
        return reverse(item.name, host=item.host)

    def items(self):
        """Return one ``URLObject`` per view that belongs in the sitemap."""
        return [
            # accounts
            URLObject(name="registration_register"),
            # aggregator
            URLObject(name="community-index"),
            URLObject(name="community-ecosystem"),
            URLObject(name="local-django-communities"),
            # contact
            URLObject(name="contact_foundation"),
            # dashboard (reversed on its own host)
            URLObject(name="dashboard-index", host="dashboard"),
            URLObject(name="metric-list", host="dashboard"),
            # djangoproject
            URLObject(name="homepage"),
            URLObject(name="overview"),
            URLObject(name="start"),
            URLObject(name="code_of_conduct"),
            URLObject(name="conduct_faq"),
            URLObject(name="conduct_reporting"),
            URLObject(name="conduct_enforcement"),
            URLObject(name="conduct_changes"),
            URLObject(name="diversity"),
            URLObject(name="diversity_changes"),
            # foundation
            URLObject(name="foundation_meeting_archive_index"),
            # fundraising
            URLObject(name="fundraising:index"),
            # members
            URLObject(name="members:individual-members"),
            URLObject(name="members:corporate-members"),
            URLObject(name="members:corporate-members-join"),
            URLObject(name="members:corporate-members-badges"),
            URLObject(name="members:teams"),
            # releases
            URLObject(name="download"),
        ]
7 changes: 7 additions & 0 deletions djangoproject/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ def test_single_h1_per_page(self):
"styleguide/", # Has multiple <h1> examples.
"admin/", # Admin templates are out of our control.
"reset/done/", # Uses an admin template.
"sitemap.xml",
]
resolver = get_resolver()
urls = self.extract_patterns(resolver.url_patterns)
Expand All @@ -211,3 +212,9 @@ def test_single_h1_per_page(self):
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
self.assertContains(response, "<h1", count=1)


class SiteMapTests(TestCase):
    """Smoke test for the URL named ``sitemap``."""

    def test_sitemap_renders(self):
        """Requesting the sitemap URL returns HTTP 200."""
        sitemap_url = reverse("sitemap")
        response = self.client.get(sitemap_url)
        self.assertEqual(response.status_code, 200)
3 changes: 3 additions & 0 deletions djangoproject/urls/www.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from aggregator.feeds import CommunityAggregatorFeed, CommunityAggregatorFirehoseFeed
from blog.feeds import WeblogEntryFeed
from blog.sitemaps import WeblogSitemap
from djangoproject.sitemaps import StaticViewSitemap
from foundation.feeds import FoundationMinutesFeed
from foundation.views import CoreDevelopers

Expand All @@ -21,6 +22,7 @@
sitemaps = {
"weblog": WeblogSitemap,
"flatpages": FlatPageSitemap,
"static": StaticViewSitemap,
}


Expand Down Expand Up @@ -135,6 +137,7 @@
"sitemap.xml",
cache_page(60 * 60 * 6)(sitemap_views.sitemap),
{"sitemaps": sitemaps},
name="sitemap",
),
path("weblog/", include("blog.urls")),
path("download/", include("releases.urls")),
Expand Down
2 changes: 2 additions & 0 deletions docs/management/commands/update_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ def build_doc_release(self, release, force=False, interactive=False):
if self.verbosity >= 1:
self.stdout.write(f"Starting update for {release} at {datetime.now()}...")

release.sync_from_sitemap(force=force)

# checkout_dir is shared for all languages.
checkout_dir = settings.DOCS_BUILD_ROOT / "sources" / release.version
parent_build_dir = settings.DOCS_BUILD_ROOT / release.lang / release.version
Expand Down
103 changes: 47 additions & 56 deletions docs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from functools import partial, reduce
from pathlib import Path

import requests
from django.conf import settings
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import (
Expand All @@ -26,19 +27,19 @@
from django.utils.html import strip_tags
from django_hosts.resolvers import reverse

from blog.models import Entry
from releases.models import Release

from . import utils
from .search import (
DEFAULT_TEXT_SEARCH_CONFIG,
SEARCHABLE_VIEWS,
START_SEL,
STOP_SEL,
TSEARCH_CONFIG_LANGUAGES,
DocumentationCategory,
fetch_html,
get_document_search_vector,
)
from .utils import extract_inner_html


def get_search_config(lang):
Expand Down Expand Up @@ -185,7 +186,7 @@ def sync_to_db(self, decoded_documents):
the database. Deletes all the release's documents first then
reinserts them as needed.
"""
self.documents.all().delete()
self.documents.exclude(metadata__parents=DocumentationCategory.WEBSITE).delete()

# Read excluded paths from robots.docs.txt.
robots_path = settings.BASE_DIR / "djangoproject" / "static" / "robots.docs.txt"
Expand Down Expand Up @@ -216,81 +217,63 @@ def sync_to_db(self, decoded_documents):
metadata=document,
config=get_search_config(self.lang),
)
for document in self.documents.all():
for document in self.documents.exclude(
metadata__parents=DocumentationCategory.WEBSITE
):
document.metadata["breadcrumbs"] = list(
Document.objects.breadcrumbs(document).values("title", "path")
)
document.save(update_fields=("metadata",))

self._sync_blog_to_db()
self._sync_views_to_db()
def sync_from_sitemap(self, force=False):
from djangoproject.urls.www import sitemaps

def _sync_blog_to_db(self):
"""
Sync the blog entries into search based on the release documents
support end date.
"""
if self.lang != "en":
return # The blog is only written in English currently
if not self.is_dev:
return

entries = Entry.objects.published().searchable()
Document.objects.bulk_create(
[
Document(
release=self,
path=entry.get_absolute_url(),
title=entry.headline,
metadata={
"body": entry.body_html,
"breadcrumbs": [
{
"path": DocumentationCategory.WEBSITE,
"title": "News",
},
],
"parents": DocumentationCategory.WEBSITE,
"slug": entry.slug,
"title": entry.headline,
"toc": "",
},
config=get_search_config(self.lang),
)
for entry in entries
]
)
if force:
Document.objects.filter(
metadata__parents=DocumentationCategory.WEBSITE
).delete()
Comment on lines +234 to +237
Copy link
Contributor Author

@sarahboyce sarahboyce Oct 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future, we could add a last_modified field to Document, compare it against the sitemap's last-modified value, and update only the pages that changed.
But for now, I think a cron job that builds the English dev docs with force (update_docs --force --language=en dev) every 2 weeks (or even every month) will be good enough to pick up changes to the website pages for search and to remove any stale pages. This content doesn't change very often, nor do we remove content often.


def _sync_views_to_db(self):
"""
Sync the specific views into search based on the release documents
support end date.
"""
if self.lang != "en":
return # The searchable views are only written in English currently
doc_urls = set(
Document.objects.filter(
metadata__parents=DocumentationCategory.WEBSITE
).values_list("path", flat=True)
)

Document.objects.bulk_create(
[
Document(
for sitemap in sitemaps.values():
for url in sitemap().get_urls():
path = url["location"]
if path in doc_urls:
continue
try:
page_html = fetch_html(path)
except requests.RequestException:
continue
try:
main_html = extract_inner_html(page_html, tag="main")
title = extract_inner_html(page_html, tag="h1")
except ValueError:
continue
Document.objects.create(
release=self,
path=searchable_view.www_absolute_url,
title=searchable_view.page_title,
path=path,
title=title,
metadata={
"body": searchable_view.html,
"body": main_html,
"breadcrumbs": [
{
"path": DocumentationCategory.WEBSITE,
"title": "Website",
},
],
"parents": DocumentationCategory.WEBSITE,
"slug": searchable_view.url_name,
"title": searchable_view.page_title,
"title": title,
"toc": "",
},
config=get_search_config(self.lang),
)
for searchable_view in SEARCHABLE_VIEWS
]
)


def _clean_document_path(path):
Expand Down Expand Up @@ -351,6 +334,14 @@ def search(self, query_text, release, document_category=None):
config=models.F("config"),
)
base_filter = Q(release_id=release.id)
if release.lang == settings.DEFAULT_LANGUAGE_CODE and not release.is_dev:
dev_release = DocumentRelease.objects.get_by_version_and_lang(
version="dev", lang=settings.DEFAULT_LANGUAGE_CODE
)
base_filter |= Q(
release_id=dev_release.id,
metadata__parents=DocumentationCategory.WEBSITE,
)
if document_category:
base_filter &= Q(metadata__parents__startswith=document_category)
base_qs = (
Expand Down
Loading