From 0dab68b7e5534c749c7e1961dae0c214a7c8874c Mon Sep 17 00:00:00 2001 From: Anthony Johnson Date: Wed, 24 Sep 2025 17:50:03 -0700 Subject: [PATCH 1/7] Add cache tags to offline format file downloads --- readthedocs/api/mixins.py | 6 ++++-- readthedocs/projects/views/public.py | 10 +++++++++- readthedocs/proxito/tests/test_full.py | 3 +++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/readthedocs/api/mixins.py b/readthedocs/api/mixins.py index a6eb1f76695..806bfdded92 100644 --- a/readthedocs/api/mixins.py +++ b/readthedocs/api/mixins.py @@ -18,7 +18,9 @@ class CDNCacheTagsMixin: Add cache tags for project and version to the response of this view. The view inheriting this mixin should implement the - `self._get_project` and `self._get_version` methods. + `self._get_project` and `self._get_version` methods, + or set `self._cache_tags` to a list of cache tags if the project/version is + not a class/instance variable. If `self._get_version` returns `None`, only the project level tags are added. @@ -30,7 +32,7 @@ class CDNCacheTagsMixin: def dispatch(self, request, *args, **kwargs): response = super().dispatch(request, *args, **kwargs) - cache_tags = self._get_cache_tags() + cache_tags = getattr(self, "_cache_tags", self._get_cache_tags()) if cache_tags: add_cache_tags(response, cache_tags) return response diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index b8fa6b94bce..5f1823a748b 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -23,6 +23,7 @@ from django.views.generic import ListView from taggit.models import Tag +from readthedocs.api.mixins import CDNCacheTagsMixin from readthedocs.builds.constants import BUILD_STATE_FINISHED from readthedocs.builds.constants import EXTERNAL from readthedocs.builds.constants import INTERNAL @@ -32,6 +33,7 @@ from readthedocs.core.mixins import CDNCacheControlMixin from readthedocs.core.permissions import AdminPermission from readthedocs.core.resolver import Resolver +from readthedocs.core.utils import get_cache_tag from readthedocs.core.utils.extend import SettingsOverrideObject from readthedocs.notifications.models import Notification from readthedocs.projects.filters import ProjectVersionListFilterSet @@ -299,7 +301,7 @@ def verify_project_token(cls, token, project_slug): project_badge = never_cache(ProjectBadgeView.as_view()) -class ProjectDownloadMediaBase(CDNCacheControlMixin, ServeDocsMixin, View): +class ProjectDownloadMediaBase(CDNCacheControlMixin, CDNCacheTagsMixin, ServeDocsMixin, View): # Use new-style URLs (same domain as docs) or old-style URLs (dashboard URL) same_domain_url = False @@ -382,6 +384,12 @@ def get( slug=version_slug, ) + # For cache tag mixin + self._cache_tags = [ + project.slug, + get_cache_tag(project.slug, version.slug), + ] + return self._serve_dowload( request=request, project=version.project, diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py index 0643ac84605..02486647143 100644 --- a/readthedocs/proxito/tests/test_full.py +++ b/readthedocs/proxito/tests/test_full.py @@ -547,6 +547,7 @@ def test_download_files_public_version(self): headers={"host": "project.dev.readthedocs.io"}, ) self.assertEqual(resp.status_code, 200) + self.assertEqual(resp["Cache-Tag"], "project,project:latest") extension = "zip" if type_ == MEDIA_TYPE_HTMLZIP else type_ self.assertEqual( resp["X-Accel-Redirect"], @@ -560,6 +561,7 @@ def test_download_files_public_version(self): headers={"host": "project.dev.readthedocs.io"}, ) self.assertEqual(resp.status_code, 200) + self.assertEqual(resp["Cache-Tag"], "translation,translation:latest") extension = "zip" if type_ == MEDIA_TYPE_HTMLZIP else type_ self.assertEqual( resp["X-Accel-Redirect"], @@ -604,6 +606,7 @@ def test_download_files_private_version(self): headers={"host": "project.dev.readthedocs.io"}, ) self.assertEqual(resp.status_code, 200) + self.assertEqual(resp["Cache-Tag"], "project,project:latest") extension = "zip" if type_ == MEDIA_TYPE_HTMLZIP else type_ self.assertEqual( resp["X-Accel-Redirect"], From 5803310daea80ae1e5b5db80a3fcc168158a8461 Mon Sep 17 00:00:00 2001 From: Anthony Johnson Date: Thu, 25 Sep 2025 17:26:46 -0700 Subject: [PATCH 2/7] Refactor CDN tag mixin to centralize tag calls --- readthedocs/api/mixins.py | 38 ++++++++++++++++++++-------- readthedocs/projects/views/public.py | 7 +---- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/readthedocs/api/mixins.py b/readthedocs/api/mixins.py index 806bfdded92..2d2e49554a6 100644 --- a/readthedocs/api/mixins.py +++ b/readthedocs/api/mixins.py @@ -17,13 +17,8 @@ class CDNCacheTagsMixin: """ Add cache tags for project and version to the response of this view. - The view inheriting this mixin should implement the - `self._get_project` and `self._get_version` methods, - or set `self._cache_tags` to a list of cache tags if the project/version is - not a class/instance variable. - - If `self._get_version` returns `None`, - only the project level tags are added. + The view inheriting this mixin can either call :py:method:`set_cache_tags` or + implement the ``self._get_project`` and ``self._get_version`` methods. You can add an extra per-project tag by overriding the `project_cache_tag` attribute. """ @@ -37,18 +32,28 @@ def dispatch(self, request, *args, **kwargs): add_cache_tags(response, cache_tags) return response - def _get_cache_tags(self): + def _get_cache_tags(self, project=None, version=None): """ Get cache tags for this view. + This returns an array of tag identifiers used to tag the response at CDN. + + If project or version are not passed in, these values will come from the + methods ``_get_project()`` and ``_get_version()``. + If ``_get_version()`` returns ``None``, only the project level tags are added. + + It's easier to use :py:method:`set_cache_tags` if project/version aren't + set at the instance level, or if they are passed in through a method + like `get()`. + .. warning:: This method is run at the end of the request, so any exceptions like 404 should be caught. """ try: - project = self._get_project() - version = self._get_version() + project = project if project is not None else self._get_project() + version = version if version is not None else self._get_version() except Exception: log.warning( "Error while retrieving project or version for this view.", @@ -65,6 +70,19 @@ def _get_cache_tags(self): tags.append(get_cache_tag(project.slug, self.project_cache_tag)) return tags + def set_cache_tags(self, project=None, version=None): + """ + Store cache tags to be added to response. + + This method can be used if project/version do not exist on the view + instance or if they are passed into the view through a method like + ``get()``. + + The attribute methods ``_get_project()``/``_get_version()`` aren`t used + in this pattern. + """ + self._cache_tags = self._get_cache_tags(project, version) + class EmbedAPIMixin: """ diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index 5f1823a748b..37efcdb6554 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -33,7 +33,6 @@ from readthedocs.core.mixins import CDNCacheControlMixin from readthedocs.core.permissions import AdminPermission from readthedocs.core.resolver import Resolver -from readthedocs.core.utils import get_cache_tag from readthedocs.core.utils.extend import SettingsOverrideObject from readthedocs.notifications.models import Notification from readthedocs.projects.filters import ProjectVersionListFilterSet @@ -384,11 +383,7 @@ def get( slug=version_slug, ) - # For cache tag mixin - self._cache_tags = [ - project.slug, - get_cache_tag(project.slug, version.slug), - ] + self.set_cache_tags(project=project, version=version) return self._serve_dowload( request=request, From 2df76b6fe846473172fb0b1824630f4003170c21 Mon Sep 17 00:00:00 2001 From: Anthony Johnson Date: Thu, 25 Sep 2025 17:31:00 -0700 Subject: [PATCH 3/7] Alter to allow `_get_cache_tags(project=Project(), version=None)` --- readthedocs/api/mixins.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/readthedocs/api/mixins.py b/readthedocs/api/mixins.py index 2d2e49554a6..d4fbdcc5acd 100644 --- a/readthedocs/api/mixins.py +++ b/readthedocs/api/mixins.py @@ -38,28 +38,29 @@ def _get_cache_tags(self, project=None, version=None): This returns an array of tag identifiers used to tag the response at CDN. - If project or version are not passed in, these values will come from the + If project and version are not passed in, these values will come from the methods ``_get_project()`` and ``_get_version()``. If ``_get_version()`` returns ``None``, only the project level tags are added. It's easier to use :py:method:`set_cache_tags` if project/version aren't set at the instance level, or if they are passed in through a method - like `get()`. + like ``get()``. .. warning:: This method is run at the end of the request, so any exceptions like 404 should be caught. """ - try: - project = project if project is not None else self._get_project() - version = version if version is not None else self._get_version() - except Exception: - log.warning( - "Error while retrieving project or version for this view.", - exc_info=True, - ) - return [] + if project is None and version is None: + try: + project = self._get_project() + version = self._get_version() + except Exception: + log.warning( + "Error while retrieving project or version for this view.", + exc_info=True, + ) + return [] tags = [] if project: From 7a086c7eab3bb5c55eb36d944ef5e58b52bedd6f Mon Sep 17 00:00:00 2001 From: Anthony Johnson Date: Thu, 25 Sep 2025 18:03:03 -0700 Subject: [PATCH 4/7] Use version.project, project is not always set --- readthedocs/projects/views/public.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index 37efcdb6554..391bf4b09b5 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -383,7 +383,7 @@ def get( slug=version_slug, ) - self.set_cache_tags(project=project, version=version) + self.set_cache_tags(project=version.project, version=version) return self._serve_dowload( request=request, From e52e916f2ab45da1ebc27e399db3c6c714166921 Mon Sep 17 00:00:00 2001 From: Anthony Johnson Date: Wed, 1 Oct 2025 16:51:43 -0700 Subject: [PATCH 5/7] Expand refactor to include proxito views --- readthedocs/proxito/views/serve.py | 45 +++++++++--------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index a1f1d1e965a..b75cb68c986 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -704,6 +704,10 @@ def get(self, request): "sitemap_url": sitemap_url, "hidden_paths": self._get_hidden_paths(project), } + + # Cache tag only for project, don't include version cache tag + self.set_cache_tags(project=project) + return render( request, "robots.txt", @@ -720,17 +724,6 @@ def _get_hidden_paths(self, project): ] return hidden_paths - def _get_project(self): - # Method used by the CDNCacheTagsMixin class. - return self.request.unresolved_domain.project - - def _get_version(self): - # Method used by the CDNCacheTagsMixin class. - # This view isn't explicitly mapped to a version, - # but it can be when we serve a custom robots.txt file. - # TODO: refactor how we set cache tags to avoid this. - return None - class ServeRobotsTXT(SettingsOverrideObject): _default_class = ServeRobotsTXTBase @@ -877,6 +870,9 @@ def changefreqs_generator(): versions.append(element) + # Cache tag only for project, don't include version cache tag + self.set_cache_tags(project=project) + context = { "versions": versions, } @@ -887,16 +883,6 @@ def changefreqs_generator(): content_type="application/xml", ) - def _get_project(self): - # Method used by the CDNCacheTagsMixin class. - return self.request.unresolved_domain.project - - def _get_version(self): - # Method used by the CDNCacheTagsMixin class. - # This view isn't explicitly mapped to a version, - # TODO: refactor how we set cache tags to avoid this. - return None - class ServeSitemapXML(SettingsOverrideObject): _default_class = ServeSitemapXMLBase @@ -917,25 +903,20 @@ class ServeStaticFiles(CDNCacheControlMixin, CDNCacheTagsMixin, ServeDocsMixin, def get(self, request, filename): try: - return self._serve_static_file(request=request, filename=filename) + resp = self._serve_static_file(request=request, filename=filename) + # Cache tag only for project, don't include version cache tag + self.set_cache_tags(project=request.unresolved_domain.project) + return resp except InvalidPathError: raise Http404 - def _get_cache_tags(self): + def _get_cache_tags(self, project=None, version=None): """ Add an additional *global* tag. This is so we can purge all files from all projects with one single call. """ - tags = super()._get_cache_tags() + tags = super()._get_cache_tags(project=project, version=version) tags.append(self.project_cache_tag) return tags - - def _get_project(self): - # Method used by the CDNCacheTagsMixin class. - return self.request.unresolved_domain.project - - def _get_version(self): - # This view isn't attached to a version. - return None From 10b62a8f30c839c8563dc92fa9714000822f0d0a Mon Sep 17 00:00:00 2001 From: Anthony Johnson Date: Wed, 1 Oct 2025 18:23:04 -0700 Subject: [PATCH 6/7] Support robots.txt per version cache tag --- readthedocs/proxito/tests/test_full.py | 5 +++++ readthedocs/proxito/views/serve.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py index 02486647143..768c2d7ffa5 100644 --- a/readthedocs/proxito/tests/test_full.py +++ b/readthedocs/proxito/tests/test_full.py @@ -857,6 +857,7 @@ def test_default_robots_txt(self, storage_exists): reverse("robots_txt"), headers={"host": "project.readthedocs.io"} ) self.assertEqual(response.status_code, 200) + self.assertEqual(response.headers["Cache-Tag"], "project") expected = dedent( """ User-agent: * @@ -909,6 +910,7 @@ def test_default_robots_txt_disallow_hidden_versions(self, storage_exists): reverse("robots_txt"), headers={"host": "project.readthedocs.io"} ) self.assertEqual(response.status_code, 200) + self.assertEqual(response.headers["Cache-Tag"], "project") expected = dedent( """ User-agent: * @@ -932,6 +934,7 @@ def test_default_robots_txt_private_version(self, storage_exists): reverse("robots_txt"), headers={"host": "project.readthedocs.io"} ) self.assertEqual(response.status_code, 404) + self.assertEqual(response.headers["Cache-Tag"], "project") def test_custom_robots_txt(self): self.project.versions.update(active=True, built=True) @@ -942,6 +945,7 @@ def test_custom_robots_txt(self): response["x-accel-redirect"], "/proxito/media/html/project/latest/robots.txt", ) + self.assertEqual(response.headers["Cache-Tag"], "project,project:version") def test_custom_robots_txt_private_version(self): self.project.versions.update( @@ -951,6 +955,7 @@ def test_custom_robots_txt_private_version(self): reverse("robots_txt"), headers={"host": "project.readthedocs.io"} ) self.assertEqual(response.status_code, 404) + self.assertEqual(response.headers["Cache-Tag"], "project,project:version") def test_directory_indexes(self): self.project.versions.update(active=True, built=True) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index b75cb68c986..527413c2836 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -690,6 +690,8 @@ def get(self, request): filename="robots.txt", check_if_exists=True, ) + # Cache tags for project and version + self.set_cache_tags(project=project, version=version) log.info("Serving custom robots.txt file.") return response except StorageFileNotFound: From f4b316d8237abeeebce85d0670ef9bee9741866f Mon Sep 17 00:00:00 2001 From: Anthony Johnson Date: Wed, 1 Oct 2025 22:14:49 -0700 Subject: [PATCH 7/7] Fix tests --- readthedocs/proxito/tests/test_full.py | 10 +++++----- readthedocs/proxito/views/serve.py | 10 ++++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py index 768c2d7ffa5..b614549530a 100644 --- a/readthedocs/proxito/tests/test_full.py +++ b/readthedocs/proxito/tests/test_full.py @@ -857,7 +857,7 @@ def test_default_robots_txt(self, storage_exists): reverse("robots_txt"), headers={"host": "project.readthedocs.io"} ) self.assertEqual(response.status_code, 200) - self.assertEqual(response.headers["Cache-Tag"], "project") + self.assertEqual(response.headers["Cache-Tag"], "project,project:robots.txt") expected = dedent( """ User-agent: * @@ -910,7 +910,7 @@ def test_default_robots_txt_disallow_hidden_versions(self, storage_exists): reverse("robots_txt"), headers={"host": "project.readthedocs.io"} ) self.assertEqual(response.status_code, 200) - self.assertEqual(response.headers["Cache-Tag"], "project") + self.assertEqual(response.headers["Cache-Tag"], "project,project:robots.txt") expected = dedent( """ User-agent: * @@ -934,7 +934,7 @@ def test_default_robots_txt_private_version(self, storage_exists): reverse("robots_txt"), headers={"host": "project.readthedocs.io"} ) self.assertEqual(response.status_code, 404) - self.assertEqual(response.headers["Cache-Tag"], "project") + self.assertNotIn("Cache-Tag", response.headers) def test_custom_robots_txt(self): self.project.versions.update(active=True, built=True) @@ -945,7 +945,7 @@ def test_custom_robots_txt(self): response["x-accel-redirect"], "/proxito/media/html/project/latest/robots.txt", ) - self.assertEqual(response.headers["Cache-Tag"], "project,project:version") + self.assertEqual(response.headers["Cache-Tag"], "project,project:latest,project:robots.txt") def test_custom_robots_txt_private_version(self): self.project.versions.update( @@ -955,7 +955,7 @@ def test_custom_robots_txt_private_version(self): reverse("robots_txt"), headers={"host": "project.readthedocs.io"} ) self.assertEqual(response.status_code, 404) - self.assertEqual(response.headers["Cache-Tag"], "project,project:version") + self.assertNotIn("Cache-Tag", response.headers) def test_directory_indexes(self): self.project.versions.update(active=True, built=True) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 527413c2836..06ac09c2685 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -640,6 +640,10 @@ def get(self, request): """ project = request.unresolved_domain.project + # Set cache tag only for project for now, for responses that don't use a + # specific version instance. + self.set_cache_tags(project=project) + if project.delisted: return render( request, @@ -690,7 +694,8 @@ def get(self, request): filename="robots.txt", check_if_exists=True, ) - # Cache tags for project and version + # Set project and version cache tags, override project only tag as + # we want to purge this file each time the version changes too. self.set_cache_tags(project=project, version=version) log.info("Serving custom robots.txt file.") return response @@ -707,9 +712,6 @@ def get(self, request): "hidden_paths": self._get_hidden_paths(project), } - # Cache tag only for project, don't include version cache tag - self.set_cache_tags(project=project) - return render( request, "robots.txt",