Skip to content

Commit

Permalink
URL slash fixes (#1614)
Browse files Browse the repository at this point in the history
* add a page template for crawlers to impact stoires

* move blocks around on impact page

* ensure all sitemap.xml urls don't end in a slash

* middleware cleanup - no trailing slashes on canonical urls

* add cards to impact section of page
  • Loading branch information
mwvolo authored Feb 27, 2025
1 parent 548c544 commit 7cbac13
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 150 deletions.
29 changes: 28 additions & 1 deletion global_settings/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import inspect
from django.contrib.sitemaps import views as sitemap_views
from django.http import HttpResponseServerError, HttpResponse

from wagtail.contrib.sitemaps.sitemap_generator import Sitemap
from global_settings.functions import invalidate_cloudfront_caches


Expand All @@ -13,3 +15,28 @@ def clear_entire_cache(request):
invalidate_cloudfront_caches()
response = '<html><body><p>All Caches Invalidated</p></body></html>'
return HttpResponse(response)


def index(request, sitemaps, **kwargs):
sitemaps = prepare_sitemaps(request, sitemaps)
return sitemap_views.index(request, sitemaps, **kwargs)


def sitemap(request, sitemaps=None, **kwargs):
if sitemaps:
sitemaps = prepare_sitemaps(request, sitemaps)
else:
sitemaps = {"wagtail": Sitemap(request)}
return sitemap_views.sitemap(request, sitemaps, **kwargs)


def prepare_sitemaps(request, sitemaps):
initialised_sitemaps = {}
for name, sitemap_cls in sitemaps.items():
if inspect.isclass(sitemap_cls) and issubclass(sitemap_cls, Sitemap):
sitemap_instance = sitemap_cls(request)
sitemap_instance.sitemap_urls = [url.rstrip('/') for url in sitemap_instance.sitemap_urls]
initialised_sitemaps[name] = sitemap_instance
else:
initialised_sitemaps[name] = sitemap_cls
return initialised_sitemaps
228 changes: 88 additions & 140 deletions openstax/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,189 +14,137 @@
from pages.models import HomePage, Supporters, PrivacyPolicy, K12Subject, Subject, Subjects, RootPage


class HttpSmartRedirectResponse(HttpResponsePermanentRedirect):
pass


class CommonMiddlewareAppendSlashWithoutRedirect(CommonMiddleware):
""" This class converts HttpSmartRedirectResponse to the common response
of Django view, without redirect. This is necessary to match status_codes
for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302
code always on pages without slash.
""" This class converts HttpResponsePermanentRedirect to the common response
of Django view, without redirect. This is necessary to match status_codes
for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302
code always on pages without slash.
"""
response_redirect_class = HttpSmartRedirectResponse
response_redirect_class = HttpResponsePermanentRedirect

def __init__(self, *args, **kwargs):
# create django request resolver
self.handler = BaseHandler()
def __init__(self, *args, **kwargs):
# create django request resolver
self.handler = BaseHandler()

# prevent recursive includes
old = settings.MIDDLEWARE
name = self.__module__ + '.' + self.__class__.__name__
settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name]
# prevent recursive includes
old = settings.MIDDLEWARE
name = self.__module__ + '.' + self.__class__.__name__
settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name]

self.handler.load_middleware()
self.handler.load_middleware()

settings.MIDDLEWARE = old
super(CommonMiddlewareAppendSlashWithoutRedirect, self).__init__(*args, **kwargs)
settings.MIDDLEWARE = old
super().__init__(*args, **kwargs)

def get_full_path_with_slash(self, request):
""" Return the full path of the request with a trailing slash appended
without Exception in Debug mode
"""
new_path = request.get_full_path(force_append_slash=True)
# Prevent construction of scheme relative urls.
new_path = escape_leading_slashes(new_path)
return new_path
def get_full_path_with_slash(self, request):
""" Return the full path of the request with a trailing slash appended
without Exception in Debug mode
"""
# Prevent construction of scheme relative urls.
new_path = request.get_full_path(force_append_slash=True)
new_path = escape_leading_slashes(new_path)
return new_path

def process_response(self, request, response):
response = super(CommonMiddlewareAppendSlashWithoutRedirect, self).process_response(request, response)
def process_response(self, request, response):
response = super().process_response(request, response)

if isinstance(response, HttpSmartRedirectResponse):
if not request.path.endswith('/'):
request.path = request.path + '/'
# we don't need query string in path_info because it's in request.GET already
request.path_info = request.path
response = self.handler.get_response(request)
if isinstance(response, HttpResponsePermanentRedirect):
if not request.path.endswith('/'):
request.path = request.path + '/'
# we don't need query string in path_info because it's in request.GET already
response = self.handler.get_response(request)

return response
return response


class CommonMiddlewareOpenGraphRedirect(CommonMiddleware):
OG_USER_AGENTS = [
'baiduspider',
'bingbot',
'embedly',
'facebookbot',
'facebookexternalhit/1.1',
'facebookexternalhit',
'facebot',
'google.*snippet',
'googlebot',
'linkedinbot',
'MetadataScraper',
'outbrain',
'pinterest',
'pinterestbot',
'quora',
'quora link preview',
'rogerbot',
'showyoubot',
'slackbot',
'slackbot-linkexpanding',
'twitterbot',
'vkShare',
'W3C_Validator',
'WhatsApp',
'MetadataScraper',
'yandex',
'yahoo',
]
OG_USER_AGENTS = {
'baiduspider', 'bingbot', 'embedly', 'facebookbot', 'facebookexternalhit/1.1',
'facebookexternalhit', 'facebot', 'google.*snippet', 'googlebot', 'linkedinbot',
'MetadataScraper', 'outbrain', 'pinterest', 'pinterestbot', 'quora', 'quora link preview',
'rogerbot', 'showyoubot', 'slackbot', 'slackbot-linkexpanding', 'twitterbot', 'vkShare',
'W3C_Validator', 'WhatsApp', 'yandex', 'yahoo'
}

def __init__(self, get_response):
self.get_response = get_response

def __call__(self, request, *args, **kwargs):
if 'HTTP_USER_AGENT' in request.META:

user_agent = user_agent_parser.ParseUserAgent(request.META["HTTP_USER_AGENT"])
if user_agent['family'].lower() in self.OG_USER_AGENTS:
# url path minus the trailing /
url_path = unquote(request.get_full_path()[:-1])

full_url = unquote(request.build_absolute_uri())

# index of last / to find slug, except when there isn't a last /
if url_path == '':
page_slug = "home"
else:
index = url_path.rindex('/')
page_slug = url_path[index+1:]
page_slug = "home" if url_path == '' else url_path.rsplit('/', 1)[-1]

if self.redirect_path_found(url_path):
# supporters page has the wrong slug
if page_slug == 'foundation':
page_slug = 'supporters'

# look up correct object based on path
if '/details/books/' in url_path:
page = Book.objects.filter(slug=page_slug)
elif '/blog/' in url_path:
page = NewsArticle.objects.filter(slug=page_slug)
elif '/privacy' in url_path:
page = PrivacyPolicy.objects.filter(slug='privacy-policy')
elif '/k12' in url_path:
page = K12Subject.objects.filter(slug='k12-' + page_slug)
elif '/subjects' in url_path:
flag = FeatureFlag.objects.filter(name='new_subjects')
if flag[0].feature_active:
if page_slug == 'subjects':
page_slug = 'new-subjects'
page = Subjects.objects.filter(slug=page_slug)
else:
page = Subject.objects.filter(slug=page_slug+'-books')
else:
page_slug = 'subjects'
page = BookIndex.objects.filter(slug=page_slug)
else:
page = self.page_by_slug(page_slug)

page = self.get_page(url_path, page_slug)
if page:
template = self.build_template(page[0], full_url)
return HttpResponse(template)
else:
return self.get_response(request)
else:
return self.get_response(request)
return self.get_response(request)

def get_page(self, url_path, page_slug):
if '/details/books/' in url_path:
return Book.objects.filter(slug=page_slug)
elif '/blog/' in url_path:
return NewsArticle.objects.filter(slug=page_slug)
elif '/privacy' in url_path:
return PrivacyPolicy.objects.filter(slug='privacy-policy')
elif '/k12' in url_path:
return K12Subject.objects.filter(slug='k12-' + page_slug)
elif '/subjects' in url_path:
flag = FeatureFlag.objects.filter(name='new_subjects')
if flag[0].feature_active:
if page_slug == 'subjects':
page_slug = 'new-subjects'
return Subjects.objects.filter(slug=page_slug)
else:
return Subject.objects.filter(slug=page_slug + '-books')
else:
return BookIndex.objects.filter(slug='subjects')
else:
return self.page_by_slug(page_slug)

def build_template(self, page, page_url):
page_url = page_url.rstrip('/')
image_url = self.image_url(page.promote_image)
template = '<!DOCTYPE html> <html> <head> <meta charset="utf-8">\n'
template += ' <title>' + str(page.title) + '</title>\n'
template += ' <meta name="description" content="{}" >\n'.format(page.search_description)
template += ' <link rel="canonical" href="{}" />\n'.format(page_url)
template += ' <meta property="og:url" content="{}" />\n'.format(page_url)
template += ' <meta property="og:type" content="article" />\n'
template += ' <meta property="og:title" content="{}" />\n'.format(page.title)
template += ' <meta property="og:description" content="{}" />\n'.format(page.search_description)
template += ' <meta property="og:image" content="{}" />\n'.format(image_url)
template += ' <meta property="og:image:alt" content="{}" />\n'.format(page.title)
template += ' <meta name="twitter:card" content="summary_large_image">\n'
template += ' <meta name="twitter:site" content="@OpenStax">\n'
template += ' <meta name="twitter:title" content="{}">\n'.format(page.title)
template += ' <meta name="twitter:description" content="{}">\n'.format(page.search_description)
template += ' <meta name="twitter:image" content="{}">\n'.format(image_url)
template += ' <meta name="twitter:image:alt" content="OpenStax">\n'
template += '</head><body></body></html>'
return template
return f'''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{page.title}</title>
<meta name="description" content="{page.search_description}">
<link rel="canonical" href="{page_url}">
<meta property="og:url" content="{page_url}">
<meta property="og:type" content="article">
<meta property="og:title" content="{page.title}">
<meta property="og:description" content="{page.search_description}">
<meta property="og:image" content="{image_url}">
<meta property="og:image:alt" content="{page.title}">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:site" content="@OpenStax">
<meta name="twitter:title" content="{page.title}">
<meta name="twitter:description" content="{page.search_description}">
<meta name="twitter:image" content="{image_url}">
<meta name="twitter:image:alt" content="OpenStax">
</head>
<body></body>
</html>'''

def redirect_path_found(self, url_path):
if '/blog/' in url_path or '/details/books/' in url_path or '/foundation' in url_path or '/privacy' in url_path or '/subjects' in url_path or '' == url_path:
return True
elif '/k12' in url_path:
last_slash = url_path.rfind('/')
k12_index = url_path.rfind('k12')
if last_slash < k12_index:
return False
else:
return True
else:
return False
return any(substring in url_path for substring in ['/blog/', '/details/books/', '/foundation', '/privacy', '/subjects', '']) or '/k12' in url_path

def image_url(self, image):
image_url = build_image_url(image)
if not image_url:
return ''
return image_url
return build_image_url(image) or ''

def page_by_slug(self, page_slug):
if page_slug == 'supporters':
return Supporters.objects.all()
if page_slug == 'openstax-homepage':
return HomePage.objects.filter(locale=1)
if page_slug == 'home':
return RootPage.objects.filter(locale=1)



return RootPage.objects.filter(locale=1)
4 changes: 1 addition & 3 deletions openstax/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
from wagtail.admin import urls as wagtailadmin_urls
from wagtail import urls as wagtail_urls
from wagtail.documents import urls as wagtaildocs_urls
from wagtail.images.views.serve import ServeView
from accounts import urls as accounts_urls

from .api import api_router
from news.search import search
from news.feeds import RssBlogFeed, AtomBlogFeed

from api import urls as api_urls
from global_settings.views import throw_error, clear_entire_cache
from wagtail.contrib.sitemaps.views import sitemap
from global_settings.views import throw_error, clear_entire_cache, sitemap

admin.site.site_header = 'OpenStax'

Expand Down
61 changes: 61 additions & 0 deletions pages/migrations/0154_alter_impact_improving_access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Generated by Django 5.0.12 on 2025-02-26 04:42

import wagtail.fields
from django.db import migrations


class Migration(migrations.Migration):

dependencies = [
("pages", "0153_alter_homepage_options"),
]

operations = [
migrations.AlterField(
model_name="impact",
name="improving_access",
field=wagtail.fields.StreamField(
[("content", 12)],
block_lookup={
0: ("wagtail.images.blocks.ImageChooserBlock", (), {"required": False}),
1: ("wagtail.blocks.CharBlock", (), {"required": False}),
2: ("wagtail.blocks.URLBlock", (), {"required": False}),
3: ("pages.custom_blocks.ImageFormatChoiceBlock", (), {}),
4: (
"wagtail.blocks.CharBlock",
(),
{"help_text": "Used by the frontend for Google Analytics.", "required": False},
),
5: (
"wagtail.blocks.StructBlock",
[[("image", 0), ("alt_text", 1), ("link", 2), ("alignment", 3), ("identifier", 4)]],
{},
),
6: ("wagtail.blocks.CharBlock", (), {}),
7: ("wagtail.blocks.RichTextBlock", (), {}),
8: ("wagtail.blocks.URLBlock", (), {}),
9: ("pages.custom_blocks.APIImageChooserBlock", (), {"required": False}),
10: (
"wagtail.blocks.StructBlock",
[[("icon", 9), ("description", 6), ("link_text", 1), ("link_href", 2)]],
{},
),
11: ("wagtail.blocks.ListBlock", (10,), {}),
12: (
"wagtail.blocks.StructBlock",
[
[
("image", 5),
("heading", 6),
("description", 7),
("button_text", 6),
("button_href", 8),
("cards", 11),
]
],
{},
),
},
),
),
]
Loading

0 comments on commit 7cbac13

Please sign in to comment.