Skip to content

Commit cdc5c23

Browse files
committed
Allowed search results for Django code terms which contain stop words.
1 parent 0070473 commit cdc5c23

File tree

2 files changed

+61
-2
lines changed

2 files changed

+61
-2
lines changed

docs/models.py

+56-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import html
33
import json
44
import operator
5+
import re
56
from functools import reduce
67
from pathlib import Path
78

@@ -187,6 +188,10 @@ def sync_to_db(self, decoded_documents):
187188
document_path = _clean_document_path(document["current_page_name"])
188189
document["slug"] = Path(document_path).parts[-1]
189190
document["parents"] = " ".join(Path(document_path).parts[:-1])
191+
document["code_references"] = _generate_code_references(document["body"])
192+
document["code_references_search"] = " ".join(
193+
document["code_references"].keys()
194+
)
190195
Document.objects.create(
191196
release=self,
192197
path=document_path,
@@ -213,6 +218,52 @@ def _clean_document_path(path):
213218
return path
214219

215220

221+
def _generate_code_references(body):
222+
"""
223+
Django documents classes with the syntax `.. class::`.
224+
This results in the following HTML:
225+
<dl class="py class">
226+
<dt class="sig sig-object py" id="django.db.models.ManyToManyField">
227+
...
228+
</dt>
229+
</dl>
230+
This is similar for attributes (`.. attribute::`), methods etc.
231+
"""
232+
# Collect all <dt> HTML tag ids into a list, e.g:
233+
# [
234+
# 'django.db.models.Index',
235+
# 'django.db.models.Index.expressions',
236+
# 'django.db.models.Index.fields',
237+
# ...
238+
# ]
239+
code_references = list(re.findall(r'<dt[^>]+id="([^"]+)"', body))
240+
# As the search term can be "expressions", "Index.expressions" etc. create a mapping
241+
# between potential code search terms and their HTML id.
242+
# {
243+
# 'django.db.models.Index': 'django.db.models.Index',
244+
# 'Index': 'django.db.models.Index',
245+
# 'models.Index': 'django.db.models.Index',
246+
# 'db.models.Index': 'django.db.models.Index',
247+
# 'django.db.models.Index.expressions': 'django.db.models.Index.expressions',
248+
# 'expressions': 'django.db.models.Index.expressions',
249+
# 'Index.expressions': 'django.db.models.Index.expressions',
250+
# 'models.Index.expressions': 'django.db.models.Index.expressions',
251+
# 'db.models.Index.expressions': 'django.db.models.Index.expressions',
252+
# 'django.db.models.Index.fields': 'django.db.models.Index.fields',
253+
# 'fields': 'django.db.models.Index.fields',
254+
# 'Index.fields': 'django.db.models.Index.fields',
255+
# 'models.Index.fields': 'django.db.models.Index.fields',
256+
# 'db.models.Index.fields': 'django.db.models.Index.fields',
257+
# ...
258+
# }
259+
code_paths = {}
260+
for reference in code_references:
261+
code_path = reference.split(".")
262+
for i in range(len(code_path)):
263+
code_paths[".".join(code_path[-i:])] = reference
264+
return code_paths
265+
266+
216267
def document_url(doc):
217268
if doc.path:
218269
kwargs = {
@@ -251,7 +302,10 @@ def search(self, query_text, release):
251302
search_query = SearchQuery(
252303
query_text, config=models.F("config"), search_type="websearch"
253304
)
254-
search_rank = SearchRank(models.F("search"), search_query)
305+
search_query_simple = SearchQuery(
306+
query_text, config="simple", search_type="websearch"
307+
)
308+
search_rank = SearchRank(models.F("search"), search_query_simple)
255309
base_qs = (
256310
self.prefetch_related(
257311
Prefetch(
@@ -287,7 +341,7 @@ def search(self, query_text, release):
287341
)
288342
vector_qs = (
289343
base_qs.alias(rank=search_rank)
290-
.filter(search=search_query)
344+
.filter(search=search_query_simple)
291345
.order_by("-rank")
292346
)
293347
if not vector_qs:

docs/search.py

+5
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@
4343
SearchVector("title", weight="A", config=F("config"))
4444
+ SearchVector(KeyTextTransform("slug", "metadata"), weight="A", config=F("config"))
4545
+ SearchVector(KeyTextTransform("toc", "metadata"), weight="B", config=F("config"))
46+
+ SearchVector(
47+
KeyTextTransform("code_references_search", "metadata"),
48+
weight="B",
49+
config="simple",
50+
)
4651
+ SearchVector(KeyTextTransform("body", "metadata"), weight="C", config=F("config"))
4752
+ SearchVector(
4853
KeyTextTransform("parents", "metadata"), weight="D", config=F("config")

0 commit comments

Comments
 (0)