2
2
import html
3
3
import json
4
4
import operator
5
+ import re
5
6
from functools import reduce
6
7
from pathlib import Path
7
8
@@ -187,6 +188,10 @@ def sync_to_db(self, decoded_documents):
187
188
document_path = _clean_document_path (document ["current_page_name" ])
188
189
document ["slug" ] = Path (document_path ).parts [- 1 ]
189
190
document ["parents" ] = " " .join (Path (document_path ).parts [:- 1 ])
191
+ document ["code_references" ] = _generate_code_references (document ["body" ])
192
+ document ["code_references_search" ] = " " .join (
193
+ document ["code_references" ].keys ()
194
+ )
190
195
Document .objects .create (
191
196
release = self ,
192
197
path = document_path ,
@@ -213,6 +218,52 @@ def _clean_document_path(path):
213
218
return path
214
219
215
220
221
+ def _generate_code_references (body ):
222
+ """
223
+ Django documents classes with the syntax `.. class::`.
224
+ This results in the following HTML:
225
+ <dl class="py class">
226
+ <dt class="sig sig-object py" id="django.db.models.ManyToManyField">
227
+ ...
228
+ </dt>
229
+ </dl>
230
+ This is similar for attributes (`.. attribute::`), methods etc.
231
+ """
232
+ # Collect all <dt> HTML tag ids into a list, e.g:
233
+ # [
234
+ # 'django.db.models.Index',
235
+ # 'django.db.models.Index.expressions',
236
+ # 'django.db.models.Index.fields',
237
+ # ...
238
+ # ]
239
+ code_references = list (re .findall (r'<dt[^>]+id="([^"]+)"' , body ))
240
+ # As the search term can be "expressions", "Index.expressions" etc. create a mapping
241
+ # between potential code search terms and their HTML id.
242
+ # {
243
+ # 'django.db.models.Index': 'django.db.models.Index',
244
+ # 'Index': 'django.db.models.Index',
245
+ # 'models.Index': 'django.db.models.Index',
246
+ # 'db.models.Index': 'django.db.models.Index',
247
+ # 'django.db.models.Index.expressions': 'django.db.models.Index.expressions',
248
+ # 'expressions': 'django.db.models.Index.expressions',
249
+ # 'Index.expressions': 'django.db.models.Index.expressions',
250
+ # 'models.Index.expressions': 'django.db.models.Index.expressions',
251
+ # 'db.models.Index.expressions': 'django.db.models.Index.expressions',
252
+ # 'django.db.models.Index.fields': 'django.db.models.Index.fields',
253
+ # 'fields': 'django.db.models.Index.fields',
254
+ # 'Index.fields': 'django.db.models.Index.fields',
255
+ # 'models.Index.fields': 'django.db.models.Index.fields',
256
+ # 'db.models.Index.fields': 'django.db.models.Index.fields',
257
+ # ...
258
+ # }
259
+ code_paths = {}
260
+ for reference in code_references :
261
+ code_path = reference .split ("." )
262
+ for i in range (len (code_path )):
263
+ code_paths ["." .join (code_path [- i :])] = reference
264
+ return code_paths
265
+
266
+
216
267
def document_url (doc ):
217
268
if doc .path :
218
269
kwargs = {
@@ -251,7 +302,10 @@ def search(self, query_text, release):
251
302
search_query = SearchQuery (
252
303
query_text , config = models .F ("config" ), search_type = "websearch"
253
304
)
254
- search_rank = SearchRank (models .F ("search" ), search_query )
305
+ search_query_simple = SearchQuery (
306
+ query_text , config = "simple" , search_type = "websearch"
307
+ )
308
+ search_rank = SearchRank (models .F ("search" ), search_query_simple )
255
309
base_qs = (
256
310
self .prefetch_related (
257
311
Prefetch (
@@ -287,7 +341,7 @@ def search(self, query_text, release):
287
341
)
288
342
vector_qs = (
289
343
base_qs .alias (rank = search_rank )
290
- .filter (search = search_query )
344
+ .filter (search = search_query_simple )
291
345
.order_by ("-rank" )
292
346
)
293
347
if not vector_qs :
0 commit comments