2
2
import html
3
3
import json
4
4
import operator
5
+ import re
5
6
from functools import reduce
6
7
from pathlib import Path
7
8
@@ -187,6 +188,10 @@ def sync_to_db(self, decoded_documents):
187
188
document_path = _clean_document_path (document ["current_page_name" ])
188
189
document ["slug" ] = Path (document_path ).parts [- 1 ]
189
190
document ["parents" ] = " " .join (Path (document_path ).parts [:- 1 ])
191
+ document ["code_references" ] = _generate_code_references (document ["body" ])
192
+ document ["code_references_search" ] = " " .join (
193
+ document ["code_references" ].keys ()
194
+ )
190
195
Document .objects .create (
191
196
release = self ,
192
197
path = document_path ,
@@ -213,6 +218,52 @@ def _clean_document_path(path):
213
218
return path
214
219
215
220
221
+ def _generate_code_references (body ):
222
+ """
223
+ Django documents classes with the syntax `.. class::`.
224
+ This results in the following HTML:
225
+ <dl class="py class">
226
+ <dt class="sig sig-object py" id="django.db.models.ManyToManyField">
227
+ ...
228
+ </dt>
229
+ </dl>
230
+ This is similar for attributes (`.. attribute::`), methods etc.
231
+ """
232
+ # Collect all <dt> HTML tag ids into a list, e.g:
233
+ # [
234
+ # 'django.db.models.Index',
235
+ # 'django.db.models.Index.expressions',
236
+ # 'django.db.models.Index.fields',
237
+ # ...
238
+ # ]
239
+ code_references = list (re .findall (r'<dt[^>]+id="([^"]+)"' , body ))
240
+ # As the search term can be "expressions", "Index.expressions" etc. create a mapping
241
+ # between potential code search terms and their HTML id.
242
+ # {
243
+ # 'django.db.models.Index': 'django.db.models.Index',
244
+ # 'Index': 'django.db.models.Index',
245
+ # 'models.Index': 'django.db.models.Index',
246
+ # 'db.models.Index': 'django.db.models.Index',
247
+ # 'django.db.models.Index.expressions': 'django.db.models.Index.expressions',
248
+ # 'expressions': 'django.db.models.Index.expressions',
249
+ # 'Index.expressions': 'django.db.models.Index.expressions',
250
+ # 'models.Index.expressions': 'django.db.models.Index.expressions',
251
+ # 'db.models.Index.expressions': 'django.db.models.Index.expressions',
252
+ # 'django.db.models.Index.fields': 'django.db.models.Index.fields',
253
+ # 'fields': 'django.db.models.Index.fields',
254
+ # 'Index.fields': 'django.db.models.Index.fields',
255
+ # 'models.Index.fields': 'django.db.models.Index.fields',
256
+ # 'db.models.Index.fields': 'django.db.models.Index.fields',
257
+ # ...
258
+ # }
259
+ code_paths = {}
260
+ for reference in code_references :
261
+ code_path = reference .split ("." )
262
+ for i in range (len (code_path )):
263
+ code_paths ["." .join (code_path [- i :])] = reference
264
+ return code_paths
265
+
266
+
216
267
def document_url (doc ):
217
268
if doc .path :
218
269
kwargs = {
@@ -249,7 +300,7 @@ def search(self, query_text, release):
249
300
query_text = query_text .strip ()
250
301
if query_text :
251
302
search_query = SearchQuery (
252
- query_text , config = models . F ( "config" ) , search_type = "websearch"
303
+ query_text , config = "simple" , search_type = "websearch"
253
304
)
254
305
search_rank = SearchRank (models .F ("search" ), search_query )
255
306
base_qs = (
0 commit comments