2
2
import html
3
3
import json
4
4
import operator
5
+ import re
5
6
from functools import reduce
6
7
from pathlib import Path
7
8
15
16
TrigramSimilarity ,
16
17
)
17
18
from django .core .cache import cache
18
- from django .db import models , transaction
19
+ from django .db import models , transaction , connection
19
20
from django .db .models import Prefetch , Q
20
21
from django .db .models .fields .json import KeyTextTransform
21
22
from django .utils .functional import cached_property
@@ -174,6 +175,18 @@ def sync_to_db(self, decoded_documents):
174
175
if line .startswith (f"Disallow: /{ self .lang } /{ self .release_id } /" )
175
176
]
176
177
178
+ language_mapping = TSEARCH_CONFIG_LANGUAGES
179
+ english = "custom_english"
180
+ with connection .cursor () as cursor :
181
+ cursor .execute (
182
+ "SELECT EXISTS(SELECT 1 FROM pg_ts_config WHERE cfgname = %s)" ,
183
+ [english ]
184
+ )
185
+ has_custom_english_config = cursor .fetchone ()[0 ]
186
+
187
+ if has_custom_english_config :
188
+ language_mapping ["en" ] = english
189
+
177
190
for document in decoded_documents :
178
191
if (
179
192
"body" not in document
@@ -187,12 +200,16 @@ def sync_to_db(self, decoded_documents):
187
200
document_path = _clean_document_path (document ["current_page_name" ])
188
201
document ["slug" ] = Path (document_path ).parts [- 1 ]
189
202
document ["parents" ] = " " .join (Path (document_path ).parts [:- 1 ])
203
+ document ["code_references" ] = _generate_code_references (document ["body" ])
204
+ document ["code_references_search" ] = " " .join (
205
+ document ["code_references" ].keys ()
206
+ )
190
207
Document .objects .create (
191
208
release = self ,
192
209
path = document_path ,
193
210
title = html .unescape (strip_tags (document ["title" ])),
194
211
metadata = document ,
195
- config = TSEARCH_CONFIG_LANGUAGES .get (
212
+ config = language_mapping .get (
196
213
self .lang [:2 ], DEFAULT_TEXT_SEARCH_CONFIG
197
214
),
198
215
)
@@ -213,6 +230,52 @@ def _clean_document_path(path):
213
230
return path
214
231
215
232
233
+ def _generate_code_references (body ):
234
+ """
235
+ Django documents classes with the syntax `.. class::`.
236
+ This results in the following HTML:
237
+ <dl class="py class">
238
+ <dt class="sig sig-object py" id="django.db.models.ManyToManyField">
239
+ ...
240
+ </dt>
241
+ </dl>
242
+ This is similar for attributes (`.. attribute::`), methods etc.
243
+ """
244
+ # Collect all <dt> HTML tag ids into a list, e.g:
245
+ # [
246
+ # 'django.db.models.Index',
247
+ # 'django.db.models.Index.expressions',
248
+ # 'django.db.models.Index.fields',
249
+ # ...
250
+ # ]
251
+ code_references = list (re .findall (r'<dt[^>]+id="([^"]+)"' , body ))
252
+ # As the search term can be "expressions", "Index.expressions" etc. create a mapping
253
+ # between potential code search terms and their HTML id.
254
+ # {
255
+ # 'django.db.models.Index': 'django.db.models.Index',
256
+ # 'Index': 'django.db.models.Index',
257
+ # 'models.Index': 'django.db.models.Index',
258
+ # 'db.models.Index': 'django.db.models.Index',
259
+ # 'django.db.models.Index.expressions': 'django.db.models.Index.expressions',
260
+ # 'expressions': 'django.db.models.Index.expressions',
261
+ # 'Index.expressions': 'django.db.models.Index.expressions',
262
+ # 'models.Index.expressions': 'django.db.models.Index.expressions',
263
+ # 'db.models.Index.expressions': 'django.db.models.Index.expressions',
264
+ # 'django.db.models.Index.fields': 'django.db.models.Index.fields',
265
+ # 'fields': 'django.db.models.Index.fields',
266
+ # 'Index.fields': 'django.db.models.Index.fields',
267
+ # 'models.Index.fields': 'django.db.models.Index.fields',
268
+ # 'db.models.Index.fields': 'django.db.models.Index.fields',
269
+ # ...
270
+ # }
271
+ code_paths = {}
272
+ for reference in code_references :
273
+ code_path = reference .split ("." )
274
+ for i in range (len (code_path )):
275
+ code_paths ["." .join (code_path [- i :])] = reference
276
+ return code_paths
277
+
278
+
216
279
def document_url (doc ):
217
280
if doc .path :
218
281
kwargs = {
0 commit comments