Skip to content

Commit 812885c

Browse files
committed
✨(backend) some refactor of indexer classes & modules
Rename FindDocumentIndexer as SearchIndexer Rename FindDocumentSerializer as SearchDocumentSerializer Rename package core.tasks.find as core.task.search Remove logs on http errors in SearchIndexer Factorise some code in search API view. Signed-off-by: Fabre Florian <[email protected]>
1 parent fa31d31 commit 812885c

File tree

10 files changed

+112
-111
lines changed

10 files changed

+112
-111
lines changed

env.d/development/common

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ Y_PROVIDER_API_KEY=yprovider-api-key
7979
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15
8080

8181
# Indexer
82-
SEARCH_INDEXER_CLASS="core.services.search_indexers.FindDocumentIndexer"
82+
SEARCH_INDEXER_CLASS="core.services.search_indexers.SearchIndexer"
8383
SEARCH_INDEXER_SECRET=find-api-key-for-docs-with-exactly-50-chars-length # Key generated by create_demo in Find app.
8484
SEARCH_INDEXER_URL="http://find:8000/api/v1.0/documents/index/"
8585
SEARCH_INDEXER_QUERY_URL="http://find:8000/api/v1.0/documents/search/"

src/backend/core/api/serializers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -890,8 +890,8 @@ class MoveDocumentSerializer(serializers.Serializer):
890890
)
891891

892892

893-
class FindDocumentSerializer(serializers.Serializer):
894-
"""Serializer for Find search requests"""
893+
class SearchDocumentSerializer(serializers.Serializer):
894+
"""Serializer for fulltext search requests through Find application"""
895895

896896
q = serializers.CharField(required=True, allow_blank=False, trim_whitespace=True)
897897
page_size = serializers.IntegerField(

src/backend/core/api/viewsets.py

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,63 +1051,72 @@ def duplicate(self, request, *args, **kwargs):
10511051
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
10521052
)
10531053

1054+
def _simple_search_queryset(self, params):
1055+
"""
1056+
Returns a queryset filtered by the content of the document title
1057+
"""
1058+
text = params.validated_data["q"]
1059+
1060+
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
1061+
queryset = self.get_queryset()
1062+
filterset = DocumentFilter({"title": text}, queryset=queryset)
1063+
1064+
if not filterset.is_valid():
1065+
raise drf.exceptions.ValidationError(filterset.errors)
1066+
1067+
return filterset.filter_queryset(queryset)
1068+
1069+
def _fulltext_search_queryset(self, indexer, token, user, params):
1070+
"""
1071+
Returns a queryset from the results the fulltext search of Find
1072+
"""
1073+
text = params.validated_data["q"]
1074+
queryset = models.Document.objects.all()
1075+
1076+
# Retrieve the documents ids from Find.
1077+
results = indexer.search(
1078+
text=text,
1079+
token=token,
1080+
visited=get_visited_document_ids_of(queryset, user),
1081+
page=params.validated_data.get("page", 1),
1082+
page_size=params.validated_data.get("page_size", 20),
1083+
)
1084+
1085+
return queryset.filter(pk__in=results)
1086+
10541087
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
10551088
@method_decorator(refresh_oidc_access_token)
10561089
def search(self, request, *args, **kwargs):
10571090
"""
10581091
Returns a DRF response containing the filtered, annotated and ordered document list.
10591092
1060-
Applies filtering based on request parameter 'q' from `FindDocumentSerializer`.
1093+
Applies filtering based on request parameter 'q' from `SearchDocumentSerializer`.
10611094
Depending of the configuration it can be:
10621095
- A fulltext search through the opensearch indexation app "find" if the backend is
1063-
enabled (see SEARCH_BACKEND_CLASS)
1096+
enabled (see SEARCH_INDEXER_CLASS)
10641097
- A filtering by the model field 'title'.
10651098
10661099
The ordering is always by the most recent first.
10671100
"""
10681101
access_token = request.session.get("oidc_access_token")
10691102
user = request.user
10701103

1071-
serializer = serializers.FindDocumentSerializer(data=request.query_params)
1072-
serializer.is_valid(raise_exception=True)
1104+
params = serializers.SearchDocumentSerializer(data=request.query_params)
1105+
params.is_valid(raise_exception=True)
10731106

10741107
indexer = get_document_indexer()
1075-
text = serializer.validated_data["q"]
1076-
1077-
# The indexer is not configured, so we fallback on a simple filter on the
1078-
# model field 'title'.
1079-
if not indexer:
1080-
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
1081-
queryset = self.get_queryset()
1082-
filterset = DocumentFilter({"title": text}, queryset=queryset)
1083-
1084-
if not filterset.is_valid():
1085-
raise drf.exceptions.ValidationError(filterset.errors)
10861108

1087-
queryset = filterset.filter_queryset(queryset).order_by("-updated_at")
1088-
1089-
return self.get_response_for_queryset(
1090-
queryset,
1091-
context={
1092-
"request": request,
1093-
},
1109+
if indexer:
1110+
queryset = self._fulltext_search_queryset(
1111+
indexer, token=access_token, user=user, params=params
10941112
)
1095-
1096-
queryset = models.Document.objects.all()
1097-
1098-
# Retrieve the documents ids from Find.
1099-
results = indexer.search(
1100-
text=text,
1101-
token=access_token,
1102-
visited=get_visited_document_ids_of(queryset, user),
1103-
page=serializer.validated_data.get("page", 1),
1104-
page_size=serializer.validated_data.get("page_size", 20),
1105-
)
1106-
1107-
queryset = queryset.filter(pk__in=results).order_by("-updated_at")
1113+
else:
1114+
# The indexer is not configured, we fallback on a simple icontains filter by the
1115+
# model field 'title'.
1116+
queryset = self._simple_search_queryset(params)
11081117

11091118
return self.get_response_for_queryset(
1110-
queryset,
1119+
queryset.order_by("-updated_at"),
11111120
context={
11121121
"request": request,
11131122
},

src/backend/core/services/search_indexers.py

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def search_query(self, data, token) -> dict:
223223
"""
224224

225225

226-
class FindDocumentIndexer(BaseDocumentIndexer):
226+
class SearchIndexer(BaseDocumentIndexer):
227227
"""
228228
Document indexer that pushes documents to La Suite Find app.
229229
"""
@@ -270,18 +270,14 @@ def search_query(self, data, token) -> requests.Response:
270270
Returns:
271271
dict: A JSON-serializable dictionary.
272272
"""
273-
try:
274-
response = requests.post(
275-
self.search_url,
276-
json=data,
277-
headers={"Authorization": f"Bearer {token}"},
278-
timeout=10,
279-
)
280-
response.raise_for_status()
281-
return response.json()
282-
except requests.exceptions.HTTPError as e:
283-
logger.error("HTTPError: %s", e)
284-
raise
273+
response = requests.post(
274+
self.search_url,
275+
json=data,
276+
headers={"Authorization": f"Bearer {token}"},
277+
timeout=10,
278+
)
279+
response.raise_for_status()
280+
return response.json()
285281

286282
def push(self, data):
287283
"""
@@ -290,14 +286,10 @@ def push(self, data):
290286
Args:
291287
data (list): List of document dictionaries.
292288
"""
293-
try:
294-
response = requests.post(
295-
self.indexer_url,
296-
json=data,
297-
headers={"Authorization": f"Bearer {self.indexer_secret}"},
298-
timeout=10,
299-
)
300-
response.raise_for_status()
301-
except requests.exceptions.HTTPError as e:
302-
logger.error("HTTPError: %s", e)
303-
raise
289+
response = requests.post(
290+
self.indexer_url,
291+
json=data,
292+
headers={"Authorization": f"Bearer {self.indexer_secret}"},
293+
timeout=10,
294+
)
295+
response.raise_for_status()

src/backend/core/signals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from django.dispatch import receiver
1010

1111
from . import models
12-
from .tasks.find import trigger_document_indexer
12+
from .tasks.search import trigger_document_indexer
1313

1414

1515
@receiver(signals.post_save, sender=models.Document)
File renamed without changes.

src/backend/core/tests/commands/test_index.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@
1111
import pytest
1212

1313
from core import factories
14-
from core.services.search_indexers import FindDocumentIndexer
14+
from core.services.search_indexers import SearchIndexer
1515

1616

1717
@pytest.mark.django_db
1818
@pytest.mark.usefixtures("indexer_settings")
1919
def test_index():
2020
"""Test the command `index` that run the Find app indexer for all the available documents."""
2121
user = factories.UserFactory()
22-
indexer = FindDocumentIndexer()
22+
indexer = SearchIndexer()
2323

2424
with transaction.atomic():
2525
doc = factories.DocumentFactory()
@@ -36,7 +36,7 @@ def test_index():
3636
str(no_title_doc.path): {"users": [user.sub]},
3737
}
3838

39-
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
39+
with mock.patch.object(SearchIndexer, "push") as mock_push:
4040
call_command("index")
4141

4242
push_call_args = [call.args[0] for call in mock_push.call_args_list]

src/backend/core/tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def indexer_settings_fixture(settings):
3939

4040
get_document_indexer.cache_clear()
4141

42-
settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer"
42+
settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.SearchIndexer"
4343
settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest"
4444
settings.SEARCH_INDEXER_URL = "http://localhost:8081/api/v1.0/documents/index/"
4545
settings.SEARCH_INDEXER_QUERY_URL = (

src/backend/core/tests/test_models_documents.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import pytest
2222

2323
from core import factories, models
24-
from core.services.search_indexers import FindDocumentIndexer
24+
from core.services.search_indexers import SearchIndexer
2525

2626
pytestmark = pytest.mark.django_db
2727

@@ -1450,7 +1450,7 @@ def test_models_documents_compute_ancestors_links_paths_mapping_structure(
14501450
}
14511451

14521452

1453-
@mock.patch.object(FindDocumentIndexer, "push")
1453+
@mock.patch.object(SearchIndexer, "push")
14541454
@pytest.mark.django_db(transaction=True)
14551455
def test_models_documents_post_save_indexer(mock_push, indexer_settings):
14561456
"""Test indexation task on document creation"""
@@ -1462,7 +1462,7 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
14621462
accesses = {}
14631463
data = [call.args[0] for call in mock_push.call_args_list]
14641464

1465-
indexer = FindDocumentIndexer()
1465+
indexer = SearchIndexer()
14661466

14671467
assert sorted(data, key=itemgetter("id")) == sorted(
14681468
[
@@ -1479,7 +1479,7 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
14791479
assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
14801480

14811481

1482-
@mock.patch.object(FindDocumentIndexer, "push")
1482+
@mock.patch.object(SearchIndexer, "push")
14831483
@pytest.mark.django_db(transaction=True)
14841484
def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_settings):
14851485
"""Task should not start an indexation when disabled"""
@@ -1492,7 +1492,7 @@ def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_se
14921492
assert mock_push.call_args_list == []
14931493

14941494

1495-
@mock.patch.object(FindDocumentIndexer, "push")
1495+
@mock.patch.object(SearchIndexer, "push")
14961496
@pytest.mark.django_db(transaction=True)
14971497
def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_settings):
14981498
"""Test indexation task on document creation"""
@@ -1515,7 +1515,7 @@ def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_set
15151515

15161516
data = [call.args[0] for call in mock_push.call_args_list]
15171517

1518-
indexer = FindDocumentIndexer()
1518+
indexer = SearchIndexer()
15191519

15201520
assert sorted(data, key=itemgetter("id")) == sorted(
15211521
[
@@ -1532,7 +1532,7 @@ def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_set
15321532
assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
15331533

15341534

1535-
@mock.patch.object(FindDocumentIndexer, "push")
1535+
@mock.patch.object(SearchIndexer, "push")
15361536
@pytest.mark.django_db(transaction=True)
15371537
def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings):
15381538
"""Indexation task on deleted or ancestor_deleted documents"""
@@ -1575,7 +1575,7 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
15751575

15761576
data = [call.args[0] for call in mock_push.call_args_list]
15771577

1578-
indexer = FindDocumentIndexer()
1578+
indexer = SearchIndexer()
15791579

15801580
# Even deleted document are re-indexed : only update their status in the future ?
15811581
assert sorted(data, key=itemgetter("id")) == sorted(
@@ -1594,7 +1594,7 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
15941594
assert cache.get(f"doc-indexer-debounce-{doc_ancestor_deleted.pk}") == 0
15951595

15961596

1597-
@mock.patch.object(FindDocumentIndexer, "push")
1597+
@mock.patch.object(SearchIndexer, "push")
15981598
@pytest.mark.django_db(transaction=True)
15991599
def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings):
16001600
"""Restart indexation task on restored documents"""
@@ -1648,7 +1648,7 @@ def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings
16481648

16491649
data = [call.args[0] for call in mock_push.call_args_list]
16501650

1651-
indexer = FindDocumentIndexer()
1651+
indexer = SearchIndexer()
16521652

16531653
# All docs are re-indexed
16541654
assert sorted(data, key=itemgetter("id")) == sorted(
@@ -1668,10 +1668,10 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings):
16681668
"""Test indexation task skipping on document update"""
16691669
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
16701670

1671-
indexer = FindDocumentIndexer()
1671+
indexer = SearchIndexer()
16721672
user = factories.UserFactory()
16731673

1674-
with mock.patch.object(FindDocumentIndexer, "push"):
1674+
with mock.patch.object(SearchIndexer, "push"):
16751675
with transaction.atomic():
16761676
doc = factories.DocumentFactory()
16771677
factories.UserDocumentAccessFactory(document=doc, user=user)
@@ -1680,7 +1680,7 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings):
16801680
str(doc.path): {"users": [user.sub]},
16811681
}
16821682

1683-
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
1683+
with mock.patch.object(SearchIndexer, "push") as mock_push:
16841684
# Simulate 1 waiting task
16851685
cache.set(f"doc-indexer-debounce-{doc.pk}", 1)
16861686

@@ -1691,7 +1691,7 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings):
16911691

16921692
assert [call.args[0] for call in mock_push.call_args_list] == []
16931693

1694-
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
1694+
with mock.patch.object(SearchIndexer, "push") as mock_push:
16951695
# No waiting task
16961696
cache.set(f"doc-indexer-debounce-{doc.pk}", 0)
16971697

@@ -1709,10 +1709,10 @@ def test_models_documents_access_post_save_indexer(indexer_settings):
17091709
"""Test indexation task on DocumentAccess update"""
17101710
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
17111711

1712-
indexer = FindDocumentIndexer()
1712+
indexer = SearchIndexer()
17131713
user = factories.UserFactory()
17141714

1715-
with mock.patch.object(FindDocumentIndexer, "push"):
1715+
with mock.patch.object(SearchIndexer, "push"):
17161716
with transaction.atomic():
17171717
doc = factories.DocumentFactory()
17181718
doc_access = factories.UserDocumentAccessFactory(document=doc, user=user)
@@ -1721,9 +1721,9 @@ def test_models_documents_access_post_save_indexer(indexer_settings):
17211721
str(doc.path): {"users": [user.sub]},
17221722
}
17231723

1724-
indexer = FindDocumentIndexer()
1724+
indexer = SearchIndexer()
17251725

1726-
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
1726+
with mock.patch.object(SearchIndexer, "push") as mock_push:
17271727
with transaction.atomic():
17281728
doc_access.save()
17291729

0 commit comments

Comments
 (0)