Skip to content

Commit 669d4bc

Browse files
author
Daan van der Kallen
authored
Merge pull request #204 from CodeYellowBV/annotation-optimizations
Optimize annotations
2 parents d2d9b50 + 0f900e0 commit 669d4bc

File tree

2 files changed

+202
-33
lines changed

2 files changed

+202
-33
lines changed

binder/plugins/views/file_hash_view.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def _get_params(self, obj, field):
5555
else:
5656
return '?' + '&'.join(params)
5757

58-
def _get_objs(self, queryset, request=None, annotations=None):
58+
def _get_objs(self, queryset, *args, **kwargs):
5959
params = {
6060
obj.pk: {
6161
field: self._get_params(obj, field)
@@ -66,7 +66,7 @@ def _get_objs(self, queryset, request=None, annotations=None):
6666
for obj in queryset
6767
}
6868

69-
data = super()._get_objs(queryset, request, annotations)
69+
data = super()._get_objs(queryset, *args, **kwargs)
7070

7171
for obj in data:
7272
obj.update({

binder/views.py

Lines changed: 200 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,50 @@
3535
from .json import JsonResponse, jsonloads
3636

3737

38+
def get_joins_from_queryset(queryset):
    """
    Given a queryset returns a set of lines that are used to determine which
    tables will be joined and how. In essence this is the FROM-statement and
    every JOIN-statement in a set as a string.

    This is useful to compare the joins between querysets.

    Raises an AssertionError when one of the entries in the alias map
    compiles to SQL that carries params, since those cannot be represented
    in the returned set (see the comment in the loop below).
    """
    # So to generate sql we need the compiler and connection for the right db
    compiler = queryset.query.get_compiler(queryset.db)
    connection = connections[queryset.db]
    # Now we will just go through all tables in the alias_map
    lines = set()
    for alias in queryset.query.alias_map.values():
        line, params = alias.as_sql(compiler, connection)
        # We require that we have no params for now, you need to do custom
        # stuff for these to appear in joins and substituting params into the
        # sql is not something we can easily do safely for now, just passing
        # them along with the str will make us potentially have unhashable
        # lines which will ruin the set.
        # This is an explicit raise instead of an assert statement so the
        # check is not stripped when Python runs with -O.
        if params:
            raise AssertionError(
                'Joins with SQL params are not supported when comparing '
                'queryset joins: {!r}'.format(line)
            )
        lines.add(line)
    return lines
61+
62+
63+
def q_get_flat_filters(q):
    """
    Iterate over the filter names used anywhere inside the given Q-object.

    Nested Q-objects are walked depth-first in the order of their children,
    so Q(foo=1, bar=2) yields 'foo' and 'bar', and filters of nested
    Q-objects are yielded at the position of that nested Q-object.

    Only the filter part of every (filter, value) child is yielded, which
    makes this suitable for detecting which fields a Q-object refers to.
    """
    for node in q.children:
        if not isinstance(node, Q):
            # A leaf is a 2-tuple of filter & value, only the filter is of
            # interest here
            yield node[0]
            continue
        # A nested Q-object is flattened recursively in place
        yield from q_get_flat_filters(node)
80+
81+
3882
def split_par_aware(content):
3983
start = 0
4084
depth = 0
@@ -533,7 +577,7 @@ def _get_reverse_relations(self):
533577
# Kinda like model_to_dict() for multiple objects.
534578
# Return a list of dictionaries, one per object in the queryset.
535579
# Includes a list of ids for all m2m fields (including reverse relations).
536-
def _get_objs(self, queryset, request, annotations=None):
580+
def _get_objs(self, queryset, request, annotations=None, to_annotate={}):
537581
datas = []
538582
datas_by_id = {} # Save datas so we can annotate m2m fields later (avoiding a query)
539583
objs_by_id = {} # Same for original objects
@@ -551,6 +595,52 @@ def _get_objs(self, queryset, request, annotations=None):
551595
else:
552596
annotations &= set(self.shown_annotations)
553597

598+
# So now annotations are only being used for showing, so we filter out
599+
# all that do not have to be shown
600+
to_annotate = {
601+
key: value
602+
for key, value in to_annotate.items()
603+
if key in annotations
604+
}
605+
606+
# So now we will divide annotations based on the joins they do
607+
base_joins = get_joins_from_queryset(queryset)
608+
annotation_sets = []
609+
610+
for name, expr in list(to_annotate.items()):
611+
annotation_joins = get_joins_from_queryset(
612+
self.model.objects.annotate(**{name: expr})
613+
)
614+
annotation_annotations = {name: expr}
615+
# First check if the queryset already does all joins, in that case
616+
# we can just add it to the main queryset without any performance
617+
# hits
618+
if annotation_joins <= base_joins:
619+
queryset = queryset.annotate(**annotation_annotations)
620+
to_annotate.pop(name)
621+
continue
622+
# Then try to merge it into the annotation sets
623+
i = 0
624+
while i < len(annotation_sets):
625+
set_joins, set_annotations = annotation_sets[i]
626+
# If our joins are a subset of the annotation set we just add
627+
# our annotation to the set and break
628+
if annotation_joins <= set_joins:
629+
set_annotations.update(annotation_annotations)
630+
break
631+
# If our joins are a superset of the annotation set we take its
632+
# annotations and add it to ours
633+
elif set_joins <= annotation_joins:
634+
annotation_annotations.update(set_annotations)
635+
annotation_sets.pop(i)
636+
# Go on to the next
637+
else:
638+
i += 1
639+
# If no annotation set existed that matched our joins we create a
640+
# new one
641+
else:
642+
annotation_sets.append((annotation_joins, annotation_annotations))
643+
554644
for obj in queryset:
555645
# So we tend to make binder call queryset.distinct when necessary
556646
# to prevent duplicate results, this is however not always possible
@@ -580,7 +670,8 @@ def _get_objs(self, queryset, request, annotations=None):
580670
data[f.name] = getattr(obj, f.attname)
581671

582672
for a in annotations:
583-
data[a] = getattr(obj, a)
673+
if a not in to_annotate:
674+
data[a] = getattr(obj, a)
584675

585676
for prop in self.shown_properties:
586677
data[prop] = getattr(obj, prop)
@@ -592,6 +683,18 @@ def _get_objs(self, queryset, request, annotations=None):
592683
datas_by_id[obj.pk] = data
593684
objs_by_id[obj.pk] = obj
594685

686+
for _, set_annotations in annotation_sets:
687+
for set_values in (
688+
self.model.objects
689+
.filter(pk__in=datas_by_id)
690+
.annotate(**set_annotations)
691+
.values('pk', *set_annotations)
692+
):
693+
pk_ = set_values.pop('pk')
694+
for name, value in set_values.items():
695+
datas_by_id[pk_][name] = value
696+
setattr(objs_by_id[pk_], name, value)
697+
595698
self._annotate_objs(datas_by_id, objs_by_id)
596699

597700
return datas
@@ -631,10 +734,15 @@ def _annotate_objs(self, datas_by_id, objs_by_id):
631734
def _get_obj(self, pk, request, include_annotations=None):
632735
if include_annotations is None:
633736
include_annotations = self._parse_include_annotations(request)
737+
annotations = include_annotations.get('')
634738
results = self._get_objs(
635-
annotate(self.get_queryset(request).filter(pk=pk), request, include_annotations.get('')),
739+
self.get_queryset(request).filter(pk=pk),
636740
request=request,
637-
annotations=include_annotations.get(''),
741+
annotations=annotations,
742+
to_annotate={
743+
name: value['expr']
744+
for name, value in get_annotations(self.model, request, annotations).items()
745+
},
638746
)
639747
if results:
640748
return results[0]
@@ -806,9 +914,13 @@ def withs_to_nested_set(withs, result={}):
806914
view.router = self.router
807915
for annotations, with_pks in annotation_ids.items():
808916
objs = view._get_objs(
809-
annotate(view.get_queryset(request).filter(pk__in=with_pks), request, annotations),
917+
view.get_queryset(request).filter(pk__in=with_pks),
810918
request=request,
811919
annotations=annotations,
920+
to_annotate={
921+
name: value['expr']
922+
for name, value in get_annotations(view.model, request, annotations).items()
923+
},
812924
)
813925
for obj in objs:
814926
view._annotate_obj_with_related_withs(obj, withs_per_model[model_name])
@@ -1161,10 +1273,9 @@ def _parse_order_by(self, queryset, field, request, partial=''):
11611273
return (queryset, partial + head, nulls_last)
11621274

11631275

1164-
1165-
def search(self, queryset, search, request):
1276+
def _search_base(self, search, request):
11661277
if not search:
1167-
return queryset
1278+
return ~Q(pk__in=[])
11681279

11691280
if not (self.searches or self.transformed_searches):
11701281
raise BinderRequestError('No search fields defined for this view.')
@@ -1178,7 +1289,12 @@ def search(self, queryset, search, request):
11781289
q |= Q(**{s: transform(search)})
11791290
except ValueError:
11801291
pass
1181-
return queryset.filter(q)
1292+
1293+
return q
1294+
1295+
1296+
def search(self, queryset, search, request):
    """
    Filter the queryset with the Q-object that _search_base builds for the
    given search term and request.
    """
    search_q = self._search_base(search, request)
    return queryset.filter(search_q)
11821298

11831299

11841300
def filter_deleted(self, queryset, pk, deleted, request):
@@ -1239,13 +1355,23 @@ def get_queryset(self, request):
12391355

12401356

12411357

1242-
def order_by(self, queryset, request):
1358+
def _order_by_base(self, queryset, request, annotations):
12431359
#### order_by
12441360
order_bys = list(filter(None, request.GET.get('order_by', '').split(',')))
12451361

12461362
orders = []
12471363
if order_bys:
12481364
for o in order_bys:
1365+
# We split off a leading - (descending sorting) and the
1366+
# suffixes nulls_last and nulls_first
1367+
head = re.match(r'^-?(.*?)(__nulls_last|__nulls_first)?$', o).group(1)
1368+
try:
1369+
expr = annotations.pop(head)
1370+
except KeyError:
1371+
pass
1372+
else:
1373+
queryset = queryset.annotate(**{head: expr})
1374+
12491375
if o.startswith('-'):
12501376
queryset, order, nulls_last = self._parse_order_by(queryset, o[1:], request, partial='-')
12511377
else:
@@ -1281,6 +1407,10 @@ def order_by(self, queryset, request):
12811407
return queryset
12821408

12831409

1410+
def order_by(self, queryset, request):
    """
    Apply the ordering requested in the request to the queryset.

    Delegates to _order_by_base with an empty annotation mapping, so no
    pending annotation is added to the queryset while ordering.
    """
    no_pending_annotations = {}
    return self._order_by_base(queryset, request, no_pending_annotations)
1412+
1413+
12841414
def _annotate_obj_with_related_withs(self, obj, field_results):
12851415
for (w, (view, ids_dict, is_singular)) in field_results.items():
12861416
if '.' not in w:
@@ -1300,16 +1430,24 @@ def _generate_meta(self, include_meta, queryset, request, pk=None):
13001430
# Only 'pk' values should reduce DB server memory a (little?) bit, making
13011431
# things faster. Not prefetching related models here makes it faster still.
13021432
# See also https://code.djangoproject.com/ticket/23771 and related tickets.
1303-
meta['total_records'] = queryset.prefetch_related(None).values('pk').count()
1433+
meta['total_records'] = queryset.order_by().prefetch_related(None).values('pk').count()
13041434

13051435
return meta
13061436

1437+
def _apply_q_with_possible_annotations(self, queryset, q, annotations):
1438+
for filter in q_get_flat_filters(q):
1439+
head = filter.split('__', 1)[0]
1440+
try:
1441+
expr = annotations.pop(head)
1442+
except KeyError:
1443+
pass
1444+
else:
1445+
queryset = queryset.annotate(**{head: expr})
13071446

1308-
def get_filtered_queryset(self, request, pk=None, include_annotations=None):
1309-
"""
1310-
Returns a scoped queryset with filtering and sorting applied as
1311-
specified by the request.
1312-
"""
1447+
return queryset.filter(q)
1448+
1449+
1450+
def _get_filtered_queryset_base(self, request, pk=None, include_annotations=None):
13131451
queryset = self.get_queryset(request)
13141452
if pk:
13151453
queryset = queryset.filter(pk=int(pk))
@@ -1326,43 +1464,64 @@ def get_filtered_queryset(self, request, pk=None, include_annotations=None):
13261464
if include_annotations is None:
13271465
include_annotations = self._parse_include_annotations(request)
13281466

1329-
queryset = annotate(queryset, request, include_annotations.get(''))
1467+
annotations = {
1468+
name: value['expr']
1469+
for name, value in get_annotations(queryset.model, request, include_annotations.get('')).items()
1470+
}
13301471

13311472
#### filters
13321473
filters = {k.lstrip('.'): v for k, v in request.GET.lists() if k.startswith('.')}
13331474
for field, values in filters.items():
1475+
13341476
for v in values:
13351477
q, distinct = self._parse_filter(field, v, request, include_annotations)
1336-
queryset = queryset.filter(q)
1478+
queryset = self._apply_q_with_possible_annotations(queryset, q, annotations)
13371479
if distinct:
13381480
queryset = queryset.distinct()
13391481

13401482
#### search
13411483
if 'search' in request.GET:
1342-
queryset = self.search(queryset, request.GET['search'], request)
1484+
q = self._search_base(request.GET['search'], request)
1485+
queryset = self._apply_q_with_possible_annotations(queryset, q, annotations)
13431486

1344-
queryset = self.order_by(queryset, request)
1487+
return queryset, annotations
13451488

1489+
def get_filtered_queryset(self, request, *args, **kwargs):
    """
    Returns a scoped queryset with filtering and sorting applied as
    specified by the request.

    Any extra arguments are passed on to _get_filtered_queryset_base. All
    annotations it hands back as not yet applied are added to the queryset
    before the ordering is applied.
    """
    base_queryset, pending_annotations = self._get_filtered_queryset_base(request, *args, **kwargs)
    annotated_queryset = base_queryset.annotate(**pending_annotations)
    return self.order_by(annotated_queryset, request)
13471498

1348-
13491499
def get(self, request, pk=None, withs=None, include_annotations=None):
13501500
include_meta = request.GET.get('include_meta', 'total_records').split(',')
13511501
if include_annotations is None:
13521502
include_annotations = self._parse_include_annotations(request)
13531503

1354-
queryset = self.get_filtered_queryset(request, pk, include_annotations)
1504+
queryset, annotations = self._get_filtered_queryset_base(request, pk, include_annotations)
13551505

13561506
meta = self._generate_meta(include_meta, queryset, request, pk)
13571507

1508+
queryset = self._order_by_base(queryset, request, annotations)
13581509
queryset = self._paginate(queryset, request)
13591510

1360-
#### with
1361-
# parse wheres from request
1362-
data = self._get_objs(queryset, request=request, annotations=include_annotations.get(''))
1511+
# We fetch the data with only the currently applied annotations
1512+
data = self._get_objs(
1513+
queryset,
1514+
request=request,
1515+
annotations=include_annotations.get(''),
1516+
to_annotate=annotations,
1517+
)
13631518

1364-
pks = [obj['id'] for obj in data]
1519+
# Now we add all remaining annotations to this data
1520+
data_by_pk = {obj['id']: obj for obj in data}
1521+
pks = set(data_by_pk)
13651522

1523+
#### with
1524+
# parse wheres from request
13661525
extras, extras_mapping, extras_reverse_mapping, field_results = self._get_withs(pks, withs, request=request, include_annotations=include_annotations)
13671526

13681527
for obj in data:
@@ -1505,10 +1664,15 @@ def store_m2m_field(obj, field, value, request):
15051664

15061665
# Permission checks are done at this point, so we can avoid get_queryset()
15071666
include_annotations = self._parse_include_annotations(request)
1667+
annotations = include_annotations.get('')
15081668
data = self._get_objs(
1509-
annotate(self.model.objects.filter(pk=obj.pk), request, include_annotations.get('')),
1669+
self.model.objects.filter(pk=obj.pk),
15101670
request=request,
1511-
annotations=include_annotations.get(''),
1671+
annotations=annotations,
1672+
to_annotate={
1673+
name: value['expr']
1674+
for name, value in get_annotations(self.model, request, annotations).items()
1675+
},
15121676
)[0]
15131677
data['_meta'] = {'ignored_fields': ignored_fields}
15141678
return data
@@ -2374,10 +2538,15 @@ def put(self, request, pk=None):
23742538
obj = self.get_queryset(request).select_for_update().get(pk=int(pk))
23752539
# Permission checks are done at this point, so we can avoid get_queryset()
23762540
include_annotations = self._parse_include_annotations(request)
2541+
annotations = include_annotations.get('')
23772542
old = self._get_objs(
2378-
annotate(self.model.objects.filter(pk=int(pk)), request, include_annotations.get('')),
2379-
request,
2380-
include_annotations.get(''),
2543+
self.model.objects.filter(pk=int(pk)),
2544+
request=request,
2545+
annotations=annotations,
2546+
to_annotate={
2547+
name: value['expr']
2548+
for name, value in get_annotations(self.model, request, annotations).items()
2549+
},
23812550
)[0]
23822551
except ObjectDoesNotExist:
23832552
raise BinderNotFound()

0 commit comments

Comments
 (0)