From e3b1d2b01ebbac1e8011cab13769f97f919a6987 Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 06:17:46 +0200 Subject: [PATCH 01/19] "Refactor SQL query time calculation and reorder imports - Ordered Django model imports alphabetically for better readability. - Refactored `time_spent_on_sql_queries` property in the `Request` model: - Replaced manual summation of `time_taken` with a Django aggregate query. - Used `ExpressionWrapper` to calculate the total time spent on SQL queries by subtracting `start_time` from `end_time`. - Ensured `None` values are handled gracefully using the `Coalesce` function. - Updated relevant test case to reflect the changes. This improves the efficiency and readability of the code." --- project/tests/test_models.py | 7 +++---- silk/models.py | 29 +++++++++++++++++++---------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/project/tests/test_models.py b/project/tests/test_models.py index efdb0d0a..7e28d68e 100644 --- a/project/tests/test_models.py +++ b/project/tests/test_models.py @@ -69,16 +69,15 @@ def test_time_spent_on_sql_queries_if_has_no_related_SQLQueries(self): self.assertEqual(self.obj.time_spent_on_sql_queries, 0) - # FIXME probably a bug def test_time_spent_on_sql_queries_if_has_related_SQLQueries_with_no_time_taken(self): query = SQLQueryFactory() self.obj.queries.add(query) self.assertEqual(query.time_taken, None) - - with self.assertRaises(TypeError): - self.obj.time_spent_on_sql_queries + + # No exception should be raised, and the result should be 0.0 + self.assertEqual(self.obj.time_spent_on_sql_queries, 0.0) def test_time_spent_on_sql_queries_if_has_related_SQLQueries_and_time_taken(self): diff --git a/silk/models.py b/silk/models.py index 075c5e9a..8b356fbb 100644 --- a/silk/models.py +++ b/silk/models.py @@ -11,17 +11,22 @@ BooleanField, CharField, DateTimeField, + ExpressionWrapper, + F, FileField, FloatField, ForeignKey, IntegerField, ManyToManyField, OneToOneField, - TextField, + Sum, + TextField ) + from django.utils import timezone from django.utils.safestring import mark_safe + from silk.config import SilkyConfig from silk.utils.profile_parser import parse_profile @@ -124,16 +129,20 @@ def profile_table(self): @property def time_spent_on_sql_queries(self): + """" + Calculate the total time spent on SQL queries using Django aggregates. """ - TODO: Perhaps there is a nicer way to do this with Django aggregates? - My initial thought was to perform: - SQLQuery.objects.filter.aggregate(Sum(F('end_time')) - Sum(F('start_time'))) - However this feature isnt available yet, however there has been talk - for use of F objects within aggregates for four years - here: https://code.djangoproject.com/ticket/14030. It looks - like this will go in soon at which point this should be changed. - """ - return sum(x.time_taken for x in SQLQuery.objects.filter(request=self)) + + total_time = SQLQuery.objects.filter(request=self).aggregate( + total_time=Sum( + ExpressionWrapper( + F('end_time') - F('start_time'), + output_field=FloatField() + ) + ) + )['total_time'] + return total_time or 0.0 + @property def headers(self): From 1e01bd9a827d5253911f6355ea95710247cb06df Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 04:22:19 +0000 Subject: [PATCH 02/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- project/tests/test_models.py | 2 +- silk/models.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/project/tests/test_models.py b/project/tests/test_models.py index 7e28d68e..bf967c44 100644 --- a/project/tests/test_models.py +++ b/project/tests/test_models.py @@ -75,7 +75,7 @@ def test_time_spent_on_sql_queries_if_has_related_SQLQueries_with_no_time_taken( self.obj.queries.add(query) self.assertEqual(query.time_taken, None) - + # No exception should be raised, and the result should be 0.0 self.assertEqual(self.obj.time_spent_on_sql_queries, 0.0) diff --git a/silk/models.py b/silk/models.py index 8b356fbb..3bd27b1e 100644 --- a/silk/models.py +++ b/silk/models.py @@ -20,13 +20,11 @@ ManyToManyField, OneToOneField, Sum, - TextField + TextField, ) - from django.utils import timezone from django.utils.safestring import mark_safe - from silk.config import SilkyConfig from silk.utils.profile_parser import parse_profile @@ -142,7 +140,6 @@ def time_spent_on_sql_queries(self): ) )['total_time'] return total_time or 0.0 - @property def headers(self): From 2ce501aec625467713ac461f7909ad5d77444a2f Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 12:02:12 +0200 Subject: [PATCH 03/19] fix : bug use datetime and convert to milliseconds --- silk/models.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/silk/models.py b/silk/models.py index 3bd27b1e..d68bb608 100644 --- a/silk/models.py +++ b/silk/models.py @@ -10,6 +10,7 @@ from django.db.models import ( BooleanField, CharField, + DateField, DateTimeField, ExpressionWrapper, F, @@ -130,16 +131,16 @@ def time_spent_on_sql_queries(self): """" Calculate the total time spent on SQL queries using Django aggregates. """ - - total_time = SQLQuery.objects.filter(request=self).aggregate( + total_time_seconds = SQLQuery.objects.filter(request=self).aggregate( total_time=Sum( ExpressionWrapper( - F('end_time') - F('start_time'), - output_field=FloatField() + (F('end_time') - F('start_time')), + output_field=DateField() ) ) )['total_time'] - return total_time or 0.0 + return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 + @property def headers(self): From 42587a4bce6fdc0821b8db7f3fc753d29facf1a5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:02:26 +0000 Subject: [PATCH 04/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- silk/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/silk/models.py b/silk/models.py index d68bb608..1583dd4d 100644 --- a/silk/models.py +++ b/silk/models.py @@ -139,8 +139,7 @@ def time_spent_on_sql_queries(self): ) ) )['total_time'] - return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 - + return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 @property def headers(self): From f68af2fd1535c87ba922aa65ff20958b3822c56b Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 20:38:24 +0200 Subject: [PATCH 05/19] test: increase verboisity to debug tox --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3b97b007..dc4d0721 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -89,7 +89,7 @@ jobs: - name: Tox tests run: | - tox -v + tox -vvv env: DJANGO: ${{ matrix.django-version }} From 69cc9a43beef97a5f6a299dd5bc4d350e3f1c592 Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 20:40:12 +0200 Subject: [PATCH 06/19] test: only enable one test to debug tox --- pytest.ini | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 861e79eb..4dd2b58a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,6 @@ [pytest] addopts = --cov silk --cov-config .coveragerc --cov-append --cov-report term --cov-report=xml -python_files = test.py tests.py test_*.py tests_*.py *_tests.py *_test.py +python_files = tests/test_app_config.py +; python_files = test.py tests.py test_*.py tests_*.py *_tests.py *_test.py + DJANGO_SETTINGS_MODULE = project.settings From 0ecaa921954afdf20b6417177b6d1848a54f53ea Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 20:42:41 +0200 Subject: [PATCH 07/19] comment: update comment --- silk/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/silk/models.py b/silk/models.py index 1583dd4d..a6c6d82a 100644 --- a/silk/models.py +++ b/silk/models.py @@ -129,7 +129,7 @@ def profile_table(self): @property def time_spent_on_sql_queries(self): """" - Calculate the total time spent on SQL queries using Django aggregates. + Calculate the total time spent in milli seconds on SQL queries using Django aggregates. """ total_time_seconds = SQLQuery.objects.filter(request=self).aggregate( total_time=Sum( From a33dda79dd7185468ff7d8c608b36b708b4e294e Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 20:43:38 +0200 Subject: [PATCH 08/19] test: trigger tox --- silk/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/silk/models.py b/silk/models.py index a6c6d82a..14ae3283 100644 --- a/silk/models.py +++ b/silk/models.py @@ -139,6 +139,7 @@ def time_spent_on_sql_queries(self): ) ) )['total_time'] + print (' ') return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 @property From 691df216278070dc2e0c2f9cebb747134ee644e4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 18:43:54 +0000 Subject: [PATCH 09/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- silk/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/silk/models.py b/silk/models.py index 14ae3283..64546fa4 100644 --- a/silk/models.py +++ b/silk/models.py @@ -139,7 +139,7 @@ def time_spent_on_sql_queries(self): ) ) )['total_time'] - print (' ') + print(' ') return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 @property From b7fdfcf3d224d996ca512f917df0a99a77fe7be1 Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 20:57:36 +0200 Subject: [PATCH 10/19] run all tests for tox --- pytest.ini | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pytest.ini b/pytest.ini index 4dd2b58a..efc78737 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,6 +1,4 @@ [pytest] addopts = --cov silk --cov-config .coveragerc --cov-append --cov-report term --cov-report=xml -python_files = tests/test_app_config.py -; python_files = test.py tests.py test_*.py tests_*.py *_tests.py *_test.py - -DJANGO_SETTINGS_MODULE = project.settings +python_files = test.py tests.py test_*.py tests_*.py *_tests.py *_test.py +DJANGO_SETTINGS_MODULE = project.settings \ No newline at end of file From d1234ddeb455e34058e8deeb10e954f336e8f46e Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 20:58:19 +0200 Subject: [PATCH 11/19] trigger tests --- silk/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/silk/models.py b/silk/models.py index 14ae3283..06758259 100644 --- a/silk/models.py +++ b/silk/models.py @@ -139,7 +139,7 @@ def time_spent_on_sql_queries(self): ) ) )['total_time'] - print (' ') + print ('_') return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 @property From 4f41ca64d0bb890e2895a1e4e5bd84d16a27ff1e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 19:01:29 +0000 Subject: [PATCH 12/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytest.ini | 2 +- silk/models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pytest.ini b/pytest.ini index efc78737..861e79eb 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] addopts = --cov silk --cov-config .coveragerc --cov-append --cov-report term --cov-report=xml python_files = test.py tests.py test_*.py tests_*.py *_tests.py *_test.py -DJANGO_SETTINGS_MODULE = project.settings \ No newline at end of file +DJANGO_SETTINGS_MODULE = project.settings diff --git a/silk/models.py b/silk/models.py index f0390f71..dea35742 100644 --- a/silk/models.py +++ b/silk/models.py @@ -139,7 +139,7 @@ def time_spent_on_sql_queries(self): ) ) )['total_time'] - print ('_') + print('_') return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 @property From 93b26c2160da93e3479f41e3af06a980b0e5ab8b Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 21:19:18 +0200 Subject: [PATCH 13/19] revert changes to models.py --- silk/models.py | 772 ++++++++++++++++++++++++------------------------- 1 file changed, 386 insertions(+), 386 deletions(-) diff --git a/silk/models.py b/silk/models.py index f0390f71..a45524bf 100644 --- a/silk/models.py +++ b/silk/models.py @@ -1,386 +1,386 @@ -import base64 -import json -import random -import re -from uuid import uuid4 - -import sqlparse -from django.conf import settings -from django.db import models, transaction -from django.db.models import ( - BooleanField, - CharField, - DateField, - DateTimeField, - ExpressionWrapper, - F, - FileField, - FloatField, - ForeignKey, - IntegerField, - ManyToManyField, - OneToOneField, - Sum, - TextField, -) -from django.utils import timezone -from django.utils.safestring import mark_safe - -from silk.config import SilkyConfig -from silk.utils.profile_parser import parse_profile - -try: - # New in Django 4.2 - from django.core.files.storage import storages - from django.core.files.storage.handler import InvalidStorageError - try: - silk_storage = storages['SILKY_STORAGE'] - except InvalidStorageError: - from django.utils.module_loading import import_string - storage_class = SilkyConfig().SILKY_STORAGE_CLASS or settings.DEFAULT_FILE_STORAGE - silk_storage = import_string(storage_class)() -except ImportError: - # Deprecated since Django 4.2, Removed in Django 5.1 - from django.core.files.storage import get_storage_class - silk_storage = get_storage_class(SilkyConfig().SILKY_STORAGE_CLASS)() - - -# Seperated out so can use in tests w/o models -def _time_taken(start_time, end_time): - d = end_time - start_time - return d.seconds * 1000 + d.microseconds / 1000 - - -def time_taken(self): - return _time_taken(self.start_time, self.end_time) - - -class CaseInsensitiveDictionary(dict): - def __getitem__(self, key): - return super().__getitem__(key.lower()) - - def __setitem__(self, key, value): - super().__setitem__(key.lower(), value) - - def update(self, other=None, **kwargs): - for k, v in other.items(): - self[k] = v - for k, v in kwargs.items(): - self[k] = v - - def __init__(self, d): - super().__init__() - for k, v in d.items(): - self[k] = v - - -class Request(models.Model): - id = CharField(max_length=36, default=uuid4, primary_key=True) - path = CharField(max_length=190, db_index=True) - query_params = TextField(blank=True, default='') - raw_body = TextField(blank=True, default='') - body = TextField(blank=True, default='') - method = CharField(max_length=10) - start_time = DateTimeField(default=timezone.now, db_index=True) - view_name = CharField( - max_length=190, db_index=True, blank=True, - default='', null=True - ) - end_time = DateTimeField(null=True, blank=True) - time_taken = FloatField(blank=True, null=True) - encoded_headers = TextField(blank=True, default='') # stores json - meta_time = FloatField(null=True, blank=True) - meta_num_queries = IntegerField(null=True, blank=True) - meta_time_spent_queries = FloatField(null=True, blank=True) - pyprofile = TextField(blank=True, default='') - prof_file = FileField(max_length=300, blank=True, storage=silk_storage) - - # Useful method to create shortened copies of strings without losing start and end context - # Used to ensure path and view_name don't exceed 190 characters - def _shorten(self, string): - return f'{string[:94]}...{string[len(string) - 93:]}' - - @property - def total_meta_time(self): - return (self.meta_time or 0) + (self.meta_time_spent_queries or 0) - - @property - def profile_table(self): - for n, columns in enumerate(parse_profile(self.pyprofile)): - location = columns[-1] - if n and '{' not in location and '<' not in location: - r = re.compile(r'(?P.*\.py)\:(?P[0-9]+).*') - m = r.search(location) - group = m.groupdict() - src = group['src'] - num = group['num'] - name = 'c%d' % n - fmt = '{location}' - rep = fmt.format(**dict(group, **locals())) - yield columns[:-1] + [mark_safe(rep)] - else: - yield columns - - # defined in atomic transaction within SQLQuery save()/delete() as well - # as in bulk_create of SQLQueryManager - # TODO: This is probably a bad way to do this, .count() will prob do? - num_sql_queries = IntegerField(default=0) # TODO replace with count() - - @property - def time_spent_on_sql_queries(self): - """" - Calculate the total time spent in milli seconds on SQL queries using Django aggregates. - """ - total_time_seconds = SQLQuery.objects.filter(request=self).aggregate( - total_time=Sum( - ExpressionWrapper( - (F('end_time') - F('start_time')), - output_field=DateField() - ) - ) - )['total_time'] - print ('_') - return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 - - @property - def headers(self): - if self.encoded_headers: - raw = json.loads(self.encoded_headers) - else: - raw = {} - - return CaseInsensitiveDictionary(raw) - - @property - def content_type(self): - return self.headers.get('content-type', None) - - @classmethod - def garbage_collect(cls, force=False): - """ Remove Request/Responses when we are at the SILKY_MAX_RECORDED_REQUESTS limit - Note that multiple in-flight requests may call this at once causing a - double collection """ - check_percent = SilkyConfig().SILKY_MAX_RECORDED_REQUESTS_CHECK_PERCENT - check_percent /= 100.0 - if check_percent < random.random() and not force: - return - target_count = SilkyConfig().SILKY_MAX_RECORDED_REQUESTS - - # Since garbage collection is probabilistic, the target count should - # be lowered to account for requests before the next garbage collection - if check_percent != 0: - target_count -= int(1 / check_percent) - - # Make sure we can delete everything if needed by settings - if target_count <= 0: - cls.objects.all().delete() - return - - try: - time_cutoff = cls.objects.order_by( - '-start_time' - ).values_list( - 'start_time', - flat=True - )[target_count] - except IndexError: - return - - cls.objects.filter(start_time__lte=time_cutoff).delete() - - def save(self, *args, **kwargs): - # sometimes django requests return the body as 'None' - if self.raw_body is None: - self.raw_body = '' - - if self.body is None: - self.body = '' - - if self.end_time and self.start_time: - interval = self.end_time - self.start_time - self.time_taken = interval.total_seconds() * 1000 - - # We can't save if either path or view_name exceed 190 characters - if self.path and len(self.path) > 190: - self.path = self._shorten(self.path) - - if self.view_name and len(self.view_name) > 190: - self.view_name = self._shorten(self.view_name) - - super().save(*args, **kwargs) - Request.garbage_collect(force=False) - - -class Response(models.Model): - id = CharField(max_length=36, default=uuid4, primary_key=True) - request = OneToOneField( - Request, related_name='response', db_index=True, - on_delete=models.CASCADE, - ) - status_code = IntegerField() - raw_body = TextField(blank=True, default='') - body = TextField(blank=True, default='') - encoded_headers = TextField(blank=True, default='') - - @property - def content_type(self): - return self.headers.get('content-type', None) - - @property - def headers(self): - if self.encoded_headers: - raw = json.loads(self.encoded_headers) - else: - raw = {} - return CaseInsensitiveDictionary(raw) - - @property - def raw_body_decoded(self): - return base64.b64decode(self.raw_body) - - -# TODO rewrite docstring -class SQLQueryManager(models.Manager): - @transaction.atomic - def bulk_create(self, *args, **kwargs): - """ensure that num_sql_queries remains consistent. Bulk create does not call - the model save() method and hence we must add this logic here too""" - if len(args): - objs = args[0] - else: - objs = kwargs.get('objs') - for obj in objs: - obj.prepare_save() - - return super().bulk_create(*args, **kwargs) - - -class SQLQuery(models.Model): - query = TextField() - start_time = DateTimeField(null=True, blank=True, default=timezone.now) - end_time = DateTimeField(null=True, blank=True) - time_taken = FloatField(blank=True, null=True) - identifier = IntegerField(default=-1) - request = ForeignKey( - Request, related_name='queries', null=True, - blank=True, db_index=True, on_delete=models.CASCADE, - ) - traceback = TextField() - analysis = TextField(null=True, blank=True) - objects = SQLQueryManager() - - # TODO docstring - @property - def traceback_ln_only(self): - return '\n'.join(self.traceback.split('\n')[::2]) - - @property - def formatted_query(self): - return sqlparse.format(self.query, reindent=True, keyword_case='upper') - - @property - def num_joins(self): - parsed_query = sqlparse.parse(self.query) - count = 0 - for statement in parsed_query: - count += sum(map(lambda t: t.match(sqlparse.tokens.Keyword, r'\.*join\.*', regex=True), statement.flatten())) - return count - - @property - def first_keywords(self): - parsed_query = sqlparse.parse(self.query) - keywords = [] - for statement in parsed_query[0].tokens: - if not statement.is_keyword: - break - keywords.append(statement.value) - return ' '.join(keywords) - - @property - def tables_involved(self): - """ - A really another rudimentary way to work out tables involved in a - query. - TODO: Can probably parse the SQL using sqlparse etc and pull out table - info that way? - """ - components = [x.strip() for x in self.query.split()] - tables = [] - - for idx, component in enumerate(components): - # TODO: If django uses aliases on column names they will be falsely - # identified as tables... - if component.lower() == 'from' or component.lower() == 'join' or component.lower() == 'as': - try: - _next = components[idx + 1] - if not _next.startswith('('): # Subquery - stripped = _next.strip().strip(',') - - if stripped: - tables.append(stripped) - except IndexError: # Reach the end - pass - return tables - - def prepare_save(self): - if self.end_time and self.start_time: - interval = self.end_time - self.start_time - self.time_taken = interval.total_seconds() * 1000 - - if not self.pk: - if self.request: - self.request.num_sql_queries += 1 - self.request.save(update_fields=['num_sql_queries']) - - @transaction.atomic() - def save(self, *args, **kwargs): - self.prepare_save() - super().save(*args, **kwargs) - - @transaction.atomic() - def delete(self, *args, **kwargs): - self.request.num_sql_queries -= 1 - self.request.save() - super().delete(*args, **kwargs) - - -class BaseProfile(models.Model): - name = CharField(max_length=300, blank=True, default='') - start_time = DateTimeField(default=timezone.now) - end_time = DateTimeField(null=True, blank=True) - request = ForeignKey( - Request, null=True, blank=True, db_index=True, - on_delete=models.CASCADE, - ) - time_taken = FloatField(blank=True, null=True) - - class Meta: - abstract = True - - def save(self, *args, **kwargs): - if self.end_time and self.start_time: - interval = self.end_time - self.start_time - self.time_taken = interval.total_seconds() * 1000 - super().save(*args, **kwargs) - - -class Profile(BaseProfile): - file_path = CharField(max_length=300, blank=True, default='') - line_num = IntegerField(null=True, blank=True) - end_line_num = IntegerField(null=True, blank=True) - func_name = CharField(max_length=300, blank=True, default='') - exception_raised = BooleanField(default=False) - queries = ManyToManyField(SQLQuery, related_name='profiles', db_index=True) - dynamic = BooleanField(default=False) - - @property - def is_function_profile(self): - return self.func_name is not None - - @property - def is_context_profile(self): - return self.func_name is None - - @property - def time_spent_on_sql_queries(self): - return sum(x.time_taken for x in self.queries.all()) +import base64 +import json +import random +import re +from uuid import uuid4 + +import sqlparse +from django.conf import settings +from django.db import models, transaction +from django.db.models import ( + BooleanField, + CharField, + DateField, + DateTimeField, + ExpressionWrapper, + F, + FileField, + FloatField, + ForeignKey, + IntegerField, + ManyToManyField, + OneToOneField, + Sum, + TextField +) +from django.utils import timezone +from django.utils.safestring import mark_safe + +from silk.config import SilkyConfig +from silk.utils.profile_parser import parse_profile + +try: + # New in Django 4.2 + from django.core.files.storage import storages + from django.core.files.storage.handler import InvalidStorageError + try: + silk_storage = storages['SILKY_STORAGE'] + except InvalidStorageError: + from django.utils.module_loading import import_string + storage_class = SilkyConfig().SILKY_STORAGE_CLASS or settings.DEFAULT_FILE_STORAGE + silk_storage = import_string(storage_class)() +except ImportError: + # Deprecated since Django 4.2, Removed in Django 5.1 + from django.core.files.storage import get_storage_class + silk_storage = get_storage_class(SilkyConfig().SILKY_STORAGE_CLASS)() + + +# Seperated out so can use in tests w/o models +def _time_taken(start_time, end_time): + d = end_time - start_time + return d.seconds * 1000 + d.microseconds / 1000 + + +def time_taken(self): + return _time_taken(self.start_time, self.end_time) + + +class CaseInsensitiveDictionary(dict): + def __getitem__(self, key): + return super().__getitem__(key.lower()) + + def __setitem__(self, key, value): + super().__setitem__(key.lower(), value) + + def update(self, other=None, **kwargs): + for k, v in other.items(): + self[k] = v + for k, v in kwargs.items(): + self[k] = v + + def __init__(self, d): + super().__init__() + for k, v in d.items(): + self[k] = v + + +class Request(models.Model): + id = CharField(max_length=36, default=uuid4, primary_key=True) + path = CharField(max_length=190, db_index=True) + query_params = TextField(blank=True, default='') + raw_body = TextField(blank=True, default='') + body = TextField(blank=True, default='') + method = CharField(max_length=10) + start_time = DateTimeField(default=timezone.now, db_index=True) + view_name = CharField( + max_length=190, db_index=True, blank=True, + default='', null=True + ) + end_time = DateTimeField(null=True, blank=True) + time_taken = FloatField(blank=True, null=True) + encoded_headers = TextField(blank=True, default='') # stores json + meta_time = FloatField(null=True, blank=True) + meta_num_queries = IntegerField(null=True, blank=True) + meta_time_spent_queries = FloatField(null=True, blank=True) + pyprofile = TextField(blank=True, default='') + prof_file = FileField(max_length=300, blank=True, storage=silk_storage) + + # Useful method to create shortened copies of strings without losing start and end context + # Used to ensure path and view_name don't exceed 190 characters + def _shorten(self, string): + return f'{string[:94]}...{string[len(string) - 93:]}' + + @property + def total_meta_time(self): + return (self.meta_time or 0) + (self.meta_time_spent_queries or 0) + + @property + def profile_table(self): + for n, columns in enumerate(parse_profile(self.pyprofile)): + location = columns[-1] + if n and '{' not in location and '<' not in location: + r = re.compile(r'(?P.*\.py)\:(?P[0-9]+).*') + m = r.search(location) + group = m.groupdict() + src = group['src'] + num = group['num'] + name = 'c%d' % n + fmt = '{location}' + rep = fmt.format(**dict(group, **locals())) + yield columns[:-1] + [mark_safe(rep)] + else: + yield columns + + # defined in atomic transaction within SQLQuery save()/delete() as well + # as in bulk_create of SQLQueryManager + # TODO: This is probably a bad way to do this, .count() will prob do? + num_sql_queries = IntegerField(default=0) # TODO replace with count() + + @property + def time_spent_on_sql_queries(self): + """" + Calculate the total time spent in milli seconds on SQL queries using Django aggregates. + """ + total_time_seconds = SQLQuery.objects.filter(request=self).aggregate( + total_time=Sum( + ExpressionWrapper( + (F('end_time') - F('start_time')), + output_field=DateField() + ) + ) + )['total_time'] + return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 + + + @property + def headers(self): + if self.encoded_headers: + raw = json.loads(self.encoded_headers) + else: + raw = {} + + return CaseInsensitiveDictionary(raw) + + @property + def content_type(self): + return self.headers.get('content-type', None) + + @classmethod + def garbage_collect(cls, force=False): + """ Remove Request/Responses when we are at the SILKY_MAX_RECORDED_REQUESTS limit + Note that multiple in-flight requests may call this at once causing a + double collection """ + check_percent = SilkyConfig().SILKY_MAX_RECORDED_REQUESTS_CHECK_PERCENT + check_percent /= 100.0 + if check_percent < random.random() and not force: + return + target_count = SilkyConfig().SILKY_MAX_RECORDED_REQUESTS + + # Since garbage collection is probabilistic, the target count should + # be lowered to account for requests before the next garbage collection + if check_percent != 0: + target_count -= int(1 / check_percent) + + # Make sure we can delete everything if needed by settings + if target_count <= 0: + cls.objects.all().delete() + return + + try: + time_cutoff = cls.objects.order_by( + '-start_time' + ).values_list( + 'start_time', + flat=True + )[target_count] + except IndexError: + return + + cls.objects.filter(start_time__lte=time_cutoff).delete() + + def save(self, *args, **kwargs): + # sometimes django requests return the body as 'None' + if self.raw_body is None: + self.raw_body = '' + + if self.body is None: + self.body = '' + + if self.end_time and self.start_time: + interval = self.end_time - self.start_time + self.time_taken = interval.total_seconds() * 1000 + + # We can't save if either path or view_name exceed 190 characters + if self.path and len(self.path) > 190: + self.path = self._shorten(self.path) + + if self.view_name and len(self.view_name) > 190: + self.view_name = self._shorten(self.view_name) + + super().save(*args, **kwargs) + Request.garbage_collect(force=False) + + +class Response(models.Model): + id = CharField(max_length=36, default=uuid4, primary_key=True) + request = OneToOneField( + Request, related_name='response', db_index=True, + on_delete=models.CASCADE, + ) + status_code = IntegerField() + raw_body = TextField(blank=True, default='') + body = TextField(blank=True, default='') + encoded_headers = TextField(blank=True, default='') + + @property + def content_type(self): + return self.headers.get('content-type', None) + + @property + def headers(self): + if self.encoded_headers: + raw = json.loads(self.encoded_headers) + else: + raw = {} + return CaseInsensitiveDictionary(raw) + + @property + def raw_body_decoded(self): + return base64.b64decode(self.raw_body) + + +# TODO rewrite docstring +class SQLQueryManager(models.Manager): + @transaction.atomic + def bulk_create(self, *args, **kwargs): + """ensure that num_sql_queries remains consistent. Bulk create does not call + the model save() method and hence we must add this logic here too""" + if len(args): + objs = args[0] + else: + objs = kwargs.get('objs') + for obj in objs: + obj.prepare_save() + + return super().bulk_create(*args, **kwargs) + + +class SQLQuery(models.Model): + query = TextField() + start_time = DateTimeField(null=True, blank=True, default=timezone.now) + end_time = DateTimeField(null=True, blank=True) + time_taken = FloatField(blank=True, null=True) + identifier = IntegerField(default=-1) + request = ForeignKey( + Request, related_name='queries', null=True, + blank=True, db_index=True, on_delete=models.CASCADE, + ) + traceback = TextField() + analysis = TextField(null=True, blank=True) + objects = SQLQueryManager() + + # TODO docstring + @property + def traceback_ln_only(self): + return '\n'.join(self.traceback.split('\n')[::2]) + + @property + def formatted_query(self): + return sqlparse.format(self.query, reindent=True, keyword_case='upper') + + @property + def num_joins(self): + parsed_query = sqlparse.parse(self.query) + count = 0 + for statement in parsed_query: + count += sum(map(lambda t: t.match(sqlparse.tokens.Keyword, r'\.*join\.*', regex=True), statement.flatten())) + return count + + @property + def first_keywords(self): + parsed_query = sqlparse.parse(self.query) + keywords = [] + for statement in parsed_query[0].tokens: + if not statement.is_keyword: + break + keywords.append(statement.value) + return ' '.join(keywords) + + @property + def tables_involved(self): + """ + A really another rudimentary way to work out tables involved in a + query. + TODO: Can probably parse the SQL using sqlparse etc and pull out table + info that way? + """ + components = [x.strip() for x in self.query.split()] + tables = [] + + for idx, component in enumerate(components): + # TODO: If django uses aliases on column names they will be falsely + # identified as tables... + if component.lower() == 'from' or component.lower() == 'join' or component.lower() == 'as': + try: + _next = components[idx + 1] + if not _next.startswith('('): # Subquery + stripped = _next.strip().strip(',') + + if stripped: + tables.append(stripped) + except IndexError: # Reach the end + pass + return tables + + def prepare_save(self): + if self.end_time and self.start_time: + interval = self.end_time - self.start_time + self.time_taken = interval.total_seconds() * 1000 + + if not self.pk: + if self.request: + self.request.num_sql_queries += 1 + self.request.save(update_fields=['num_sql_queries']) + + @transaction.atomic() + def save(self, *args, **kwargs): + self.prepare_save() + super().save(*args, **kwargs) + + @transaction.atomic() + def delete(self, *args, **kwargs): + self.request.num_sql_queries -= 1 + self.request.save() + super().delete(*args, **kwargs) + + +class BaseProfile(models.Model): + name = CharField(max_length=300, blank=True, default='') + start_time = DateTimeField(default=timezone.now) + end_time = DateTimeField(null=True, blank=True) + request = ForeignKey( + Request, null=True, blank=True, db_index=True, + on_delete=models.CASCADE, + ) + time_taken = FloatField(blank=True, null=True) + + class Meta: + abstract = True + + def save(self, *args, **kwargs): + if self.end_time and self.start_time: + interval = self.end_time - self.start_time + self.time_taken = interval.total_seconds() * 1000 + super().save(*args, **kwargs) + + +class Profile(BaseProfile): + file_path = CharField(max_length=300, blank=True, default='') + line_num = IntegerField(null=True, blank=True) + end_line_num = IntegerField(null=True, blank=True) + func_name = CharField(max_length=300, blank=True, default='') + exception_raised = BooleanField(default=False) + queries = ManyToManyField(SQLQuery, related_name='profiles', db_index=True) + dynamic = BooleanField(default=False) + + @property + def is_function_profile(self): + return self.func_name is not None + + @property + def is_context_profile(self): + return self.func_name is None + + @property + def time_spent_on_sql_queries(self): + return sum(x.time_taken for x in self.queries.all()) From f446d91a65a7d40ad6c705f772475f75fadab498 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 19:23:42 +0000 Subject: [PATCH 14/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- silk/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/silk/models.py b/silk/models.py index a45524bf..443fc985 100644 --- a/silk/models.py +++ b/silk/models.py @@ -21,7 +21,7 @@ ManyToManyField, OneToOneField, Sum, - TextField + TextField, ) from django.utils import timezone from django.utils.safestring import mark_safe @@ -139,8 +139,7 @@ def time_spent_on_sql_queries(self): ) ) )['total_time'] - return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 - + return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 @property def headers(self): From d660d53167337c918bdbda4527f36baa02375c78 Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 22:57:41 +0200 Subject: [PATCH 15/19] fix aggregation --- silk/models.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/silk/models.py b/silk/models.py index 443fc985..f442658b 100644 --- a/silk/models.py +++ b/silk/models.py @@ -131,15 +131,11 @@ def time_spent_on_sql_queries(self): """" Calculate the total time spent in milli seconds on SQL queries using Django aggregates. """ - total_time_seconds = SQLQuery.objects.filter(request=self).aggregate( - total_time=Sum( - ExpressionWrapper( - (F('end_time') - F('start_time')), - output_field=DateField() - ) - ) - )['total_time'] - return total_time_seconds.total_seconds() * 1000 if total_time_seconds is not None else 0.0 + result = SQLQuery.objects.filter(request=self).aggregate( + total_time=Sum('time_taken', output_field=FloatField()) + ) + return result['total_time'] or 0.0 + @property def headers(self): From 0ff26f0d5f38a5628f8653b3d1c626e1a9cf5a8e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 20:59:35 +0000 Subject: [PATCH 16/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- silk/models.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/silk/models.py b/silk/models.py index f442658b..f25726e2 100644 --- a/silk/models.py +++ b/silk/models.py @@ -10,10 +10,7 @@ from django.db.models import ( BooleanField, CharField, - DateField, DateTimeField, - ExpressionWrapper, - F, FileField, FloatField, ForeignKey, @@ -136,7 +133,6 @@ def time_spent_on_sql_queries(self): ) return result['total_time'] or 0.0 - @property def headers(self): if self.encoded_headers: From bba41cedc14b07b80fb34f75461891634444f149 Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 23:14:04 +0200 Subject: [PATCH 17/19] use db aggregation --- silk/models.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/silk/models.py b/silk/models.py index f25726e2..c0bbbe7b 100644 --- a/silk/models.py +++ b/silk/models.py @@ -126,7 +126,7 @@ def profile_table(self): @property def time_spent_on_sql_queries(self): """" - Calculate the total time spent in milli seconds on SQL queries using Django aggregates. + Calculate the total time spent in milli seconds on SQL queries using Django aggregates. """ result = SQLQuery.objects.filter(request=self).aggregate( total_time=Sum('time_taken', output_field=FloatField()) @@ -374,4 +374,10 @@ def is_context_profile(self): @property def time_spent_on_sql_queries(self): - return sum(x.time_taken for x in self.queries.all()) + """ + Calculate the total time spent in milliseconds on SQL queries using Django aggregates. + """ + result = self.queries.aggregate( + total_time=Sum('time_taken', output_field=FloatField()) + ) + return result['total_time'] or 0.0 \ No newline at end of file From 7c9db9fcfb85202b70769d270523f89970d412d0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 21:34:55 +0000 Subject: [PATCH 18/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- silk/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/silk/models.py b/silk/models.py index c0bbbe7b..95af8ef7 100644 --- a/silk/models.py +++ b/silk/models.py @@ -380,4 +380,4 @@ def time_spent_on_sql_queries(self): result = self.queries.aggregate( total_time=Sum('time_taken', output_field=FloatField()) ) - return result['total_time'] or 0.0 \ No newline at end of file + return result['total_time'] or 0.0 From e4d2e030ad29d1a04a8bec49cdd501740c9be887 Mon Sep 17 00:00:00 2001 From: Mohamed Beltagy Date: Tue, 9 Jul 2024 23:35:34 +0200 Subject: [PATCH 19/19] remove extra logging --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dc4d0721..3b97b007 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -89,7 +89,7 @@ jobs: - name: Tox tests run: | - tox -vvv + tox -v env: DJANGO: ${{ matrix.django-version }}