From 437629959751174d558159523c2b28edc2f09cf8 Mon Sep 17 00:00:00 2001 From: Geoffrey Eisenbarth Date: Sat, 4 Mar 2023 18:26:05 -0600 Subject: [PATCH 1/9] Address #91. --- postgres_copy/copy_from.py | 54 +++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/postgres_copy/copy_from.py b/postgres_copy/copy_from.py index 6026ddf..40d44fb 100644 --- a/postgres_copy/copy_from.py +++ b/postgres_copy/copy_from.py @@ -9,6 +9,7 @@ import logging from collections import OrderedDict from io import TextIOWrapper +import warnings from django.db import NotSupportedError from django.db import connections, router from django.core.exceptions import FieldDoesNotExist @@ -33,6 +34,7 @@ def __init__( force_null=None, encoding=None, ignore_conflicts=False, + on_conflict={}, static_mapping=None, temp_table_name=None ): @@ -57,8 +59,9 @@ def __init__( self.force_not_null = force_not_null self.force_null = force_null self.encoding = encoding - self.supports_ignore_conflicts = True + self.supports_on_conflict = True self.ignore_conflicts = ignore_conflicts + self.on_conflict = on_conflict if static_mapping is not None: self.static_mapping = OrderedDict(static_mapping) else: @@ -76,10 +79,18 @@ def __init__( if self.conn.vendor != 'postgresql': raise TypeError("Only PostgreSQL backends supported") - # Check if it is PSQL 9.5 or greater, which determines if ignore_conflicts is supported - self.supports_ignore_conflicts = self.is_postgresql_9_5() - if self.ignore_conflicts and not self.supports_ignore_conflicts: - raise NotSupportedError('This database backend does not support ignoring conflicts.') + # Check if it is PSQL 9.5 or greater, which determines if on_conflict is supported + self.supports_on_conflict = self.is_postgresql_9_5() + if self.ignore_conflicts: + self.on_conflict = { + 'action': 'ignore', + } + warnings.warn( + "The `ignore_conflicts` kwarg has been replaced with " + "on_conflict={'action': 'ignore'}." + ) + if self.on_conflict and not self.supports_on_conflict: + raise NotSupportedError('This database backend does not support conflict logic.') # Pull the CSV headers self.headers = self.get_headers() @@ -317,10 +328,35 @@ def insert_suffix(self): """ Preps the suffix to the insert query. """ - if self.ignore_conflicts: - return """ - ON CONFLICT DO NOTHING; - """ + if self.on_conflict: + try: + action = self.on_conflict['action'] + except KeyError: + raise ValueError("Must specify an `action` when passing `on_conflict`.") + if action is None: + target, action = "", "DO NOTHING" + elif action == 'update': + try: + target = self.on_conflict['target'] + except KeyError: + raise ValueError("Must specify `target` when action == 'update'.") + if target in [f.name for f in self.model._meta.fields]: + target = "({0})".format(target) + elif target in [c.name for c in self.model._meta.constraints]: + target = "ON CONSTRAINT {0}".format(target) + else: + raise ValueError("`target` must be a field name or constraint name.") + + if 'columns' in self.on_conflict: + columns = ', '.join([ + "{0} = excluded.{0}".format(col) + for col in self.on_conflict['columns'] + ]) + else: + raise ValueError("Must specify `columns` when action == 'update'.") + + action = "DO UPDATE SET {0}".format(columns) + return "ON CONFLICT {0} {1};".format(target, action) else: return ";" From 18cbae3652252b7e5c88c19bb915fb92a5ee8cf3 Mon Sep 17 00:00:00 2001 From: Geoffrey Eisenbarth Date: Sat, 4 Mar 2023 18:32:16 -0600 Subject: [PATCH 2/9] Address #122. --- postgres_copy/managers.py | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/postgres_copy/managers.py b/postgres_copy/managers.py index 6df7244..60a35b5 100644 --- a/postgres_copy/managers.py +++ b/postgres_copy/managers.py @@ -57,12 +57,18 @@ def drop_constraints(self): # Remove any field constraints for field in self.constrained_fields: - logger.debug("Dropping constraints from {}".format(field)) + logger.debug("Dropping field constraint from {}".format(field)) field_copy = field.__copy__() field_copy.db_constraint = False args = (self.model, field, field_copy) self.edit_schema(schema_editor, 'alter_field', args) + # Remove remaining constraints + for constraint in getattr(self.model._meta, 'constraints', []): + logger.debug("Dropping constraint '{}'".format(constraint.name)) + args = (self.model, constraint) + self.edit_schema(schema_editor, 'remove_constraint', args) + def drop_indexes(self): """ Drop indexes on the model and its fields. @@ -70,19 +76,25 @@ def drop_indexes(self): logger.debug("Dropping indexes from {}".format(self.model.__name__)) with connection.schema_editor() as schema_editor: # Remove any "index_together" constraints - logger.debug("Dropping index_together of {}".format(self.model._meta.index_together)) if self.model._meta.index_together: + logger.debug("Dropping index_together of {}".format(self.model._meta.index_together)) args = (self.model, self.model._meta.index_together, ()) self.edit_schema(schema_editor, 'alter_index_together', args) # Remove any field indexes for field in self.indexed_fields: - logger.debug("Dropping index from {}".format(field)) + logger.debug("Dropping field index from {}".format(field)) field_copy = field.__copy__() field_copy.db_index = False args = (self.model, field, field_copy) self.edit_schema(schema_editor, 'alter_field', args) + # Remove remaining indexes + for index in getattr(self.model._meta, 'indexes', []): + logger.debug("Dropping index '{}'".format(index.name)) + args = (self.model, index) + self.edit_schema(schema_editor, 'remove_index', args) + def restore_constraints(self): """ Restore constraints on the model and its fields. @@ -95,14 +107,20 @@ def restore_constraints(self): args = (self.model, (), self.model._meta.unique_together) self.edit_schema(schema_editor, 'alter_unique_together', args) - # Add any constraints to the fields + # Add any field constraints for field in self.constrained_fields: - logger.debug("Adding constraints to {}".format(field)) + logger.debug("Adding field constraint to {}".format(field)) field_copy = field.__copy__() field_copy.db_constraint = False args = (self.model, field_copy, field) self.edit_schema(schema_editor, 'alter_field', args) + # Add remaining constraints + for constraint in getattr(self.model._meta, 'constraints', []): + logger.debug("Adding constraint '{}'".format(constraint.name)) + args = (self.model, constraint) + self.edit_schema(schema_editor, 'add_constraint', args) + def restore_indexes(self): """ Restore indexes on the model and its fields. @@ -117,12 +135,18 @@ def restore_indexes(self): # Add any indexes to the fields for field in self.indexed_fields: - logger.debug("Restoring index to {}".format(field)) + logger.debug("Restoring field index to {}".format(field)) field_copy = field.__copy__() field_copy.db_index = False args = (self.model, field_copy, field) self.edit_schema(schema_editor, 'alter_field', args) + # Add remaining indexes + for index in getattr(self.model._meta, 'indexes', []): + logger.debug("Adding index '{}'".format(index.name)) + args = (self.model, index) + self.edit_schema(schema_editor, 'add_index', args) + class CopyQuerySet(ConstraintQuerySet): """ From 6fbf134b24e78f0ef038f64dd5cb017caae4e8ae Mon Sep 17 00:00:00 2001 From: Geoffrey Eisenbarth Date: Wed, 8 Mar 2023 08:32:32 -0600 Subject: [PATCH 3/9] Make sure is defined. --- postgres_copy/copy_from.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/postgres_copy/copy_from.py b/postgres_copy/copy_from.py index 40d44fb..db052f7 100644 --- a/postgres_copy/copy_from.py +++ b/postgres_copy/copy_from.py @@ -334,6 +334,8 @@ def insert_suffix(self): except KeyError: raise ValueError("Must specify an `action` when passing `on_conflict`.") if action is None: + return ";" + elif action == 'ignore': target, action = "", "DO NOTHING" elif action == 'update': try: @@ -356,6 +358,8 @@ def insert_suffix(self): raise ValueError("Must specify `columns` when action == 'update'.") action = "DO UPDATE SET {0}".format(columns) + else: + raise ValueError("Action must be one of None, 'ignore', or 'update'.") return "ON CONFLICT {0} {1};".format(target, action) else: return ";" From ac44f76eba7cea3697e4e959a6bda868a1e3908b Mon Sep 17 00:00:00 2001 From: Geoffrey Eisenbarth Date: Wed, 8 Mar 2023 11:19:36 -0600 Subject: [PATCH 4/9] Add tests. --- postgres_copy/copy_from.py | 26 ++++++++++------ postgres_copy/managers.py | 7 +++++ tests/models.py | 16 +++++++++- tests/tests.py | 62 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 97 insertions(+), 14 deletions(-) diff --git a/postgres_copy/copy_from.py b/postgres_copy/copy_from.py index db052f7..0b82a65 100644 --- a/postgres_copy/copy_from.py +++ b/postgres_copy/copy_from.py @@ -342,25 +342,31 @@ def insert_suffix(self): target = self.on_conflict['target'] except KeyError: raise ValueError("Must specify `target` when action == 'update'.") + try: + columns = self.on_conflict['columns'] + except KeyError: + raise ValueError("Must specify `columns` when action == 'update'.") + if target in [f.name for f in self.model._meta.fields]: target = "({0})".format(target) elif target in [c.name for c in self.model._meta.constraints]: - target = "ON CONSTRAINT {0}".format(target) + target = "ON CONSTRAINT \"{0}\"".format(target) else: raise ValueError("`target` must be a field name or constraint name.") - if 'columns' in self.on_conflict: - columns = ', '.join([ - "{0} = excluded.{0}".format(col) - for col in self.on_conflict['columns'] - ]) - else: - raise ValueError("Must specify `columns` when action == 'update'.") - + # Convert to db_column names and set values from the `excluded` table + columns = ', '.join([ + "{0} = excluded.{0}".format( + self.model._meta.get_field(col).column + ) + for col in columns + ]) action = "DO UPDATE SET {0}".format(columns) else: raise ValueError("Action must be one of None, 'ignore', or 'update'.") - return "ON CONFLICT {0} {1};".format(target, action) + return """ + ON CONFLICT {0} {1}; + """.format(target, action) else: return ";" diff --git a/postgres_copy/managers.py b/postgres_copy/managers.py index 60a35b5..00fb923 100644 --- a/postgres_copy/managers.py +++ b/postgres_copy/managers.py @@ -170,6 +170,13 @@ def from_csv(self, csv_path, mapping=None, drop_constraints=True, drop_indexes=T "anyway. Either remove the transaction block, or set " "drop_constraints=False and drop_indexes=False.") + # NOTE: See GH Issue #117 + # We could remove this block if drop_constraints' default was False + if drop_constraints and (on_conflict := kwargs.get('on_conflict')): + if target := on_conflict.get('target'): + if target in [c.name for c in self.model._meta.constraints]: + drop_constraints = False + mapping = CopyMapping(self.model, csv_path, mapping, **kwargs) if drop_constraints: diff --git a/tests/models.py b/tests/models.py index a330f29..75a438a 100644 --- a/tests/models.py +++ b/tests/models.py @@ -114,6 +114,20 @@ class SecondaryMockObject(models.Model): objects = CopyManager() -class UniqueMockObject(models.Model): +class UniqueFieldConstraintMockObject(models.Model): name = models.CharField(max_length=500, unique=True) objects = CopyManager() + + +class UniqueModelConstraintMockObject(models.Model): + name = models.CharField(max_length=500) + number = MyIntegerField(null=True, db_column='num') + objects = CopyManager() + + class Meta: + constraints = [ + models.UniqueConstraint( + name='constraint', + fields=['name', 'number'] + ), + ] diff --git a/tests/tests.py b/tests/tests.py index 4db9563..d13e121 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -12,7 +12,8 @@ OverloadMockObject, HookedCopyMapping, SecondaryMockObject, - UniqueMockObject + UniqueFieldConstraintMockObject, + UniqueModelConstraintMockObject, ) from django.test import TestCase from django.db import transaction @@ -589,17 +590,72 @@ def test_encoding_save(self, _): @mock.patch("django.db.connection.validate_no_atomic_block") def test_ignore_conflicts(self, _): - UniqueMockObject.objects.from_csv( + UniqueFieldConstraintMockObject.objects.from_csv( self.name_path, dict(name='NAME'), ignore_conflicts=True ) - UniqueMockObject.objects.from_csv( + UniqueFieldConstraintMockObject.objects.from_csv( self.name_path, dict(name='NAME'), ignore_conflicts=True ) + @mock.patch("django.db.connection.validate_no_atomic_block") + def test_on_conflict_ignore(self, _): + UniqueModelConstraintMockObject.objects.from_csv( + self.name_path, + dict(name='NAME'), + on_conflict={'action': 'ignore'}, + ) + UniqueModelConstraintMockObject.objects.from_csv( + self.name_path, + dict(name='NAME'), + on_conflict={'action': 'ignore'}, + ) + + @mock.patch("django.db.connection.validate_no_atomic_block") + def test_on_conflict_target_field_update(self, _): + UniqueFieldConstraintMockObject.objects.from_csv( + self.name_path, + dict(name='NAME'), + on_conflict={ + 'action': 'update', + 'target': 'name', + 'columns': ['name'], + }, + ) + UniqueFieldConstraintMockObject.objects.from_csv( + self.name_path, + dict(name='NAME'), + on_conflict={ + 'action': 'update', + 'target': 'name', + 'columns': ['name'], + }, + ) + + @mock.patch("django.db.connection.validate_no_atomic_block") + def test_on_conflict_target_constraint_update(self, _): + UniqueModelConstraintMockObject.objects.from_csv( + self.name_path, + dict(name='NAME', number='NUMBER'), + on_conflict={ + 'action': 'update', + 'target': 'constraint', + 'columns': ['name', 'number'], + }, + ) + UniqueModelConstraintMockObject.objects.from_csv( + self.name_path, + dict(name='NAME', number='NUMBER'), + on_conflict={ + 'action': 'update', + 'target': 'constraint', + 'columns': ['name', 'number'], + }, + ) + @mock.patch("django.db.connection.validate_no_atomic_block") def test_static_values(self, _): ExtendedMockObject.objects.from_csv( From 1150cca9689e27dce727a9d552db464382f61629 Mon Sep 17 00:00:00 2001 From: Geoffrey Eisenbarth Date: Fri, 10 Mar 2023 14:28:52 -0600 Subject: [PATCH 5/9] Fix Model Constraint issue. --- postgres_copy/copy_from.py | 25 +++++++++++++++---------- postgres_copy/managers.py | 4 +++- tests/models.py | 17 ++++++++++++++++- tests/tests.py | 27 +++++++++++++++++++++++++-- 4 files changed, 59 insertions(+), 14 deletions(-) diff --git a/postgres_copy/copy_from.py b/postgres_copy/copy_from.py index 0b82a65..7e251d6 100644 --- a/postgres_copy/copy_from.py +++ b/postgres_copy/copy_from.py @@ -333,9 +333,7 @@ def insert_suffix(self): action = self.on_conflict['action'] except KeyError: raise ValueError("Must specify an `action` when passing `on_conflict`.") - if action is None: - return ";" - elif action == 'ignore': + if action == 'ignore': target, action = "", "DO NOTHING" elif action == 'update': try: @@ -347,12 +345,19 @@ def insert_suffix(self): except KeyError: raise ValueError("Must specify `columns` when action == 'update'.") - if target in [f.name for f in self.model._meta.fields]: - target = "({0})".format(target) - elif target in [c.name for c in self.model._meta.constraints]: - target = "ON CONSTRAINT \"{0}\"".format(target) - else: - raise ValueError("`target` must be a field name or constraint name.") + # As recommended in PostgreSQL's INSERT documentation, we use "index inference" + # rather than naming a constraint directly. Currently, if an `include` param + # is provided to a django.models.Constraint, Django creates a UNIQUE INDEX instead + # of a CONSTRAINT, another eason to use "index inference" by just specifying columns. + constraints = {c.name: c for c in self.model._meta.constraints} + if isinstance(target, str): + if constraint := constraints.get(target): + target = constraint.fields + else: + target = [target] + elif not isinstance(target, list): + raise ValueError("`target` must be a string or a list.") + target = "({0})".format(', '.join(target)) # Convert to db_column names and set values from the `excluded` table columns = ', '.join([ @@ -363,7 +368,7 @@ def insert_suffix(self): ]) action = "DO UPDATE SET {0}".format(columns) else: - raise ValueError("Action must be one of None, 'ignore', or 'update'.") + raise ValueError("Action must be one of 'ignore' or 'update'.") return """ ON CONFLICT {0} {1}; """.format(target, action) diff --git a/postgres_copy/managers.py b/postgres_copy/managers.py index 00fb923..2509d6a 100644 --- a/postgres_copy/managers.py +++ b/postgres_copy/managers.py @@ -172,10 +172,12 @@ def from_csv(self, csv_path, mapping=None, drop_constraints=True, drop_indexes=T # NOTE: See GH Issue #117 # We could remove this block if drop_constraints' default was False - if drop_constraints and (on_conflict := kwargs.get('on_conflict')): + if on_conflict := kwargs.get('on_conflict'): if target := on_conflict.get('target'): if target in [c.name for c in self.model._meta.constraints]: drop_constraints = False + elif on_conflict.get('action') == 'ignore': + drop_constraints = False mapping = CopyMapping(self.model, csv_path, mapping, **kwargs) diff --git a/tests/models.py b/tests/models.py index 75a438a..f94597a 100644 --- a/tests/models.py +++ b/tests/models.py @@ -128,6 +128,21 @@ class Meta: constraints = [ models.UniqueConstraint( name='constraint', - fields=['name', 'number'] + fields=['name'], + ), + ] + + +class UniqueModelConstraintAsIndexMockObject(models.Model): + name = models.CharField(max_length=500) + number = MyIntegerField(null=True, db_column='num') + objects = CopyManager() + + class Meta: + constraints = [ + models.UniqueConstraint( + name='constraint_as_index', + fields=['name'], + include=['number'], # Converts Constraint to Index ), ] diff --git a/tests/tests.py b/tests/tests.py index d13e121..9c3d771 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -14,6 +14,7 @@ SecondaryMockObject, UniqueFieldConstraintMockObject, UniqueModelConstraintMockObject, + UniqueModelConstraintAsIndexMockObject, ) from django.test import TestCase from django.db import transaction @@ -605,12 +606,12 @@ def test_ignore_conflicts(self, _): def test_on_conflict_ignore(self, _): UniqueModelConstraintMockObject.objects.from_csv( self.name_path, - dict(name='NAME'), + dict(name='NAME', number='NUMBER'), on_conflict={'action': 'ignore'}, ) UniqueModelConstraintMockObject.objects.from_csv( self.name_path, - dict(name='NAME'), + dict(name='NAME', number='NUMBER'), on_conflict={'action': 'ignore'}, ) @@ -656,6 +657,28 @@ def test_on_conflict_target_constraint_update(self, _): }, ) + @mock.patch("django.db.connection.validate_no_atomic_block") + def test_on_conflict_target_constraint_as_index_update(self, _): + UniqueModelConstraintAsIndexMockObject.objects.from_csv( + self.name_path, + dict(name='NAME', number='NUMBER'), + on_conflict={ + 'action': 'update', + 'target': 'constraint_as_index', + 'columns': ['name', 'number'], + }, + ) + UniqueModelConstraintAsIndexMockObject.objects.from_csv( + self.name_path, + dict(name='NAME', number='NUMBER'), + on_conflict={ + 'action': 'update', + 'target': 'constraint_as_index', + 'columns': ['name', 'number'], + }, + ) + + @mock.patch("django.db.connection.validate_no_atomic_block") def test_static_values(self, _): ExtendedMockObject.objects.from_csv( From 8a6b4d50ef3ca6acf79edd62b518388512b368bc Mon Sep 17 00:00:00 2001 From: Geoffrey Eisenbarth Date: Fri, 10 Mar 2023 15:27:25 -0600 Subject: [PATCH 6/9] Add on_conflict to docs. --- docs/index.rst | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 125b24b..8055ec5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -272,7 +272,17 @@ Keyword Argument Description parameters. ``ignore_conflicts`` Specify True to ignore unique constraint or exclusion - constraint violation errors. The default is False. + constraint violation errors. The default is False. This + is depreciated in favor of `on_conflict={'action': 'ignore'}`. + +``on_conflict`` Specifies how PostgreSQL handles conflicts. For example, + `on_conflict={'action': 'ignore'}` will ignore any + conflicts. If setting `'action'` to `'update'`, you + must also specify `'target'` (the source of the + constraint: either a model field name, a constraint name, + or a list of model field names) as well as `'columns'` + (a list of model fields to update). The default is None, + which will raise conflict errors if they occur. ``using`` Sets the database to use when importing data. Default is None, which will use the ``'default'`` From 15392f1f2f9b034e3dfd0a76d7100e457b68be45 Mon Sep 17 00:00:00 2001 From: Geoffrey Eisenbarth Date: Mon, 13 Mar 2023 11:46:04 -0500 Subject: [PATCH 7/9] Remove blank line. --- tests/tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/tests.py b/tests/tests.py index 9c3d771..14667b4 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -678,7 +678,6 @@ def test_on_conflict_target_constraint_as_index_update(self, _): }, ) - @mock.patch("django.db.connection.validate_no_atomic_block") def test_static_values(self, _): ExtendedMockObject.objects.from_csv( From 472cdadc0f5601649898969f933af652fa1e19d5 Mon Sep 17 00:00:00 2001 From: Geoffrey Eisenbarth Date: Thu, 15 Jun 2023 09:11:03 -0500 Subject: [PATCH 8/9] Add example requested in #168. --- docs/index.rst | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/index.rst b/docs/index.rst index 8055ec5..3e40906 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -520,6 +520,39 @@ Now you can run that subclass directly rather than via a manager. The only diffe # Then save it. c.save() +For example, if you wish to return a QuerySet of the models imported into the database, you could do the following: + +.. code-block:: python + from django.db import models + from postgres_copy import CopyMapping, CopyQuerySet + + + class ResultsCopyMapping(CopyMapping): + def insert_suffix(self) -> str: + """Add `RETURNING` sql clause to get newly created/updated ids.""" + suffix = super().insert_suffix() + suffix = suffix.split(';')[0] + ' RETURNING id;' + return suffix + + def post_insert(self, cursor) -> None: + """Extend to store results from `RETURNING` clause.""" + self.obj_ids = [r[0] for r in cursor.fetchall()] + + + class ResultsCopyQuerySet(CopyQuerySet): + def from_csv(self, csv_path_or_obj, mapping=None, **kwargs): + mapping = ResultsCopyMapping(self.model, csv_path_or_obj, mapping=None, **kwargs) + count = mapping.save(silent=True) + objs = self.model.objects.filter(id__in=mapping.obj_ids) + return objs, count + + + class Person(models.Model): + name = models.CharField(max_length=500) + number = models.IntegerField() + source_csv = models.CharField(max_length=500) + objects = ResultsCopyQuerySet.as_manager() + Export options ============== From 449d06d3762ee7ff4a984acd84db84c3e58c141a Mon Sep 17 00:00:00 2001 From: Geoffrey Eisenbarth Date: Thu, 15 Jun 2023 09:18:16 -0500 Subject: [PATCH 9/9] Fix spacing. --- docs/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/index.rst b/docs/index.rst index 3e40906..4f2387d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -523,6 +523,8 @@ Now you can run that subclass directly rather than via a manager. The only diffe For example, if you wish to return a QuerySet of the models imported into the database, you could do the following: .. code-block:: python + + from django.db import models from postgres_copy import CopyMapping, CopyQuerySet