Skip to content

Commit 9c5d3c5

Browse files
pi-sigmaswrichards
authored andcommitted
[#2913] Check for duplicate keys in ZGW imports
1 parent 4724311 commit 9c5d3c5

File tree

3 files changed

+89
-31
lines changed

3 files changed

+89
-31
lines changed

src/open_inwoner/openzaak/import_export.py

+43-22
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,19 @@
2727

2828
class ZGWImportError(Exception):
2929
@classmethod
30-
def extract_error_data(cls, exc: Exception, jsonl: str):
31-
exc_source = type(exc.__context__)
32-
data = json.loads(jsonl) if jsonl else {}
30+
def extract_error_data(
31+
cls, exception: Exception | None = None, jsonl: str | None = None
32+
):
33+
data = json.loads(jsonl) if jsonl is not None else {}
3334
source_config = apps.get_model(data["model"])
3435

35-
# error type
36-
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
37-
error_type = ObjectDoesNotExist
38-
if exc_source is source_config.MultipleObjectsReturned:
39-
error_type = MultipleObjectsReturned
36+
error_type = None
37+
if exception:
38+
exc_source = type(exception.__context__)
39+
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
40+
error_type = ObjectDoesNotExist
41+
if exc_source is source_config.MultipleObjectsReturned:
42+
error_type = MultipleObjectsReturned
4043

4144
# metadata about source_config
4245
items = []
@@ -60,8 +63,6 @@ def extract_error_data(cls, exc: Exception, jsonl: str):
6063
items = [
6164
f"omschrijving = {fields['omschrijving']!r}",
6265
f"ZaakTypeConfig identificatie = {fields['zaaktype_config'][0]!r}",
63-
f"Catalogus domein = {fields['zaaktype_config'][1]!r}",
64-
f"Catalogus rsin = {fields['zaaktype_config'][2]!r}",
6566
]
6667

6768
return {
@@ -71,8 +72,8 @@ def extract_error_data(cls, exc: Exception, jsonl: str):
7172
}
7273

7374
@classmethod
74-
def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:
75-
error_data = cls.extract_error_data(exception, jsonl)
75+
def from_exception_and_jsonl(cls, jsonl: str, exception: Exception) -> Self:
76+
error_data = cls.extract_error_data(exception=exception, jsonl=jsonl)
7677

7778
error_template = (
7879
"%(source_config_name)s not found in target environment: %(info)s"
@@ -82,16 +83,26 @@ def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:
8283

8384
return cls(error_template % error_data)
8485

86+
@classmethod
87+
def from_jsonl(cls, jsonl: str) -> Self:
88+
error_data = cls.extract_error_data(jsonl=jsonl)
89+
error_template = (
90+
"%(source_config_name)s was processed multiple times because it contains "
91+
"duplicate natural keys: %(info)s"
92+
)
93+
return cls(error_template % error_data)
94+
8595

8696
def check_catalogus_config_exists(source_config: CatalogusConfig, jsonl: str):
8797
try:
8898
CatalogusConfig.objects.get_by_natural_key(
8999
domein=source_config.domein, rsin=source_config.rsin
90100
)
91-
except CatalogusConfig.MultipleObjectsReturned as exc:
92-
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
93-
except CatalogusConfig.DoesNotExist as exc:
94-
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
101+
except (
102+
CatalogusConfig.DoesNotExist,
103+
CatalogusConfig.MultipleObjectsReturned,
104+
) as exc:
105+
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)
95106

96107

97108
def _update_config(source, target, exclude_fields):
@@ -113,10 +124,13 @@ def _update_zaaktype_config(source_config: ZaakTypeConfig, jsonl: str):
113124
catalogus_domein=source_config.catalogus.domein,
114125
catalogus_rsin=source_config.catalogus.rsin,
115126
)
116-
except ZaakTypeConfig.MultipleObjectsReturned as exc:
117-
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
118-
except (CatalogusConfig.DoesNotExist, ZaakTypeConfig.DoesNotExist) as exc:
119-
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
127+
except (
128+
CatalogusConfig.DoesNotExist,
129+
CatalogusConfig.MultipleObjectsReturned,
130+
ZaakTypeConfig.DoesNotExist,
131+
ZaakTypeConfig.MultipleObjectsReturned,
132+
) as exc:
133+
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)
120134
else:
121135
exclude_fields = [
122136
"id",
@@ -146,7 +160,7 @@ def _update_nested_zgw_config(
146160
catalogus_rsin=catalogus_rsin,
147161
)
148162
except (source_config.DoesNotExist, source_config.MultipleObjectsReturned) as exc:
149-
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
163+
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)
150164
else:
151165
_update_config(source_config, target, exclude_fields)
152166

@@ -320,6 +334,7 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
320334

321335
rows_successfully_processed = 0
322336
import_errors = []
337+
natural_keys_seen = set()
323338
for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
324339
try:
325340
(deserialized_object,) = serializers.deserialize(
@@ -329,11 +344,17 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
329344
use_natural_foreign_keys=True,
330345
)
331346
except DeserializationError as exc:
332-
error = ZGWImportError.from_exception_and_jsonl(exc, line)
347+
error = ZGWImportError.from_exception_and_jsonl(
348+
exception=exc, jsonl=line
349+
)
333350
logger.error(error)
334351
import_errors.append(error)
335352
else:
336353
source_config = deserialized_object.object
354+
if (natural_key := source_config.natural_key()) in natural_keys_seen:
355+
import_errors.append(ZGWImportError.from_jsonl(line))
356+
continue
357+
natural_keys_seen.add(natural_key)
337358
try:
338359
match source_config:
339360
case CatalogusConfig():

src/open_inwoner/openzaak/tests/test_admin.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -293,18 +293,18 @@ def test_import_flow_reports_errors(self) -> None:
293293
messages[1],
294294
)
295295
self.assertIn(
296-
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'status omschrijving', "
297-
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
296+
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', "
297+
"ZaakTypeConfig identificatie = 'ztc-id-a-0'",
298298
messages[1],
299299
)
300300
self.assertIn(
301301
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', ZaakTypeConfig "
302-
"identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
302+
"identificatie = 'ztc-id-a-0'",
303303
messages[1],
304304
)
305305
self.assertIn(
306306
"ZaakTypeInformatieObjectTypeConfig not found in target environment: omschrijving = 'informatieobject', "
307-
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
307+
"ZaakTypeConfig identificatie = 'ztc-id-a-0'",
308308
messages[1],
309309
)
310310
self.assertIn(
@@ -313,7 +313,7 @@ def test_import_flow_reports_errors(self) -> None:
313313
)
314314
self.assertIn(
315315
"ZaakTypeResultaatTypeConfig not found in target environment: omschrijving = 'resultaat', "
316-
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
316+
"ZaakTypeConfig identificatie = 'ztc-id-a-0'",
317317
messages[1],
318318
)
319319

src/open_inwoner/openzaak/tests/test_import_export.py

+41-4
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,11 @@ def setUp(self):
274274
'{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}',
275275
'{"model": "openzaak.zaaktyperesultaattypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "resultaattype_url": "https://bar.maykinmedia.nl", "omschrijving": "resultaat", "zaaktype_uuids": "[]", "description": "description new"}}',
276276
]
277+
self.json_dupes = [
278+
'{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}',
279+
]
277280
self.jsonl = "\n".join(self.json_lines)
281+
self.jsonl_with_dupes = "\n".join(self.json_lines + self.json_dupes)
278282

279283
def test_import_jsonl_update_success(self):
280284
mocks = ZGWExportImportMockData()
@@ -373,7 +377,7 @@ def test_import_jsonl_missing_statustype_config(self):
373377
)
374378
expected_error = ZGWImportError(
375379
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', "
376-
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'"
380+
"ZaakTypeConfig identificatie = 'ztc-id-a-0'"
377381
)
378382
import_expected = dataclasses.asdict(
379383
CatalogusConfigImport(
@@ -419,7 +423,7 @@ def test_import_jsonl_update_statustype_config_missing_zt_config(self):
419423
)
420424
expected_error = ZGWImportError(
421425
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'status omschrijving', "
422-
"ZaakTypeConfig identificatie = 'bogus', Catalogus domein = 'DM-1', Catalogus rsin = '666666666'"
426+
"ZaakTypeConfig identificatie = 'bogus'"
423427
)
424428
import_expected = dataclasses.asdict(
425429
CatalogusConfigImport(
@@ -445,7 +449,7 @@ def test_import_jsonl_update_statustype_config_missing_zt_config(self):
445449
self.assertEqual(ZaakTypeStatusTypeConfig.objects.count(), 1)
446450
self.assertEqual(ZaakTypeResultaatTypeConfig.objects.count(), 1)
447451

448-
def test_import_jsonl_update_reports_duplicates(self):
452+
def test_import_jsonl_update_reports_duplicate_db_records(self):
449453
mocks = ZGWExportImportMockData()
450454

451455
ZaakTypeResultaatTypeConfigFactory(
@@ -465,7 +469,7 @@ def test_import_jsonl_update_reports_duplicates(self):
465469
)
466470
expected_error = ZGWImportError(
467471
"Got multiple results for ZaakTypeResultaatTypeConfig: omschrijving = 'resultaat', "
468-
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'"
472+
"ZaakTypeConfig identificatie = 'ztc-id-a-0'"
469473
)
470474
import_expected = dataclasses.asdict(
471475
CatalogusConfigImport(
@@ -484,6 +488,39 @@ def test_import_jsonl_update_reports_duplicates(self):
484488
# check import
485489
self.assertEqual(import_result, import_expected)
486490

491+
def test_import_jsonl_update_reports_duplicate_natural_keys_in_upload_file(self):
492+
mocks = ZGWExportImportMockData()
493+
494+
self.storage.save("import.jsonl", io.StringIO(self.jsonl_with_dupes))
495+
496+
# we use `asdict` and replace the Exceptions with string representations
497+
# because for Exceptions raised from within dataclasses, equality ==/is identity
498+
import_result = dataclasses.asdict(
499+
CatalogusConfigImport.import_from_jsonl_file_in_django_storage(
500+
"import.jsonl", self.storage
501+
)
502+
)
503+
expected_error = ZGWImportError(
504+
"ZaakTypeStatusTypeConfig was processed multiple times because it contains duplicate "
505+
"natural keys: omschrijving = 'status omschrijving', ZaakTypeConfig identificatie = 'ztc-id-a-0'"
506+
)
507+
import_expected = dataclasses.asdict(
508+
CatalogusConfigImport(
509+
total_rows_processed=6,
510+
catalogus_configs_imported=1,
511+
zaaktype_configs_imported=1,
512+
zaak_informatie_object_type_configs_imported=1,
513+
zaak_status_type_configs_imported=1,
514+
zaak_resultaat_type_configs_imported=1,
515+
import_errors=[expected_error],
516+
),
517+
)
518+
import_result["import_errors"][0] = str(import_result["import_errors"][0])
519+
import_expected["import_errors"][0] = str(import_expected["import_errors"][0])
520+
521+
# check import
522+
self.assertEqual(import_result, import_expected)
523+
487524
def test_import_jsonl_fails_with_catalogus_domein_rsin_mismatch(self):
488525
service = ServiceFactory(slug="service-0")
489526
CatalogusConfigFactory(

0 commit comments

Comments
 (0)