Skip to content

Commit e589b3a

Browse files
authored
Merge pull request #1497 from maykinmedia/task/2871-zgw-import-export
[#2871] Refactor ZGW imports to work on different environments
2 parents b534838 + 24fffb7 commit e589b3a

9 files changed

+697
-404
lines changed

src/open_inwoner/openzaak/admin.py

+23-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from django.template.defaultfilters import filesizeformat
1111
from django.template.response import TemplateResponse
1212
from django.urls import path, reverse
13+
from django.utils.html import format_html, format_html_join
1314
from django.utils.translation import gettext_lazy as _, ngettext
1415

1516
from import_export.admin import ImportExportMixin
@@ -174,11 +175,30 @@ def process_file_view(self, request):
174175
self.message_user(
175176
request,
176177
_(
177-
"Successfully processed %(num_rows)d items"
178-
% {"num_rows": import_result.total_rows_processed}
178+
"%(num_rows)d item(s) processed in total, with %(error_rows)d failing row(s)."
179+
% {
180+
"num_rows": import_result.total_rows_processed,
181+
"error_rows": len(import_result.import_errors),
182+
}
179183
),
180-
messages.SUCCESS,
184+
messages.SUCCESS
185+
if not import_result.import_errors
186+
else messages.WARNING,
181187
)
188+
if errors := import_result.import_errors:
189+
msgs_deduped = set(error.__str__() for error in errors)
190+
error_msg_iterator = ([msg] for msg in msgs_deduped)
191+
192+
error_msg_html = format_html_join(
193+
"\n", "<li>{}</li>", error_msg_iterator
194+
)
195+
error_msg_html_ordered = format_html(
196+
_("It was not possible to import the following items:")
197+
+ f"<ol> {error_msg_html} </ol>"
198+
)
199+
self.message_user(
200+
request, error_msg_html_ordered, messages.ERROR
201+
)
182202

183203
return HttpResponseRedirect(
184204
reverse(

src/open_inwoner/openzaak/import_export.py

+197-67
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@
66
from typing import IO, Any, Generator, Self
77
from urllib.parse import urlparse
88

9+
from django.apps import apps
910
from django.core import serializers
11+
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
1012
from django.core.files.storage import Storage
13+
from django.core.serializers.base import DeserializationError
1114
from django.db import transaction
1215
from django.db.models import QuerySet
1316

@@ -22,6 +25,132 @@
2225
logger = logging.getLogger(__name__)
2326

2427

28+
class ZGWImportError(Exception):
    """Raised when a single row of a ZGW configuration export cannot be imported.

    The message names the model of the offending row together with the
    identifying values found in that row.
    """

    @classmethod
    def extract_error_data(cls, exc: Exception, jsonl: str):
        """Collect the details needed to render an import error message.

        Args:
            exc: The exception raised while importing the row. Callers in this
                module pass either a model lookup error directly or a
                ``DeserializationError``.
            jsonl: The serialized row (a single JSON document) that failed.

        Returns:
            A dict with the keys ``error_type`` (``ObjectDoesNotExist`` or
            ``MultipleObjectsReturned``), ``source_config_name`` (class name of
            the row's model) and ``info`` (comma-separated identifying values).

        Raises:
            KeyError: If the row has no ``"model"`` entry, or lacks one of the
                identifying fields expected for its model.
        """
        data = json.loads(jsonl) if jsonl else {}
        source_config = apps.get_model(data["model"])

        # error type
        # Look at the exception itself as well as whatever it was raised from:
        # lookup errors are handed to us directly, while a DeserializationError
        # presumably wraps the underlying lookup failure (verify against the
        # serializer in use). Anything that is not an ambiguous lookup is
        # reported as "does not exist", so `error_type` is always bound.
        chain = (exc, exc.__cause__, exc.__context__)
        if any(isinstance(link, MultipleObjectsReturned) for link in chain):
            error_type = MultipleObjectsReturned
        else:
            error_type = ObjectDoesNotExist

        # metadata about source_config
        items = []
        fields = data.get("fields", None)
        if source_config is CatalogusConfig:
            items = [
                f"Domein = {fields['domein']}",
                f"Rsin = {fields['rsin']}",
            ]
        elif source_config is ZaakTypeConfig:
            items = [
                f"Identificatie = {fields['identificatie']}",
                f"Catalogus domein = {fields['catalogus'][0]}",
                f"Catalogus rsin = {fields['catalogus'][1]}",
            ]
        elif source_config in (
            ZaakTypeStatusTypeConfig,
            ZaakTypeResultaatTypeConfig,
            ZaakTypeInformatieObjectTypeConfig,
        ):
            items = [
                f"omschrijving = {fields['omschrijving']}",
                f"ZaakTypeConfig identificatie = {fields['zaaktype_config'][0]}",
                f"Catalogus domein = {fields['zaaktype_config'][1]}",
                f"Catalogus rsin = {fields['zaaktype_config'][2]}",
            ]

        return {
            "error_type": error_type,
            "source_config_name": source_config.__name__,
            "info": ", ".join(items),
        }

    @classmethod
    def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:
        """Build an error whose message describes why the row could not be imported.

        Args:
            exception: The exception raised while importing the row.
            jsonl: The serialized row (a single JSON document) that failed.

        Returns:
            A new instance with a "not found" message, or a "multiple results"
            message when the failure was an ambiguous lookup.
        """
        error_data = cls.extract_error_data(exception, jsonl)

        error_template = (
            "%(source_config_name)s not found in target environment: %(info)s"
        )
        if error_data["error_type"] is MultipleObjectsReturned:
            error_template = "Got multiple results for %(source_config_name)s: %(info)s"

        return cls(error_template % error_data)
84+
85+
86+
def check_catalogus_config_exists(source_config: CatalogusConfig, jsonl: str):
    """Ensure the natural key of ``source_config`` resolves to a single object.

    The lookup uses the ``domein`` and ``rsin`` of ``source_config``; its
    result is discarded, only success or failure matters.

    Raises:
        ZGWImportError: If the lookup finds no object or more than one.
    """
    lookup_failures = (
        CatalogusConfig.MultipleObjectsReturned,
        CatalogusConfig.DoesNotExist,
    )
    try:
        CatalogusConfig.objects.get_by_natural_key(
            domein=source_config.domein, rsin=source_config.rsin
        )
    except lookup_failures as exc:
        raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
95+
96+
97+
def _update_config(source, target, exclude_fields):
98+
for field in source._meta.fields:
99+
field_name = field.name
100+
101+
if field_name in exclude_fields:
102+
continue
103+
104+
val = getattr(source, field_name, None)
105+
setattr(target, field_name, val)
106+
target.save()
107+
108+
109+
def _update_zaaktype_config(source_config: ZaakTypeConfig, jsonl: str):
    """Overwrite the matching existing ZaakTypeConfig with the imported values.

    The existing object is looked up by natural key (identificatie plus the
    domein and rsin of the catalogus). On success all fields except ``id``,
    ``catalogus``, ``urls`` and ``zaaktype_uuids`` are copied and saved.

    Raises:
        ZGWImportError: If the lookup finds no object or more than one.
    """
    try:
        natural_key = {
            "identificatie": source_config.identificatie,
            "catalogus_domein": source_config.catalogus.domein,
            "catalogus_rsin": source_config.catalogus.rsin,
        }
        target = ZaakTypeConfig.objects.get_by_natural_key(**natural_key)
    except (
        ZaakTypeConfig.MultipleObjectsReturned,
        CatalogusConfig.DoesNotExist,
        ZaakTypeConfig.DoesNotExist,
    ) as exc:
        raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)

    _update_config(
        source_config,
        target,
        ["id", "catalogus", "urls", "zaaktype_uuids"],
    )
128+
129+
130+
def _update_nested_zgw_config(
    source_config: ZaakTypeStatusTypeConfig
    | ZaakTypeResultaatTypeConfig
    | ZaakTypeInformatieObjectTypeConfig,
    exclude_fields: list[str],
    jsonl: str,
):
    """Overwrite the matching existing nested config with the imported values.

    The existing object has the same class as ``source_config`` and is looked
    up by natural key: its omschrijving, the identificatie of its
    zaaktype_config, and the domein and rsin of that config's catalogus. On
    success all fields not named in ``exclude_fields`` are copied and saved.

    Raises:
        ZGWImportError: If the lookup finds no object or more than one.
    """
    model = source_config.__class__
    parent = source_config.zaaktype_config
    natural_key = {
        "omschrijving": source_config.omschrijving,
        "zaaktype_config_identificatie": parent.identificatie,
        "catalogus_domein": parent.catalogus.domein,
        "catalogus_rsin": parent.catalogus.rsin,
    }

    try:
        target = model.objects.get_by_natural_key(**natural_key)
    except (model.DoesNotExist, model.MultipleObjectsReturned) as exc:
        raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)

    _update_config(source_config, target, exclude_fields)
152+
153+
25154
@dataclasses.dataclass(frozen=True)
26155
class CatalogusConfigExport:
27156
"""Gather and export CatalogusConfig(s) and all associated relations."""
@@ -113,9 +242,10 @@ class CatalogusConfigImport:
113242
total_rows_processed: int = 0
114243
catalogus_configs_imported: int = 0
115244
zaaktype_configs_imported: int = 0
116-
zaak_inormatie_object_type_configs_imported: int = 0
245+
zaak_informatie_object_type_configs_imported: int = 0
117246
zaak_status_type_configs_imported: int = 0
118247
zaak_resultaat_type_configs_imported: int = 0
248+
import_errors: list | None = None
119249

120250
@staticmethod
121251
def _get_url_root(url: str) -> str:
@@ -149,85 +279,85 @@ def _lines_iter_from_jsonl_stream_or_string(
149279
# Reset the stream in case it gets re-used
150280
lines.seek(0)
151281

152-
@classmethod
153-
def _rewrite_jsonl_url_references(
154-
cls, stream_or_string: IO | str
155-
) -> Generator[str, Any, None]:
156-
# The assumption is that the exporting and importing instance both have
157-
# a `Service` with the same slug as the `Service` referenced in the
158-
# `configured_from` attribute of the imported CatalogusConfig. The
159-
# assumption is further that all URLs in the imported objects are
160-
# prefixed by an URL that matches the API root in the service. Because
161-
# of this, the import file will contain URLs with a base URL pointing to
162-
# the `api_root` of the `configured_from` Service on the _source_
163-
# instance, and has to be re-written to match the `api_root` of the
164-
# `configured_from` Service on the _target_ instance. Put differently,
165-
# we assume that we are migrating ZGW objects that _do not differ_ as
166-
# far as the ZGW objects themselves are concerned (apart from the URL,
167-
# they essentially point to the same ZGW backend), but that they _do_
168-
# differ in terms of additional model fields that do not have their
169-
# source of truth in the ZGW backends.
170-
#
171-
# This expectation is also encoded in our API clients: you can only
172-
# fetch ZGW objects using the ApePie clients if the root of those
173-
# objects matches the configured API root.
174-
175-
base_url_mapping = {}
176-
for deserialized_object in serializers.deserialize(
177-
"jsonl",
178-
filter(
179-
lambda row: ('"model": "openzaak.catalogusconfig"' in row),
180-
cls._lines_iter_from_jsonl_stream_or_string(stream_or_string),
181-
),
182-
):
183-
object_type: str = deserialized_object.object.__class__.__name__
184-
185-
if object_type == "CatalogusConfig":
186-
target_base_url = cls._get_url_root(
187-
deserialized_object.object.service.api_root
188-
)
189-
source_base_url = cls._get_url_root(deserialized_object.object.url)
190-
base_url_mapping[source_base_url] = target_base_url
191-
else:
192-
# https://www.xkcd.com/2200/
193-
logger.error(
194-
"Tried to filter for catalogus config objects, but also got: %s",
195-
object_type,
196-
)
197-
198-
for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
199-
source_url_found = False
200-
for source, target in base_url_mapping.items():
201-
line = line.replace(source, target)
202-
source_url_found = True
203-
204-
if not source_url_found:
205-
raise ValueError("Unable to rewrite ZGW urls")
206-
207-
yield line
208-
209282
@classmethod
210283
@transaction.atomic()
211284
def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
212285
model_to_counter_mapping = {
213286
"CatalogusConfig": "catalogus_configs_imported",
214287
"ZaakTypeConfig": "zaaktype_configs_imported",
215-
"ZaakTypeInformatieObjectTypeConfig": "zaak_inormatie_object_type_configs_imported",
288+
"ZaakTypeInformatieObjectTypeConfig": "zaak_informatie_object_type_configs_imported",
216289
"ZaakTypeStatusTypeConfig": "zaak_status_type_configs_imported",
217290
"ZaakTypeResultaatTypeConfig": "zaak_resultaat_type_configs_imported",
218291
}
219-
220292
object_type_counts = defaultdict(int)
221293

222-
for deserialized_object in serializers.deserialize(
223-
"jsonl", cls._rewrite_jsonl_url_references(stream_or_string)
224-
):
225-
deserialized_object.save()
226-
object_type = deserialized_object.object.__class__.__name__
227-
object_type_counts[object_type] += 1
294+
rows_successfully_processed = 0
295+
import_errors = []
296+
for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
297+
try:
298+
(deserialized_object,) = serializers.deserialize(
299+
"jsonl",
300+
line,
301+
use_natural_primary_keys=True,
302+
use_natural_foreign_keys=True,
303+
)
304+
except DeserializationError as exc:
305+
error = ZGWImportError.from_exception_and_jsonl(exc, line)
306+
logger.error(error)
307+
import_errors.append(error)
308+
else:
309+
source_config = deserialized_object.object
310+
try:
311+
match source_config:
312+
case CatalogusConfig():
313+
check_catalogus_config_exists(
314+
source_config=source_config, jsonl=line
315+
)
316+
case ZaakTypeConfig():
317+
_update_zaaktype_config(
318+
source_config=source_config, jsonl=line
319+
)
320+
case ZaakTypeInformatieObjectTypeConfig():
321+
exclude_fields = [
322+
"id",
323+
"zaaktype_config",
324+
"zaaktype_uuids",
325+
"informatieobjecttype_url",
326+
]
327+
_update_nested_zgw_config(
328+
source_config, exclude_fields, line
329+
)
330+
case ZaakTypeStatusTypeConfig():
331+
exclude_fields = [
332+
"id",
333+
"zaaktype_config",
334+
"zaaktype_uuids",
335+
"statustype_url",
336+
]
337+
_update_nested_zgw_config(
338+
source_config, exclude_fields, line
339+
)
340+
case ZaakTypeResultaatTypeConfig():
341+
exclude_fields = [
342+
"id",
343+
"zaaktype_config",
344+
"zaaktype_uuids",
345+
"resultaattype_url",
346+
]
347+
_update_nested_zgw_config(
348+
source_config, exclude_fields, line
349+
)
350+
except ZGWImportError as exc:
351+
logger.error(exc)
352+
import_errors.append(exc)
353+
else:
354+
object_type = source_config.__class__.__name__
355+
object_type_counts[object_type] += 1
356+
rows_successfully_processed += 1
228357

229358
creation_kwargs = {
230-
"total_rows_processed": sum(object_type_counts.values()),
359+
"total_rows_processed": rows_successfully_processed + len(import_errors),
360+
"import_errors": import_errors,
231361
}
232362

233363
for model_name, counter_field in model_to_counter_mapping.items():

0 commit comments

Comments
 (0)