Skip to content

Commit 438d2bd

Browse files
authored
Merge pull request #517 from Police-Data-Accessibility-Project/mc_516_record_formats_access_types_not_null
Alter the record_formats and access_types columns to be NOT NULL, defaulting to an empty array.
2 parents 7bc6348 + 08159e0 commit 438d2bd

File tree

10 files changed

+121
-46
lines changed

10 files changed

+121
-46
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""Update record_formats and access_types to be not null
2+
3+
Revision ID: de0305465e2c
4+
Revises: a57c3b5b6e93
5+
Create Date: 2025-11-15 14:41:45.619148
6+
7+
"""
8+
from typing import Sequence, Union
9+
10+
from alembic import op
11+
import sqlalchemy as sa
12+
13+
14+
# revision identifiers, used by Alembic.
15+
revision: str = 'de0305465e2c'
16+
down_revision: Union[str, None] = 'a57c3b5b6e93'
17+
branch_labels: Union[str, Sequence[str], None] = None
18+
depends_on: Union[str, Sequence[str], None] = None
19+
20+
21+
# Table whose record_formats / access_types columns this migration alters.
TABLE_NAME = "url_optional_data_source_metadata"
22+
23+
24+
def upgrade() -> None:
    """Backfill NULL array values, then make both columns NOT NULL.

    The backfill steps run before the column alterations so that no NULL
    rows remain when ``nullable=False`` is applied.
    """
    migration_steps = (
        _update_record_formats,
        _update_access_types,
        _alter_record_formats_column,
        _alter_access_types_column,
    )
    for run_step in migration_steps:
        run_step()
29+
30+
def _alter_record_formats_column():
    """Set ``record_formats`` to NOT NULL with a server default of ``'{}'``."""
    column_changes = {
        "nullable": False,
        "server_default": "{}",
    }
    op.alter_column(
        table_name=TABLE_NAME,
        column_name="record_formats",
        **column_changes,
    )
37+
38+
39+
def _alter_access_types_column():
    """Set ``access_types`` to NOT NULL with a server default of ``'{}'``."""
    column_changes = {
        "nullable": False,
        "server_default": "{}",
    }
    op.alter_column(
        table_name=TABLE_NAME,
        column_name="access_types",
        **column_changes,
    )
46+
47+
48+
49+
def _update_access_types():
    """Replace every NULL ``access_types`` value with an empty array.

    Uses the module-level ``TABLE_NAME`` constant so the backfill targets
    the same table as the column alterations in this migration.
    """
    # The doubled braces are f-string escapes; the SQL sent is '{}'.
    op.execute(
        f"""
        UPDATE {TABLE_NAME}
        SET access_types = '{{}}'
        WHERE access_types IS NULL
        """
    )
56+
57+
58+
def _update_record_formats():
    """Replace every NULL ``record_formats`` value with an empty array.

    Uses the module-level ``TABLE_NAME`` constant so the backfill targets
    the same table as the column alterations in this migration.
    """
    # The doubled braces are f-string escapes; the SQL sent is '{}'.
    op.execute(
        f"""
        UPDATE {TABLE_NAME}
        SET record_formats = '{{}}'
        WHERE record_formats IS NULL
        """
    )
64+
65+
66+
def downgrade() -> None:
    """Make both array columns nullable again and drop their server default.

    Rows that ``upgrade`` backfilled from NULL to an empty array are left as
    empty arrays: they cannot be told apart from rows that were already
    empty, so the original NULLs are not restored.
    """
    # NOTE(review): assumes neither column had a server default before this
    # revision -- confirm against revision a57c3b5b6e93.
    for column_name in ("record_formats", "access_types"):
        op.alter_column(
            table_name=TABLE_NAME,
            column_name=column_name,
            nullable=True,
            # Explicit None (rather than omitting the argument) asks Alembic
            # to remove the existing server default.
            server_default=None,
        )

src/api/endpoints/collector/manual/query.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,10 @@ async def run(self, session: AsyncSession) -> ManualBatchResponseDTO:
8484

8585
optional_metadata = URLOptionalDataSourceMetadata(
8686
url_id=url.id,
87-
record_formats=entry.record_formats,
87+
record_formats=entry.record_formats or [],
8888
data_portal_type=entry.data_portal_type,
8989
supplying_entity=entry.supplying_entity,
90+
access_types=[]
9091
)
9192
session.add(optional_metadata)
9293
url_ids.append(url.id)

src/api/endpoints/contributions/user/queries/core.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ async def run(self, session: AsyncSession) -> ContributionsUserResponse:
3333
agency_agree.agreement.label("agency"),
3434
url_type_agree.agreement.label("url_type")
3535
)
36-
.join(
36+
.outerjoin(
3737
record_type_agree.cte,
3838
contributions_cte.user_id == record_type_agree.user_id
3939
)
40-
.join(
40+
.outerjoin(
4141
agency_agree.cte,
4242
contributions_cte.user_id == agency_agree.user_id
4343
)
44-
.join(
44+
.outerjoin(
4545
url_type_agree.cte,
4646
contributions_cte.user_id == url_type_agree.user_id
4747
)

src/api/endpoints/review/approve/query_/core.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,10 @@ async def _optionally_update_optional_metdata(self, url: URL) -> None:
6666
optional_metadata = url.optional_data_source_metadata
6767
if optional_metadata is None:
6868
url.optional_data_source_metadata = URLOptionalDataSourceMetadata(
69-
record_formats=self.approval_info.record_formats,
69+
record_formats=self.approval_info.record_formats or [],
7070
data_portal_type=self.approval_info.data_portal_type,
71-
supplying_entity=self.approval_info.supplying_entity
71+
supplying_entity=self.approval_info.supplying_entity,
72+
access_types=[]
7273
)
7374
else:
7475
update_if_not_none(

src/core/tasks/scheduled/loader.py

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ async def load_entries(self) -> list[ScheduledTaskEntry]:
127127
enabled=self.setup_flag("REFRESH_MATERIALIZED_VIEWS_TASK_FLAG")
128128
),
129129
# Sync
130-
## Agency
131-
### Add
130+
## Adds
131+
### Agency
132132
ScheduledTaskEntry(
133133
operator=DSAppSyncAgenciesAddTaskOperator(
134134
adb_client=self.adb_client,
@@ -137,78 +137,79 @@ async def load_entries(self) -> list[ScheduledTaskEntry]:
137137
interval_minutes=IntervalEnum.HOURLY.value,
138138
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_ADD_TASK_FLAG")
139139
),
140-
### Update
140+
### Meta URL
141141
ScheduledTaskEntry(
142-
operator=DSAppSyncAgenciesUpdateTaskOperator(
142+
operator=DSAppSyncMetaURLsAddTaskOperator(
143143
adb_client=self.adb_client,
144144
pdap_client=self.pdap_client
145145
),
146146
interval_minutes=IntervalEnum.HOURLY.value,
147-
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_UPDATE_TASK_FLAG")
147+
enabled=self.setup_flag("DS_APP_SYNC_META_URL_ADD_TASK_FLAG")
148148
),
149-
### Delete
149+
### Data Source
150150
ScheduledTaskEntry(
151-
operator=DSAppSyncAgenciesDeleteTaskOperator(
151+
operator=DSAppSyncDataSourcesAddTaskOperator(
152152
adb_client=self.adb_client,
153153
pdap_client=self.pdap_client
154154
),
155155
interval_minutes=IntervalEnum.HOURLY.value,
156-
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG")
156+
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_ADD_TASK_FLAG")
157157
),
158-
## Data Source
159-
### Add
158+
## Updates
159+
### Agency
160160
ScheduledTaskEntry(
161-
operator=DSAppSyncDataSourcesAddTaskOperator(
161+
operator=DSAppSyncAgenciesUpdateTaskOperator(
162162
adb_client=self.adb_client,
163163
pdap_client=self.pdap_client
164164
),
165165
interval_minutes=IntervalEnum.HOURLY.value,
166-
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_ADD_TASK_FLAG")
166+
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_UPDATE_TASK_FLAG")
167167
),
168-
### Update
168+
### Meta URL
169169
ScheduledTaskEntry(
170-
operator=DSAppSyncDataSourcesUpdateTaskOperator(
170+
operator=DSAppSyncMetaURLsUpdateTaskOperator(
171171
adb_client=self.adb_client,
172172
pdap_client=self.pdap_client
173173
),
174174
interval_minutes=IntervalEnum.HOURLY.value,
175-
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_UPDATE_TASK_FLAG")
175+
enabled=self.setup_flag("DS_APP_SYNC_META_URL_UPDATE_TASK_FLAG")
176176
),
177-
### Delete
177+
### Data Source
178178
ScheduledTaskEntry(
179-
operator=DSAppSyncDataSourcesDeleteTaskOperator(
179+
operator=DSAppSyncDataSourcesUpdateTaskOperator(
180180
adb_client=self.adb_client,
181181
pdap_client=self.pdap_client
182182
),
183183
interval_minutes=IntervalEnum.HOURLY.value,
184-
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_DELETE_TASK_FLAG")
184+
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_UPDATE_TASK_FLAG")
185185
),
186-
## Meta URL
187-
### Add
186+
## Deletes
187+
### Data Source
188188
ScheduledTaskEntry(
189-
operator=DSAppSyncMetaURLsAddTaskOperator(
189+
operator=DSAppSyncDataSourcesDeleteTaskOperator(
190190
adb_client=self.adb_client,
191191
pdap_client=self.pdap_client
192192
),
193193
interval_minutes=IntervalEnum.HOURLY.value,
194-
enabled=self.setup_flag("DS_APP_SYNC_META_URL_ADD_TASK_FLAG")
194+
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_DELETE_TASK_FLAG")
195195
),
196-
### Update
196+
### Meta URL
197197
ScheduledTaskEntry(
198-
operator=DSAppSyncMetaURLsUpdateTaskOperator(
198+
operator=DSAppSyncMetaURLsDeleteTaskOperator(
199199
adb_client=self.adb_client,
200200
pdap_client=self.pdap_client
201201
),
202202
interval_minutes=IntervalEnum.HOURLY.value,
203-
enabled=self.setup_flag("DS_APP_SYNC_META_URL_UPDATE_TASK_FLAG")
203+
enabled=self.setup_flag("DS_APP_SYNC_META_URL_DELETE_TASK_FLAG")
204204
),
205-
### Delete
205+
### Agency
206206
ScheduledTaskEntry(
207-
operator=DSAppSyncMetaURLsDeleteTaskOperator(
207+
operator=DSAppSyncAgenciesDeleteTaskOperator(
208208
adb_client=self.adb_client,
209209
pdap_client=self.pdap_client
210210
),
211211
interval_minutes=IntervalEnum.HOURLY.value,
212-
enabled=self.setup_flag("DS_APP_SYNC_META_URL_DELETE_TASK_FLAG")
213-
)
212+
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG")
213+
),
214+
214215
]

src/db/client/async_.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,9 +371,10 @@ async def add_miscellaneous_metadata(self, session: AsyncSession, tdos: list[URL
371371
for tdo in tdos:
372372
metadata_object = URLOptionalDataSourceMetadata(
373373
url_id=tdo.url_id,
374-
record_formats=tdo.record_formats,
374+
record_formats=tdo.record_formats or [],
375375
data_portal_type=tdo.data_portal_type,
376-
supplying_entity=tdo.supplying_entity
376+
supplying_entity=tdo.supplying_entity,
377+
access_types=[],
377378
)
378379
session.add(metadata_object)
379380

src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class URLOptionalDataSourceMetadata(
1515
):
1616
__tablename__ = 'url_optional_data_source_metadata'
1717

18-
record_formats = Column(ARRAY(String), nullable=True)
18+
record_formats = Column(ARRAY(String), nullable=False, default=[])
1919
data_portal_type = Column(String, nullable=True)
2020
supplying_entity = Column(String, nullable=True)
2121
coverage_start = Column(Date, nullable=True)
@@ -38,7 +38,7 @@ class URLOptionalDataSourceMetadata(
3838
native_enum=True,
3939
values_callable=lambda AccessTypeEnum: [e.value for e in AccessTypeEnum]
4040
)
41-
), nullable=True)
41+
), nullable=False, default=[])
4242
data_portal_type_other = Column(String, nullable=True)
4343

4444
# Relationships

tests/automated/integration/api/test_manual_batch.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,10 @@ def check_url(url: URL, url_only: bool):
121121

122122
def check_opt_metadata(metadata: URLOptionalDataSourceMetadata, no_optional: bool):
123123
assert metadata.url_id is not None
124-
other_attributes = ["record_formats", "data_portal_type", "supplying_entity"]
124+
other_attributes = [
125+
"data_portal_type",
126+
"supplying_entity"
127+
]
125128
return check_attributes(metadata, other_attributes, no_optional)
126129

127130
# Confirm 50 have nothing but URL id

tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from src.db.client.async_ import AsyncDatabaseClient
66
from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource
77
from src.external.pdap.client import PDAPClient
8+
from src.external.pdap.enums import DataSourcesURLStatus
89
from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel
910
from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest, AddDataSourcesInnerRequest
1011
from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseModel, \
@@ -78,7 +79,7 @@ async def test_add(
7879
assert content.access_notes is None
7980
assert content.access_types is None
8081
assert content.data_portal_type_other is None
81-
assert content.url_status is None
82+
assert content.url_status == DataSourcesURLStatus.OK
8283

8384
assert content.agency_ids == [test_agency_id]
8485

tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,12 @@ async def test_url_miscellaneous_metadata_task(db_data_creator: DBDataCreator):
122122
assert url.description == expected_description, f"For url.id {url.id}, expected description {expected_description}, got {url.description}"
123123

124124
expected_urls = {
125-
common_crawler_url_id: (None, None, None),
126-
auto_googler_url_id: (None, None, None),
125+
common_crawler_url_id: ([], None, None),
126+
auto_googler_url_id: ([], None, None),
127127
ckan_url_id: (["CSV", "JSON"], "Test Data Portal Type", "Test Supplying Entity"),
128-
muckrock_simple_url_id: (None, None, None),
129-
muckrock_county_url_id: (None, None, None),
130-
muckrock_all_url_id: (None, None, None),
128+
muckrock_simple_url_id: ([], None, None),
129+
muckrock_county_url_id: ([], None, None),
130+
muckrock_all_url_id: ([], None, None),
131131
}
132132

133133
metadatas: list[URLOptionalDataSourceMetadata] = await db_data_creator.adb_client.get_all(URLOptionalDataSourceMetadata)

0 commit comments

Comments
 (0)