Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Update record_formats and access_types to be not null

Revision ID: de0305465e2c
Revises: a57c3b5b6e93
Create Date: 2025-11-15 14:41:45.619148

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_11_15_1441-de0305465e2c_update_record_formats_and_access_types_.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_11_15_1441-de0305465e2c_update_record_formats_and_access_types_.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_11_15_1441-de0305465e2c_update_record_formats_and_access_types_.py:11:1: F401 'sqlalchemy as sa' imported but unused


# revision identifiers, used by Alembic.
revision: str = 'de0305465e2c'
down_revision: Union[str, None] = 'a57c3b5b6e93'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


TABLE_NAME = "url_optional_data_source_metadata"


def upgrade() -> None:
    """Backfill NULL arrays, then make both columns NOT NULL.

    Runs the two backfill helpers first and the two column alterations
    afterwards, in the order listed below.
    """
    # The backfills come first: the NOT NULL alterations would be rejected
    # while any row still holds NULL in the affected column.
    migration_steps = (
        _update_record_formats,
        _update_access_types,
        _alter_record_formats_column,
        _alter_access_types_column,
    )
    for run_step in migration_steps:
        run_step()

def _alter_record_formats_column():
    """Make ``record_formats`` NOT NULL with a server default of ``'{}'``."""
    column_options = {"nullable": False, "server_default": "{}"}
    op.alter_column(TABLE_NAME, "record_formats", **column_options)


def _alter_access_types_column():
    """Make ``access_types`` NOT NULL with a server default of ``'{}'``."""
    column_options = {"nullable": False, "server_default": "{}"}
    op.alter_column(TABLE_NAME, "access_types", **column_options)



def _update_access_types():
    """Replace NULL ``access_types`` values with the literal ``'{}'``."""
    # Adjacent literals are concatenated into the exact statement text,
    # including its leading newline and trailing blank line.
    backfill_statement = (
        "\n"
        "UPDATE url_optional_data_source_metadata\n"
        "SET access_types = '{}'\n"
        "WHERE access_types is null\n"
        "\n"
    )
    op.execute(backfill_statement)


def _update_record_formats():
    """Replace NULL ``record_formats`` values with the literal ``'{}'``."""
    # Adjacent literals are concatenated into the exact statement text,
    # including its leading newline.
    backfill_statement = (
        "\n"
        "UPDATE url_optional_data_source_metadata\n"
        "SET record_formats = '{}'\n"
        "WHERE record_formats is null\n"
    )
    op.execute(backfill_statement)


def downgrade() -> None:
    """Revert the NOT NULL constraints and server defaults set by ``upgrade``.

    Rows that ``upgrade`` backfilled from NULL to ``'{}'`` are left as they
    are: which rows were originally NULL is not recorded, so that part of the
    migration cannot be reversed.
    """
    # NOTE(review): assumes neither column had a server default before this
    # revision -- confirm against the earlier migrations before relying on it.
    for column_name in ("record_formats", "access_types"):
        op.alter_column(
            TABLE_NAME,
            column_name,
            nullable=True,
            # None removes the default; omitting the argument leaves it as is.
            server_default=None,
        )
3 changes: 2 additions & 1 deletion src/api/endpoints/collector/manual/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,10 @@ async def run(self, session: AsyncSession) -> ManualBatchResponseDTO:

optional_metadata = URLOptionalDataSourceMetadata(
url_id=url.id,
record_formats=entry.record_formats,
record_formats=entry.record_formats or [],
data_portal_type=entry.data_portal_type,
supplying_entity=entry.supplying_entity,
access_types=[]
)
session.add(optional_metadata)
url_ids.append(url.id)
Expand Down
6 changes: 3 additions & 3 deletions src/api/endpoints/contributions/user/queries/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ async def run(self, session: AsyncSession) -> ContributionsUserResponse:
agency_agree.agreement.label("agency"),
url_type_agree.agreement.label("url_type")
)
.join(
.outerjoin(
record_type_agree.cte,
contributions_cte.user_id == record_type_agree.user_id
)
.join(
.outerjoin(
agency_agree.cte,
contributions_cte.user_id == agency_agree.user_id
)
.join(
.outerjoin(
url_type_agree.cte,
contributions_cte.user_id == url_type_agree.user_id
)
Expand Down
5 changes: 3 additions & 2 deletions src/api/endpoints/review/approve/query_/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ async def _optionally_update_optional_metdata(self, url: URL) -> None:
optional_metadata = url.optional_data_source_metadata
if optional_metadata is None:
url.optional_data_source_metadata = URLOptionalDataSourceMetadata(
record_formats=self.approval_info.record_formats,
record_formats=self.approval_info.record_formats or [],
data_portal_type=self.approval_info.data_portal_type,
supplying_entity=self.approval_info.supplying_entity
supplying_entity=self.approval_info.supplying_entity,
access_types=[]
)
else:
update_if_not_none(
Expand Down
59 changes: 30 additions & 29 deletions src/core/tasks/scheduled/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@
enabled=self.setup_flag("REFRESH_MATERIALIZED_VIEWS_TASK_FLAG")
),
# Sync
## Agency
### Add
## Adds

Check failure on line 130 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L130 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:130:13: E266 too many leading '#' for block comment
### Agency

Check failure on line 131 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L131 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:131:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=DSAppSyncAgenciesAddTaskOperator(
adb_client=self.adb_client,
Expand All @@ -137,78 +137,79 @@
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_ADD_TASK_FLAG")
),
### Update
### Meta URL

Check failure on line 140 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L140 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:140:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=DSAppSyncAgenciesUpdateTaskOperator(
operator=DSAppSyncMetaURLsAddTaskOperator(
adb_client=self.adb_client,
pdap_client=self.pdap_client
),
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_UPDATE_TASK_FLAG")
enabled=self.setup_flag("DS_APP_SYNC_META_URL_ADD_TASK_FLAG")
),
### Delete
### Data Source

Check failure on line 149 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L149 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:149:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=DSAppSyncAgenciesDeleteTaskOperator(
operator=DSAppSyncDataSourcesAddTaskOperator(
adb_client=self.adb_client,
pdap_client=self.pdap_client
),
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG")
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_ADD_TASK_FLAG")
),
## Data Source
### Add
## Updates

Check failure on line 158 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L158 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:158:13: E266 too many leading '#' for block comment
### Agency

Check failure on line 159 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L159 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:159:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=DSAppSyncDataSourcesAddTaskOperator(
operator=DSAppSyncAgenciesUpdateTaskOperator(
adb_client=self.adb_client,
pdap_client=self.pdap_client
),
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_ADD_TASK_FLAG")
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_UPDATE_TASK_FLAG")
),
### Update
### Meta URL

Check failure on line 168 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L168 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:168:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=DSAppSyncDataSourcesUpdateTaskOperator(
operator=DSAppSyncMetaURLsUpdateTaskOperator(
adb_client=self.adb_client,
pdap_client=self.pdap_client
),
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_UPDATE_TASK_FLAG")
enabled=self.setup_flag("DS_APP_SYNC_META_URL_UPDATE_TASK_FLAG")
),
### Delete
### Data Source

Check failure on line 177 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L177 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:177:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=DSAppSyncDataSourcesDeleteTaskOperator(
operator=DSAppSyncDataSourcesUpdateTaskOperator(
adb_client=self.adb_client,
pdap_client=self.pdap_client
),
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_DELETE_TASK_FLAG")
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_UPDATE_TASK_FLAG")
),
## Meta URL
### Add
## Deletes

Check failure on line 186 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L186 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:186:13: E266 too many leading '#' for block comment
### Data Source

Check failure on line 187 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L187 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:187:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=DSAppSyncMetaURLsAddTaskOperator(
operator=DSAppSyncDataSourcesDeleteTaskOperator(
adb_client=self.adb_client,
pdap_client=self.pdap_client
),
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_META_URL_ADD_TASK_FLAG")
enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_DELETE_TASK_FLAG")
),
### Update
### Meta URL

Check failure on line 196 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L196 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:196:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=DSAppSyncMetaURLsUpdateTaskOperator(
operator=DSAppSyncMetaURLsDeleteTaskOperator(
adb_client=self.adb_client,
pdap_client=self.pdap_client
),
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_META_URL_UPDATE_TASK_FLAG")
enabled=self.setup_flag("DS_APP_SYNC_META_URL_DELETE_TASK_FLAG")
),
### Delete
### Agency

Check failure on line 205 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L205 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:205:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=DSAppSyncMetaURLsDeleteTaskOperator(
operator=DSAppSyncAgenciesDeleteTaskOperator(
adb_client=self.adb_client,
pdap_client=self.pdap_client
),
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_META_URL_DELETE_TASK_FLAG")
)
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG")
),

]
5 changes: 3 additions & 2 deletions src/db/client/async_.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,10 @@ async def add_miscellaneous_metadata(self, session: AsyncSession, tdos: list[URL
for tdo in tdos:
metadata_object = URLOptionalDataSourceMetadata(
url_id=tdo.url_id,
record_formats=tdo.record_formats,
record_formats=tdo.record_formats or [],
data_portal_type=tdo.data_portal_type,
supplying_entity=tdo.supplying_entity
supplying_entity=tdo.supplying_entity,
access_types=[],
)
session.add(metadata_object)

Expand Down
4 changes: 2 additions & 2 deletions src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class URLOptionalDataSourceMetadata(
):
__tablename__ = 'url_optional_data_source_metadata'

record_formats = Column(ARRAY(String), nullable=True)
record_formats = Column(ARRAY(String), nullable=False, default=[])
data_portal_type = Column(String, nullable=True)
supplying_entity = Column(String, nullable=True)
coverage_start = Column(Date, nullable=True)
Expand All @@ -38,7 +38,7 @@ class URLOptionalDataSourceMetadata(
native_enum=True,
values_callable=lambda AccessTypeEnum: [e.value for e in AccessTypeEnum]
)
), nullable=True)
), nullable=False, default=[])
data_portal_type_other = Column(String, nullable=True)

# Relationships
Expand Down
5 changes: 4 additions & 1 deletion tests/automated/integration/api/test_manual_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,10 @@ def check_url(url: URL, url_only: bool):

def check_opt_metadata(metadata: URLOptionalDataSourceMetadata, no_optional: bool):
assert metadata.url_id is not None
other_attributes = ["record_formats", "data_portal_type", "supplying_entity"]
other_attributes = [
"data_portal_type",
"supplying_entity"
]
return check_attributes(metadata, other_attributes, no_optional)

# Confirm 50 have nothing but URL id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from src.db.client.async_ import AsyncDatabaseClient
from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource
from src.external.pdap.client import PDAPClient
from src.external.pdap.enums import DataSourcesURLStatus
from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel
from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest, AddDataSourcesInnerRequest
from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseModel, \
Expand Down Expand Up @@ -78,7 +79,7 @@ async def test_add(
assert content.access_notes is None
assert content.access_types is None
assert content.data_portal_type_other is None
assert content.url_status is None
assert content.url_status == DataSourcesURLStatus.OK

assert content.agency_ids == [test_agency_id]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,12 @@ async def test_url_miscellaneous_metadata_task(db_data_creator: DBDataCreator):
assert url.description == expected_description, f"For url.id {url.id}, expected description {expected_description}, got {url.description}"

expected_urls = {
common_crawler_url_id: (None, None, None),
auto_googler_url_id: (None, None, None),
common_crawler_url_id: ([], None, None),
auto_googler_url_id: ([], None, None),
ckan_url_id: (["CSV", "JSON"], "Test Data Portal Type", "Test Supplying Entity"),
muckrock_simple_url_id: (None, None, None),
muckrock_county_url_id: (None, None, None),
muckrock_all_url_id: (None, None, None),
muckrock_simple_url_id: ([], None, None),
muckrock_county_url_id: ([], None, None),
muckrock_all_url_id: ([], None, None),
}

metadatas: list[URLOptionalDataSourceMetadata] = await db_data_creator.adb_client.get_all(URLOptionalDataSourceMetadata)
Expand Down