diff --git a/ENV.md b/ENV.md index d4496dbc..a4ae17a7 100644 --- a/ENV.md +++ b/ENV.md @@ -70,6 +70,7 @@ Note that some tasks/subtasks are themselves enabled by other tasks. | `DELETE_STALE_SCREENSHOTS_TASK_FLAG` | Deletes stale screenshots for URLs already validated. | | `TASK_CLEANUP_TASK_FLAG` | Cleans up tasks that are no longer needed. | | `REFRESH_MATERIALIZED_VIEWS_TASK_FLAG` | Refreshes materialized views. | +| `UPDATE_URL_STATUS_TASK_FLAG` | Updates the status of URLs. | | `DS_APP_SYNC_AGENCY_ADD_TASK_FLAG` | Adds new agencies to the Data Sources App| | `DS_APP_SYNC_AGENCY_UPDATE_TASK_FLAG` | Updates existing agencies in the Data Sources App| | `DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG` | Deletes agencies in the Data Sources App| diff --git a/alembic/versions/2025_11_18_0902-783268bd3daa_add_update_url_status_task.py b/alembic/versions/2025_11_18_0902-783268bd3daa_add_update_url_status_task.py new file mode 100644 index 00000000..986d6187 --- /dev/null +++ b/alembic/versions/2025_11_18_0902-783268bd3daa_add_update_url_status_task.py @@ -0,0 +1,34 @@ +"""Add update_url_status task + +Revision ID: 783268bd3daa +Revises: 88ac26c3b025 +Create Date: 2025-11-18 09:02:54.985705 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import add_enum_value + +# revision identifiers, used by Alembic. 
+revision: str = '783268bd3daa' +down_revision: Union[str, None] = '88ac26c3b025' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + add_enum_value( + enum_name="url_status", + enum_value="broken" + ) + add_enum_value( + enum_name="task_type", + enum_value="Update URL Status" + ) + + +def downgrade() -> None: + pass diff --git a/src/api/main.py b/src/api/main.py index 8f080d25..141d4e38 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -6,7 +6,6 @@ from fastapi import FastAPI from pdap_access_manager.access_manager.async_ import AccessManagerAsync from pdap_access_manager.models.auth import AuthInfo -from sqlalchemy.ext.asyncio import create_async_engine from starlette.responses import RedirectResponse from src.api.endpoints.agencies.routes import agencies_router diff --git a/src/collectors/enums.py b/src/collectors/enums.py index f40e5f19..16711a0c 100644 --- a/src/collectors/enums.py +++ b/src/collectors/enums.py @@ -14,3 +14,4 @@ class URLStatus(Enum): OK = "ok" ERROR = "error" DUPLICATE = "duplicate" + BROKEN = "broken" diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py index ae0a01ec..04710ba6 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py @@ -6,8 +6,10 @@ from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.queries.cte import \ DSAppLinkSyncDataSourceAddPrerequisitesCTEContainer +from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL +from 
src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType from src.db.queries.base.builder import QueryBuilderBase @@ -38,6 +40,7 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: # Required URL.full_url, URL.name, + URL.status, URLRecordType.record_type, agency_id_cte.c.agency_ids, # Optional @@ -56,6 +59,7 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: URLOptionalDataSourceMetadata.scraper_url, URLOptionalDataSourceMetadata.access_notes, URLOptionalDataSourceMetadata.access_types, + URLInternetArchivesProbeMetadata.archive_url, ) .select_from( cte.cte @@ -68,6 +72,10 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: URLOptionalDataSourceMetadata, URL.id == URLOptionalDataSourceMetadata.url_id, ) + .outerjoin( + URLInternetArchivesProbeMetadata, + URL.id == URLInternetArchivesProbeMetadata.url_id, + ) .join( URLRecordType, URLRecordType.url_id == URL.id, @@ -110,7 +118,10 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: scraper_url=mapping[URLOptionalDataSourceMetadata.scraper_url], access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], - url_status=DataSourcesURLStatus.OK + url_status=convert_sm_url_status_to_ds_url_status( + sm_url_status=mapping[URL.status], + ), + internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, ) ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py index b6b94779..a710b6f7 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py +++ 
b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py @@ -6,8 +6,10 @@ from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.cte import \ DSAppLinkSyncDataSourceUpdatePrerequisitesCTEContainer +from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType from src.db.queries.base.builder import QueryBuilderBase @@ -39,6 +41,7 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: # Required URL.full_url, URL.name, + URL.status, URLRecordType.record_type, agency_id_cte.c.agency_ids, # Optional @@ -57,7 +60,8 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: URLOptionalDataSourceMetadata.scraper_url, URLOptionalDataSourceMetadata.access_notes, URLOptionalDataSourceMetadata.access_types, - URLOptionalDataSourceMetadata.data_portal_type_other + URLOptionalDataSourceMetadata.data_portal_type_other, + URLInternetArchivesProbeMetadata.archive_url, ) .select_from( cte.cte @@ -70,6 +74,10 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: URLOptionalDataSourceMetadata, URL.id == URLOptionalDataSourceMetadata.url_id, ) + .outerjoin( + URLInternetArchivesProbeMetadata, + URL.id == URLInternetArchivesProbeMetadata.url_id, + ) .join( URLRecordType, URLRecordType.url_id == URL.id, @@ -113,7 +121,10 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: 
access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], data_portal_type_other=mapping[URLOptionalDataSourceMetadata.data_portal_type_other], - url_status=DataSourcesURLStatus.OK + url_status=convert_sm_url_status_to_ds_url_status( + sm_url_status=mapping[URL.status], + ), + internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, ) ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py index da695cf0..5a784295 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py @@ -6,8 +6,10 @@ from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.add.queries.cte import \ DSAppLinkSyncMetaURLAddPrerequisitesCTEContainer +from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel from src.external.pdap.impl.sync.meta_urls.add.request import AddMetaURLsOuterRequest, AddMetaURLsInnerRequest @@ -21,7 +23,8 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: agency_id_cte = ( select( LinkURLAgency.url_id, - func.array_agg(LinkURLAgency.agency_id).label("agency_ids") + func.array_agg(LinkURLAgency.agency_id).label("agency_ids"), + ) .group_by( LinkURLAgency.url_id @@ -33,6 +36,8 @@ async def run(self, session: 
AsyncSession) -> AddMetaURLsOuterRequest: select( cte.url_id, URL.full_url, + URL.status, + URLInternetArchivesProbeMetadata.archive_url, agency_id_cte.c.agency_ids ) .select_from( @@ -42,6 +47,10 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: URL, URL.id == cte.url_id, ) + .outerjoin( + URLInternetArchivesProbeMetadata, + URL.id == URLInternetArchivesProbeMetadata.url_id, + ) .join( agency_id_cte, cte.url_id == agency_id_cte.c.url_id @@ -61,7 +70,11 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: request_id=mapping[cte.url_id], content=MetaURLSyncContentModel( url=mapping["full_url"], - agency_ids=mapping["agency_ids"] + agency_ids=mapping["agency_ids"], + internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, + url_status=convert_sm_url_status_to_ds_url_status( + sm_url_status=mapping[URL.status], + ), ) ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py index 5dfb81bd..8cdb8ed6 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py @@ -6,8 +6,10 @@ from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.queries.cte import \ DSAppLinkSyncMetaURLUpdatePrerequisitesCTEContainer +from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.impl.sync.meta_urls._shared.content import 
MetaURLSyncContentModel from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest, UpdateMetaURLsInnerRequest @@ -33,7 +35,9 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: select( cte.ds_meta_url_id, URL.full_url, - agency_id_cte.c.agency_ids + URL.status, + agency_id_cte.c.agency_ids, + URLInternetArchivesProbeMetadata.archive_url, ) .select_from( cte.cte @@ -42,6 +46,10 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: URL, URL.id == cte.url_id, ) + .outerjoin( + URLInternetArchivesProbeMetadata, + URL.id == URLInternetArchivesProbeMetadata.url_id, + ) .outerjoin( agency_id_cte, cte.url_id == agency_id_cte.c.url_id @@ -61,7 +69,11 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: app_id=mapping[cte.ds_meta_url_id], content=MetaURLSyncContentModel( url=mapping['full_url'], - agency_ids=mapping["agency_ids"] or [] + agency_ids=mapping["agency_ids"] or [], + internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, + url_status=convert_sm_url_status_to_ds_url_status( + sm_url_status=mapping[URL.status], + ), ) ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/shared/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/shared/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py new file mode 100644 index 00000000..3f586b20 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py @@ -0,0 +1,14 @@ +from src.collectors.enums import URLStatus +from src.external.pdap.enums import DataSourcesURLStatus + + +def convert_sm_url_status_to_ds_url_status( + sm_url_status: URLStatus +) -> DataSourcesURLStatus: + match sm_url_status: + case URLStatus.OK: + return DataSourcesURLStatus.OK + case URLStatus.BROKEN: + return DataSourcesURLStatus.BROKEN + case _: + raise 
ValueError(f"URL status has no corresponding DS Status: {sm_url_status}") \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/update_url_status/__init__.py b/src/core/tasks/scheduled/impl/update_url_status/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/update_url_status/operator.py b/src/core/tasks/scheduled/impl/update_url_status/operator.py new file mode 100644 index 00000000..82285996 --- /dev/null +++ b/src/core/tasks/scheduled/impl/update_url_status/operator.py @@ -0,0 +1,15 @@ +from src.core.tasks.scheduled.impl.update_url_status.query import UpdateURLStatusQueryBuilder +from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase +from src.db.enums import TaskType + + +class UpdateURLStatusOperator(ScheduledTaskOperatorBase): + + @property + def task_type(self) -> TaskType: + return TaskType.UPDATE_URL_STATUS + + async def inner_task_logic(self) -> None: + await self.adb_client.run_query_builder( + UpdateURLStatusQueryBuilder() + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/update_url_status/query.py b/src/core/tasks/scheduled/impl/update_url_status/query.py new file mode 100644 index 00000000..963405b6 --- /dev/null +++ b/src/core/tasks/scheduled/impl/update_url_status/query.py @@ -0,0 +1,49 @@ +from sqlalchemy import update, exists, select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.collectors.enums import URLStatus +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.queries.base.builder import QueryBuilderBase + + +class UpdateURLStatusQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> None: + + # Update broken URLs to ok if their web metadata status code is not 404 + query_broken_to_ok = ( + update(URL) + .values( + status=URLStatus.OK + ) + .where( + exists( + select(1).where( + URLWebMetadata.url_id == 
URL.id, # <-- correlate + URLWebMetadata.status_code != 404, + URL.status == URLStatus.BROKEN + ) + ) + ) + ) + + # Update ok URLs to broken if their status is 404 + query_ok_to_broken = ( + update(URL) + .values( + status=URLStatus.BROKEN + ) + .where( + exists( + select(1).where( + URLWebMetadata.url_id == URL.id, # <-- correlate + URLWebMetadata.status_code == 404, + URL.status == URLStatus.OK + ) + ) + ) + ) + + await session.execute(query_broken_to_ok) + await session.execute(query_ok_to_broken) \ No newline at end of file diff --git a/src/core/tasks/scheduled/loader.py b/src/core/tasks/scheduled/loader.py index 3ea4fc94..394a60ce 100644 --- a/src/core/tasks/scheduled/loader.py +++ b/src/core/tasks/scheduled/loader.py @@ -23,6 +23,7 @@ from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.core import DSAppSyncMetaURLsDeleteTaskOperator from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.core import DSAppSyncMetaURLsUpdateTaskOperator from src.core.tasks.scheduled.impl.task_cleanup.operator import TaskCleanupOperator +from src.core.tasks.scheduled.impl.update_url_status.operator import UpdateURLStatusOperator from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.hub.client import HuggingFaceHubClient @@ -211,5 +212,13 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: interval_minutes=IntervalEnum.HOURLY.value, enabled=self.setup_flag("DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG") ), + ### URL + ScheduledTaskEntry( + operator=UpdateURLStatusOperator( + adb_client=self.adb_client + ), + interval_minutes=IntervalEnum.HOURLY.value, + enabled=self.setup_flag("UPDATE_URL_STATUS_TASK_FLAG") + ), ] diff --git a/src/db/enums.py b/src/db/enums.py index 053fdace..034ec0b8 100644 --- a/src/db/enums.py +++ b/src/db/enums.py @@ -62,7 +62,9 @@ class TaskType(PyEnum): RUN_URL_TASKS = "Run URL Task Cycles" TASK_CLEANUP = "Task Cleanup" 
REFRESH_MATERIALIZED_VIEWS = "Refresh Materialized Views" + UPDATE_URL_STATUS = "Update URL Status" + # Sync Tasks SYNC_AGENCIES_ADD = "Sync Agencies Add" SYNC_AGENCIES_UPDATE = "Sync Agencies Update" SYNC_AGENCIES_DELETE = "Sync Agencies Delete" diff --git a/src/external/pdap/impl/sync/data_sources/_shared/content.py b/src/external/pdap/impl/sync/data_sources/_shared/content.py index 914b6d1e..59d0bcc6 100644 --- a/src/external/pdap/impl/sync/data_sources/_shared/content.py +++ b/src/external/pdap/impl/sync/data_sources/_shared/content.py @@ -37,6 +37,7 @@ class DataSourceSyncContentModel(BaseModel): access_notes: str | None = None access_types: list[AccessTypeEnum] = [] data_portal_type_other: str | None = None - url_status: DataSourcesURLStatus + url_status: DataSourcesURLStatus = DataSourcesURLStatus.OK + internet_archives_url: str | None = None agency_ids: list[int] = [] diff --git a/src/external/pdap/impl/sync/meta_urls/_shared/content.py b/src/external/pdap/impl/sync/meta_urls/_shared/content.py index 9d81b3d7..5db804cd 100644 --- a/src/external/pdap/impl/sync/meta_urls/_shared/content.py +++ b/src/external/pdap/impl/sync/meta_urls/_shared/content.py @@ -1,6 +1,10 @@ from pydantic import BaseModel +from src.external.pdap.enums import DataSourcesURLStatus + class MetaURLSyncContentModel(BaseModel): url: str + url_status: DataSourcesURLStatus = DataSourcesURLStatus.OK + internet_archives_url: str | None = None agency_ids: list[int] = [] diff --git a/tests/automated/integration/tasks/scheduled/impl/update_url_status/__init__.py b/tests/automated/integration/tasks/scheduled/impl/update_url_status/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py b/tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py new file mode 100644 index 00000000..6b06fe31 --- /dev/null +++ 
b/tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py @@ -0,0 +1,77 @@ +import pytest +from sqlalchemy import update + +from src.collectors.enums import URLStatus +from src.core.tasks.scheduled.impl.update_url_status.operator import UpdateURLStatusOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from tests.helpers.data_creator.core import DBDataCreator + + +@pytest.mark.asyncio +async def test_update_url_status_task( + test_url_data_source_id: int, + test_url_meta_url_id: int, + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator +): + + # Create Operator + operator = UpdateURLStatusOperator( + adb_client=adb_client_test, + ) + + # Add web metadata to URLs + ## Data Source URL: Add 404 + await db_data_creator.create_web_metadata( + url_ids=[test_url_data_source_id], + status_code=404 + ) + + ## Meta URL: Add 200 + await db_data_creator.create_web_metadata( + url_ids=[test_url_meta_url_id], + status_code=200 + ) + + # Run Task + await operator.run_task() + + # Check URLs + urls: list[URL] = await adb_client_test.get_all(URL) + id_status_set_tuple: set[tuple[int, URLStatus]] = { + (url.id, url.status) + for url in urls + } + ## Data Source URL: Status should now be broken + ## Meta URL: Status should be unchanged + assert id_status_set_tuple == { + (test_url_data_source_id, URLStatus.BROKEN), + (test_url_meta_url_id, URLStatus.OK) + } + + # Update Web Metadata for Data Source URL to be 200 + statement = update(URLWebMetadata).where( + URLWebMetadata.url_id == test_url_data_source_id, + ).values( + status_code=200 + ) + await adb_client_test.execute(statement) + + # Run Task + await operator.run_task() + + # Check URLs + urls: list[URL] = await adb_client_test.get_all(URL) + id_status_set_tuple: set[tuple[int, URLStatus]] = { + (url.id, url.status) + for url in urls + } + ## 
Data Source URL: Status should now be ok + ## Meta URL: Status should be unchanged + assert id_status_set_tuple == { + (test_url_data_source_id, URLStatus.OK), + (test_url_meta_url_id, URLStatus.OK) + } + diff --git a/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py b/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py index 63c64264..ae41bc30 100644 --- a/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py +++ b/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py @@ -2,7 +2,7 @@ from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader -NUMBER_OF_ENTRIES = 19 +NUMBER_OF_ENTRIES = 20 @pytest.mark.asyncio async def test_happy_path(