Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ENV.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ Note that some tasks/subtasks are themselves enabled by other tasks.
| `DELETE_STALE_SCREENSHOTS_TASK_FLAG` | Deletes stale screenshots for URLs already validated. |
| `TASK_CLEANUP_TASK_FLAG` | Cleans up tasks that are no longer needed. |
| `REFRESH_MATERIALIZED_VIEWS_TASK_FLAG` | Refreshes materialized views. |
| `UPDATE_URL_STATUS_TASK_FLAG` | Updates the status of URLs. |
| `DS_APP_SYNC_AGENCY_ADD_TASK_FLAG` | Adds new agencies to the Data Sources App. |
| `DS_APP_SYNC_AGENCY_UPDATE_TASK_FLAG` | Updates existing agencies in the Data Sources App. |
| `DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG` | Deletes agencies in the Data Sources App. |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Add update_url_status task

Revision ID: 783268bd3daa
Revises: 88ac26c3b025
Create Date: 2025-11-18 09:02:54.985705

"""
from typing import Sequence, Union

from alembic import op

Check warning on line 10 in alembic/versions/2025_11_18_0902-783268bd3daa_add_update_url_status_task.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_11_18_0902-783268bd3daa_add_update_url_status_task.py#L10 <401>

'alembic.op' imported but unused
Raw output
./alembic/versions/2025_11_18_0902-783268bd3daa_add_update_url_status_task.py:10:1: F401 'alembic.op' imported but unused
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_11_18_0902-783268bd3daa_add_update_url_status_task.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_11_18_0902-783268bd3daa_add_update_url_status_task.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_11_18_0902-783268bd3daa_add_update_url_status_task.py:11:1: F401 'sqlalchemy as sa' imported but unused

from src.util.alembic_helpers import add_enum_value

# revision identifiers, used by Alembic.
revision: str = '783268bd3daa'
down_revision: Union[str, None] = '88ac26c3b025'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add enum values required by the new update_url_status task.

    Adds the 'broken' value to the url_status enum and the
    'Update URL Status' value to the task_type enum.
    """
    add_enum_value(enum_name="url_status", enum_value="broken")
    add_enum_value(enum_name="task_type", enum_value="Update URL Status")


def downgrade() -> None:
    """No-op downgrade.

    PostgreSQL does not support removing a value from an existing enum
    type, so the enum values added by this revision are left in place.
    """
1 change: 0 additions & 1 deletion src/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from fastapi import FastAPI
from pdap_access_manager.access_manager.async_ import AccessManagerAsync
from pdap_access_manager.models.auth import AuthInfo
from sqlalchemy.ext.asyncio import create_async_engine
from starlette.responses import RedirectResponse

from src.api.endpoints.agencies.routes import agencies_router
Expand Down
1 change: 1 addition & 0 deletions src/collectors/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ class URLStatus(Enum):
OK = "ok"
ERROR = "error"
DUPLICATE = "duplicate"
BROKEN = "broken"
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT
from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.queries.cte import \
DSAppLinkSyncDataSourceAddPrerequisitesCTEContainer
from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status
from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata
from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata
from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType
from src.db.queries.base.builder import QueryBuilderBase
Expand Down Expand Up @@ -38,6 +40,7 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest:
# Required
URL.full_url,
URL.name,
URL.status,
URLRecordType.record_type,
agency_id_cte.c.agency_ids,
# Optional
Expand All @@ -56,6 +59,7 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest:
URLOptionalDataSourceMetadata.scraper_url,
URLOptionalDataSourceMetadata.access_notes,
URLOptionalDataSourceMetadata.access_types,
URLInternetArchivesProbeMetadata.archive_url,
)
.select_from(
cte.cte
Expand All @@ -68,6 +72,10 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest:
URLOptionalDataSourceMetadata,
URL.id == URLOptionalDataSourceMetadata.url_id,
)
.outerjoin(
URLInternetArchivesProbeMetadata,
URL.id == URLInternetArchivesProbeMetadata.url_id,
)
.join(
URLRecordType,
URLRecordType.url_id == URL.id,
Expand Down Expand Up @@ -110,7 +118,10 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest:
scraper_url=mapping[URLOptionalDataSourceMetadata.scraper_url],
access_notes=mapping[URLOptionalDataSourceMetadata.access_notes],
access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [],
url_status=DataSourcesURLStatus.OK
url_status=convert_sm_url_status_to_ds_url_status(
sm_url_status=mapping[URL.status],
),
internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None,
)
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT
from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.cte import \
DSAppLinkSyncDataSourceUpdatePrerequisitesCTEContainer
from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status
from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata
from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata
from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType
from src.db.queries.base.builder import QueryBuilderBase
Expand Down Expand Up @@ -39,6 +41,7 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest:
# Required
URL.full_url,
URL.name,
URL.status,
URLRecordType.record_type,
agency_id_cte.c.agency_ids,
# Optional
Expand All @@ -57,7 +60,8 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest:
URLOptionalDataSourceMetadata.scraper_url,
URLOptionalDataSourceMetadata.access_notes,
URLOptionalDataSourceMetadata.access_types,
URLOptionalDataSourceMetadata.data_portal_type_other
URLOptionalDataSourceMetadata.data_portal_type_other,
URLInternetArchivesProbeMetadata.archive_url,
)
.select_from(
cte.cte
Expand All @@ -70,6 +74,10 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest:
URLOptionalDataSourceMetadata,
URL.id == URLOptionalDataSourceMetadata.url_id,
)
.outerjoin(
URLInternetArchivesProbeMetadata,
URL.id == URLInternetArchivesProbeMetadata.url_id,
)
.join(
URLRecordType,
URLRecordType.url_id == URL.id,
Expand Down Expand Up @@ -113,7 +121,10 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest:
access_notes=mapping[URLOptionalDataSourceMetadata.access_notes],
access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [],
data_portal_type_other=mapping[URLOptionalDataSourceMetadata.data_portal_type_other],
url_status=DataSourcesURLStatus.OK
url_status=convert_sm_url_status_to_ds_url_status(
sm_url_status=mapping[URL.status],
),
internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None,
)
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT
from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.add.queries.cte import \
DSAppLinkSyncMetaURLAddPrerequisitesCTEContainer
from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status
from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata
from src.db.queries.base.builder import QueryBuilderBase
from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel
from src.external.pdap.impl.sync.meta_urls.add.request import AddMetaURLsOuterRequest, AddMetaURLsInnerRequest
Expand All @@ -21,7 +23,8 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest:
agency_id_cte = (
select(
LinkURLAgency.url_id,
func.array_agg(LinkURLAgency.agency_id).label("agency_ids")
func.array_agg(LinkURLAgency.agency_id).label("agency_ids"),

)
.group_by(
LinkURLAgency.url_id
Expand All @@ -33,6 +36,8 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest:
select(
cte.url_id,
URL.full_url,
URL.status,
URLInternetArchivesProbeMetadata.archive_url,
agency_id_cte.c.agency_ids
)
.select_from(
Expand All @@ -42,6 +47,10 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest:
URL,
URL.id == cte.url_id,
)
.outerjoin(
URLInternetArchivesProbeMetadata,
URL.id == URLInternetArchivesProbeMetadata.url_id,
)
.join(
agency_id_cte,
cte.url_id == agency_id_cte.c.url_id
Expand All @@ -61,7 +70,11 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest:
request_id=mapping[cte.url_id],
content=MetaURLSyncContentModel(
url=mapping["full_url"],
agency_ids=mapping["agency_ids"]
agency_ids=mapping["agency_ids"],
internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None,
url_status=convert_sm_url_status_to_ds_url_status(
sm_url_status=mapping[URL.status],
),
)
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT
from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.queries.cte import \
DSAppLinkSyncMetaURLUpdatePrerequisitesCTEContainer
from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status
from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata
from src.db.queries.base.builder import QueryBuilderBase
from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel
from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest, UpdateMetaURLsInnerRequest
Expand All @@ -33,7 +35,9 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest:
select(
cte.ds_meta_url_id,
URL.full_url,
agency_id_cte.c.agency_ids
URL.status,
agency_id_cte.c.agency_ids,
URLInternetArchivesProbeMetadata.archive_url,
)
.select_from(
cte.cte
Expand All @@ -42,6 +46,10 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest:
URL,
URL.id == cte.url_id,
)
.outerjoin(
URLInternetArchivesProbeMetadata,
URL.id == URLInternetArchivesProbeMetadata.url_id,
)
.outerjoin(
agency_id_cte,
cte.url_id == agency_id_cte.c.url_id
Expand All @@ -61,7 +69,11 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest:
app_id=mapping[cte.ds_meta_url_id],
content=MetaURLSyncContentModel(
url=mapping['full_url'],
agency_ids=mapping["agency_ids"] or []
agency_ids=mapping["agency_ids"] or [],
internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None,
url_status=convert_sm_url_status_to_ds_url_status(
sm_url_status=mapping[URL.status],
),
)
)
)
Expand Down
Empty file.
14 changes: 14 additions & 0 deletions src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from src.collectors.enums import URLStatus

Check warning on line 1 in src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py:1:1: D100 Missing docstring in public module
from src.external.pdap.enums import DataSourcesURLStatus


def convert_sm_url_status_to_ds_url_status(
    sm_url_status: URLStatus
) -> DataSourcesURLStatus:
    """Translate a Source Manager URL status into the Data Sources App status.

    Args:
        sm_url_status: Status of the URL within the Source Manager.

    Returns:
        The corresponding Data Sources App URL status.

    Raises:
        ValueError: If the given status has no Data Sources App counterpart
            (the visible URLStatus enum also defines ERROR and DUPLICATE).
    """
    if sm_url_status == URLStatus.OK:
        return DataSourcesURLStatus.OK
    if sm_url_status == URLStatus.BROKEN:
        return DataSourcesURLStatus.BROKEN
    raise ValueError(f"URL status has no corresponding DS Status: {sm_url_status}")

Check warning on line 14 in src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py#L14 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py:14:92: W292 no newline at end of file
Empty file.
15 changes: 15 additions & 0 deletions src/core/tasks/scheduled/impl/update_url_status/operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from src.core.tasks.scheduled.impl.update_url_status.query import UpdateURLStatusQueryBuilder

Check warning on line 1 in src/core/tasks/scheduled/impl/update_url_status/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/update_url_status/operator.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/update_url_status/operator.py:1:1: D100 Missing docstring in public module
from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase
from src.db.enums import TaskType


class UpdateURLStatusOperator(ScheduledTaskOperatorBase):
    """Scheduled task operator that runs the URL status update query."""

    @property
    def task_type(self) -> TaskType:
        """Task type under which this operator is scheduled."""
        return TaskType.UPDATE_URL_STATUS

    async def inner_task_logic(self) -> None:
        """Execute the status-update query via the async database client."""
        builder = UpdateURLStatusQueryBuilder()
        await self.adb_client.run_query_builder(builder)

Check warning on line 15 in src/core/tasks/scheduled/impl/update_url_status/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/update_url_status/operator.py#L15 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/update_url_status/operator.py:15:10: W292 no newline at end of file
49 changes: 49 additions & 0 deletions src/core/tasks/scheduled/impl/update_url_status/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from sqlalchemy import update, exists, select

Check warning on line 1 in src/core/tasks/scheduled/impl/update_url_status/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/update_url_status/query.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/update_url_status/query.py:1:1: D100 Missing docstring in public module
from sqlalchemy.ext.asyncio import AsyncSession

from src.collectors.enums import URLStatus
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata
from src.db.queries.base.builder import QueryBuilderBase


class UpdateURLStatusQueryBuilder(QueryBuilderBase):
    """Recomputes URL.status from recorded web-probe status codes.

    Flips BROKEN URLs back to OK when a non-404 status code exists for
    them, and flips OK URLs to BROKEN when a 404 status code exists.
    """

    async def run(self, session: AsyncSession) -> None:
        """Execute both status-transition UPDATEs on the given session.

        NOTE(review): if a URL has multiple URLWebMetadata rows with mixed
        status codes (both 404 and non-404), both EXISTS predicates can
        match; since the broken->ok update runs first, such a URL ends up
        BROKEN after this method. Confirm that tie-break is intended.
        NOTE(review): any non-404 code (including e.g. 500) counts as
        "not broken" here — confirm that is the desired definition.
        """

        # Flip broken URLs back to OK if a non-404 status code was recorded.
        query_broken_to_ok = (
            update(URL)
            .values(
                status=URLStatus.OK
            )
            .where(
                exists(
                    select(1).where(
                        URLWebMetadata.url_id == URL.id,  # <-- correlate
                        URLWebMetadata.status_code != 404,
                        URL.status == URLStatus.BROKEN
                    )
                )
            )
        )

        # Flip OK URLs to BROKEN if a 404 status code was recorded.
        query_ok_to_broken = (
            update(URL)
            .values(
                status=URLStatus.BROKEN
            )
            .where(
                exists(
                    select(1).where(
                        URLWebMetadata.url_id == URL.id,  # <-- correlate
                        URLWebMetadata.status_code == 404,
                        URL.status == URLStatus.OK
                    )
                )
            )
        )

        await session.execute(query_broken_to_ok)
        await session.execute(query_ok_to_broken)

Check warning on line 49 in src/core/tasks/scheduled/impl/update_url_status/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/update_url_status/query.py#L49 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/update_url_status/query.py:49:50: W292 no newline at end of file
9 changes: 9 additions & 0 deletions src/core/tasks/scheduled/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.core import DSAppSyncMetaURLsDeleteTaskOperator
from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.core import DSAppSyncMetaURLsUpdateTaskOperator
from src.core.tasks.scheduled.impl.task_cleanup.operator import TaskCleanupOperator
from src.core.tasks.scheduled.impl.update_url_status.operator import UpdateURLStatusOperator
from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry
from src.db.client.async_ import AsyncDatabaseClient
from src.external.huggingface.hub.client import HuggingFaceHubClient
Expand Down Expand Up @@ -211,5 +212,13 @@
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG")
),
### URL

Check failure on line 215 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L215 <266>

too many leading '#' for block comment
Raw output
./src/core/tasks/scheduled/loader.py:215:13: E266 too many leading '#' for block comment
ScheduledTaskEntry(
operator=UpdateURLStatusOperator(
adb_client=self.adb_client
),
interval_minutes=IntervalEnum.HOURLY.value,
enabled=self.setup_flag("UPDATE_URL_STATUS_TASK_FLAG")
),

]
2 changes: 2 additions & 0 deletions src/db/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ class TaskType(PyEnum):
RUN_URL_TASKS = "Run URL Task Cycles"
TASK_CLEANUP = "Task Cleanup"
REFRESH_MATERIALIZED_VIEWS = "Refresh Materialized Views"
UPDATE_URL_STATUS = "Update URL Status"

# Sync Tasks
SYNC_AGENCIES_ADD = "Sync Agencies Add"
SYNC_AGENCIES_UPDATE = "Sync Agencies Update"
SYNC_AGENCIES_DELETE = "Sync Agencies Delete"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class DataSourceSyncContentModel(BaseModel):
access_notes: str | None = None
access_types: list[AccessTypeEnum] = []
data_portal_type_other: str | None = None
url_status: DataSourcesURLStatus
url_status: DataSourcesURLStatus = DataSourcesURLStatus.OK
internet_archives_url: str | None = None

agency_ids: list[int] = []
4 changes: 4 additions & 0 deletions src/external/pdap/impl/sync/meta_urls/_shared/content.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from pydantic import BaseModel

from src.external.pdap.enums import DataSourcesURLStatus


class MetaURLSyncContentModel(BaseModel):
    """Content payload for syncing a meta URL to the Data Sources App."""

    # The meta URL being synced.
    url: str
    # Status reported to the Data Sources App; defaults to OK when unset.
    url_status: DataSourcesURLStatus = DataSourcesURLStatus.OK
    # Internet Archives capture URL for this URL, if one was probed.
    internet_archives_url: str | None = None
    # IDs of agencies linked to this URL.
    # NOTE: pydantic copies mutable defaults per instance, so [] is safe here.
    agency_ids: list[int] = []
Loading