Skip to content

Commit ce95750

Browse files
committed
Finish draft
1 parent 13e92ce commit ce95750

File tree

77 files changed

+1311
-196
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+1311
-196
lines changed

alembic/versions/2025_10_28_1539-a57c3b5b6e93_add_sync_log_table.py

Lines changed: 296 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from alembic import op
1111
import sqlalchemy as sa
1212

13-
from src.util.alembic_helpers import created_at_column, updated_at_column, create_updated_at_trigger
13+
from src.util.alembic_helpers import created_at_column, updated_at_column, create_updated_at_trigger, remove_enum_value
1414

1515
# revision identifiers, used by Alembic.
1616
revision: str = 'a57c3b5b6e93'
@@ -19,6 +19,211 @@
1919
depends_on: Union[str, Sequence[str], None] = None
2020

2121

22+
def _add_data_portal_type_other_to_ds_optional_metadata():
23+
op.add_column(
24+
'url_optional_data_source_metadata',
25+
sa.Column(
26+
'data_portal_type_other',
27+
sa.String(),
28+
nullable=True
29+
)
30+
)
31+
32+
33+
def upgrade() -> None:
34+
_create_sync_log()
35+
_create_ds_agency_link()
36+
_migrate_agency_ids_to_ds_agency_link()
37+
remove_id_column_from_agencies()
38+
rename_agency_id_to_id()
39+
_rename_existing_tables_to_ds_app_format()
40+
_alter_ds_app_link_data_source_table()
41+
_alter_ds_app_link_meta_url_table()
42+
_add_flag_deletion_tables()
43+
_add_last_synced_at_columns()
44+
_add_link_table_modification_triggers()
45+
_add_updated_at_to_optional_data_source_metadata_table()
46+
_update_sync_tasks()
47+
_alter_agency_jurisdiction_type_column()
48+
_add_updated_at_to_url_record_type_table()
49+
_add_updated_at_trigger_to_url_optional_data_source_metadata()
50+
_add_data_portal_type_other_to_ds_optional_metadata()
51+
52+
def _add_updated_at_trigger_to_url_optional_data_source_metadata():
53+
create_updated_at_trigger(
54+
"url_optional_data_source_metadata"
55+
)
56+
57+
def _add_updated_at_to_url_record_type_table():
58+
op.add_column(
59+
'url_record_type',
60+
updated_at_column()
61+
)
62+
create_updated_at_trigger(
63+
"url_record_type"
64+
)
65+
66+
67+
68+
def _alter_agency_jurisdiction_type_column():
69+
op.alter_column(
70+
'agencies',
71+
'jurisdiction_type',
72+
nullable=False,
73+
)
74+
75+
76+
def _update_sync_tasks():
77+
78+
# Drop Views
79+
op.execute("drop view url_task_count_1_day")
80+
op.execute("drop view url_task_count_1_week")
81+
op.execute("drop materialized view url_status_mat_view")
82+
83+
84+
85+
targets: list[tuple[str, str]] = [
86+
('tasks', 'task_type'),
87+
('url_task_error', 'task_type')
88+
]
89+
90+
remove_enum_value(
91+
enum_name="task_type",
92+
value_to_remove="Sync Agencies",
93+
targets=targets
94+
)
95+
remove_enum_value(
96+
enum_name="task_type",
97+
value_to_remove="Sync Data Sources",
98+
targets=targets
99+
)
100+
new_enum_values: list[str] = [
101+
"Sync Agencies Add",
102+
"Sync Agencies Update",
103+
"Sync Agencies Delete",
104+
"Sync Data Sources Add",
105+
"Sync Data Sources Update",
106+
"Sync Data Sources Delete",
107+
"Sync Meta URLs Add",
108+
"Sync Meta URLs Update",
109+
"Sync Meta URLs Delete",
110+
]
111+
for enum_value in new_enum_values:
112+
op.execute(f"ALTER TYPE task_type ADD VALUE '{enum_value}';")
113+
114+
# Recreate Views
115+
op.execute("""
116+
create view url_task_count_1_day(task_type, count) as
117+
SELECT
118+
t.task_type,
119+
count(ltu.url_id) AS count
120+
FROM
121+
tasks t
122+
JOIN link_task_urls ltu
123+
ON ltu.task_id = t.id
124+
WHERE
125+
t.updated_at > (now() - '1 day'::interval)
126+
GROUP BY
127+
t.task_type;
128+
""")
129+
130+
op.execute("""
131+
create view url_task_count_1_week(task_type, count) as
132+
SELECT
133+
t.task_type,
134+
count(ltu.url_id) AS count
135+
FROM
136+
tasks t
137+
JOIN link_task_urls ltu
138+
ON ltu.task_id = t.id
139+
WHERE
140+
t.updated_at > (now() - '7 days'::interval)
141+
GROUP BY
142+
t.task_type;
143+
""")
144+
145+
op.execute(
146+
"""
147+
CREATE MATERIALIZED VIEW url_status_mat_view as
148+
with
149+
urls_with_relevant_errors as (
150+
select
151+
ute.url_id
152+
from
153+
url_task_error ute
154+
where
155+
ute.task_type in (
156+
'Screenshot',
157+
'HTML',
158+
'URL Probe'
159+
)
160+
)
161+
, status_text as (
162+
select
163+
u.id as url_id,
164+
case
165+
when (
166+
-- Validated as not relevant, individual record, or not found
167+
fuv.type in ('not relevant', 'individual record', 'not found')
168+
) Then 'Accepted'
169+
when (
170+
(fuv.type = 'data source' and uds.url_id is null)
171+
OR
172+
(fuv.type = 'meta url' and udmu.url_id is null)
173+
) Then 'Awaiting Submission'
174+
when (
175+
(fuv.type = 'data source' and uds.url_id is not null)
176+
OR
177+
(fuv.type = 'meta url' and udmu.url_id is not null)
178+
) Then 'Submitted'
179+
when (
180+
-- Has compressed HTML
181+
uch.url_id is not null
182+
AND
183+
-- Has web metadata
184+
uwm.url_id is not null
185+
AND
186+
-- Has screenshot
187+
us.url_id is not null
188+
) THEN 'Community Labeling'
189+
when uwre.url_id is not null then 'Error'
190+
ELSE 'Intake'
191+
END as status
192+
193+
from
194+
urls u
195+
left join urls_with_relevant_errors uwre
196+
on u.id = uwre.url_id
197+
left join url_screenshot us
198+
on u.id = us.url_id
199+
left join url_compressed_html uch
200+
on u.id = uch.url_id
201+
left join url_web_metadata uwm
202+
on u.id = uwm.url_id
203+
left join flag_url_validated fuv
204+
on u.id = fuv.url_id
205+
left join ds_app_link_meta_url udmu
206+
on u.id = udmu.url_id
207+
left join ds_app_link_data_source uds
208+
on u.id = uds.url_id
209+
)
210+
select
211+
url_id,
212+
status,
213+
CASE status
214+
WHEN 'Intake' THEN 100
215+
WHEN 'Error' THEN 110
216+
WHEN 'Community Labeling' THEN 200
217+
WHEN 'Accepted' THEN 300
218+
WHEN 'Awaiting Submission' THEN 380
219+
WHEN 'Submitted' THEN 390
220+
ELSE -1
221+
END as code
222+
from status_text
223+
"""
224+
)
225+
226+
22227
def last_synced_at_column():
23228
return sa.Column(
24229
'last_synced_at',
@@ -35,25 +240,56 @@ def _add_link_table_modification_triggers():
35240
RETURNS trigger
36241
LANGUAGE plpgsql AS $$
37242
BEGIN
38-
-- UNION to cover INSERT/UPDATE (NEW TABLE) and DELETE (OLD TABLE)
39-
UPDATE urls u
40-
SET updated_at = clock_timestamp() -- better than now() for long txns
41-
FROM (
42-
SELECT DISTINCT url_id FROM newtab
43-
UNION
44-
SELECT DISTINCT url_id FROM oldtab
45-
) AS hit
46-
WHERE u.id = hit.url_id;
243+
IF TG_OP = 'INSERT' THEN
244+
EXECUTE $q$
245+
UPDATE urls u
246+
SET updated_at = clock_timestamp()
247+
FROM (SELECT DISTINCT url_id FROM newtab) AS hit
248+
WHERE u.id = hit.url_id
249+
$q$;
250+
251+
ELSIF TG_OP = 'DELETE' THEN
252+
EXECUTE $q$
253+
UPDATE urls u
254+
SET updated_at = clock_timestamp()
255+
FROM (SELECT DISTINCT url_id FROM oldtab) AS hit
256+
WHERE u.id = hit.url_id
257+
$q$;
258+
259+
ELSE -- UPDATE
260+
EXECUTE $q$
261+
UPDATE urls u
262+
SET updated_at = clock_timestamp()
263+
FROM (
264+
SELECT DISTINCT url_id FROM newtab
265+
UNION
266+
SELECT DISTINCT url_id FROM oldtab
267+
) AS hit
268+
WHERE u.id = hit.url_id
269+
$q$;
270+
END IF;
47271
48272
RETURN NULL; -- statement-level trigger
49273
END $$;
50274
51275
-- statement-level trigger with transition tables
52-
CREATE TRIGGER trg_link_touch_parent
53-
AFTER INSERT OR UPDATE OR DELETE ON link_parent_child
276+
CREATE TRIGGER trg_link_urls_agency_touch_url_ins
277+
AFTER INSERT ON link_urls_agency
278+
REFERENCING NEW TABLE AS newtab
279+
FOR EACH STATEMENT
280+
EXECUTE FUNCTION touch_url_from_agency_link();
281+
282+
CREATE TRIGGER trg_link_urls_agency_touch_url_upd
283+
AFTER UPDATE ON link_urls_agency
54284
REFERENCING NEW TABLE AS newtab OLD TABLE AS oldtab
55285
FOR EACH STATEMENT
56-
EXECUTE FUNCTION touch_parent_from_link();
286+
EXECUTE FUNCTION touch_url_from_agency_link();
287+
288+
CREATE TRIGGER trg_link_urls_agency_touch_url_del
289+
AFTER DELETE ON link_urls_agency
290+
REFERENCING OLD TABLE AS oldtab
291+
FOR EACH STATEMENT
292+
EXECUTE FUNCTION touch_url_from_agency_link();
57293
58294
""")
59295

@@ -65,26 +301,56 @@ def _add_link_table_modification_triggers():
65301
LANGUAGE plpgsql AS
66302
$$
67303
BEGIN
68-
-- UNION to cover INSERT/UPDATE (NEW TABLE) and DELETE (OLD TABLE)
69-
UPDATE agencies a
70-
SET updated_at = clock_timestamp() -- better than now() for long txns
71-
FROM (SELECT DISTINCT agency_id
72-
FROM newtab
73-
UNION
74-
SELECT DISTINCT agency_id
75-
FROM oldtab) AS hit
76-
WHERE a.id = hit.agency_id;
77-
78-
RETURN NULL; -- statement-level trigger
304+
IF TG_OP = 'INSERT' THEN
305+
EXECUTE $q$
306+
UPDATE agencies a
307+
SET updated_at = clock_timestamp()
308+
FROM (SELECT DISTINCT agency_id FROM newtab) AS hit
309+
WHERE a.id = hit.agency_id
310+
$q$;
311+
312+
ELSIF TG_OP = 'DELETE' THEN
313+
EXECUTE $q$
314+
UPDATE agencies a
315+
SET updated_at = clock_timestamp()
316+
FROM (SELECT DISTINCT agency_id FROM oldtab) AS hit
317+
WHERE a.id = hit.agency_id
318+
$q$;
319+
320+
ELSE -- UPDATE
321+
EXECUTE $q$
322+
UPDATE agencies a
323+
SET updated_at = clock_timestamp()
324+
FROM (
325+
SELECT DISTINCT agency_id FROM newtab
326+
UNION
327+
SELECT DISTINCT agency_id FROM oldtab
328+
) AS hit
329+
WHERE a.id = hit.agency_id
330+
$q$;
331+
END IF;
332+
333+
RETURN NULL; -- statement-level trigger
79334
END
80335
$$;
81336
82337
-- statement-level trigger with transition tables
83-
CREATE TRIGGER trg_link_touch_parent
84-
AFTER INSERT OR UPDATE OR DELETE
85-
ON link_agencies_locations
86-
REFERENCING NEW TABLE AS newtab OLD TABLE AS oldtab
87-
FOR EACH STATEMENT
338+
CREATE TRIGGER trg_link_agencies_locations_touch_agencies_ins
339+
AFTER INSERT ON link_agencies_locations
340+
REFERENCING NEW TABLE AS newtab
341+
FOR EACH STATEMENT
342+
EXECUTE FUNCTION touch_agency_from_location_link();
343+
344+
CREATE TRIGGER trg_link_agencies_locations_touch_agencies_upd
345+
AFTER UPDATE ON link_agencies_locations
346+
REFERENCING NEW TABLE AS newtab OLD TABLE AS oldtab
347+
FOR EACH STATEMENT
348+
EXECUTE FUNCTION touch_agency_from_location_link();
349+
350+
CREATE TRIGGER trg_link_agencies_locations_touch_agencies_del
351+
AFTER DELETE ON link_agencies_locations
352+
REFERENCING OLD TABLE AS oldtab
353+
FOR EACH STATEMENT
88354
EXECUTE FUNCTION touch_agency_from_location_link();
89355
"""
90356
)
@@ -93,19 +359,7 @@ def _add_link_table_modification_triggers():
93359

94360

95361

96-
def upgrade() -> None:
97-
_create_sync_log()
98-
_create_ds_agency_link()
99-
_migrate_agency_ids_to_ds_agency_link()
100-
remove_id_column_from_agencies()
101-
rename_agency_id_to_id()
102-
_rename_existing_tables_to_ds_app_format()
103-
_alter_ds_app_link_data_source_table()
104-
_alter_ds_app_link_meta_url_table()
105-
_add_flag_deletion_tables()
106-
_add_last_synced_at_columns()
107-
_add_link_table_modification_triggers()
108-
_add_updated_at_to_optional_data_source_metadata_table()
362+
109363

110364
def _add_updated_at_to_optional_data_source_metadata_table():
111365
op.add_column(

0 commit comments

Comments
 (0)