1010from alembic import op
1111import sqlalchemy as sa
1212
13- from src .util .alembic_helpers import created_at_column , updated_at_column , create_updated_at_trigger
13+ from src .util .alembic_helpers import created_at_column , updated_at_column , create_updated_at_trigger , remove_enum_value
1414
1515# revision identifiers, used by Alembic.
1616revision : str = 'a57c3b5b6e93'
1919depends_on : Union [str , Sequence [str ], None ] = None
2020
2121
22+ def _add_data_portal_type_other_to_ds_optional_metadata ():
23+ op .add_column (
24+ 'url_optional_data_source_metadata' ,
25+ sa .Column (
26+ 'data_portal_type_other' ,
27+ sa .String (),
28+ nullable = True
29+ )
30+ )
31+
32+
33+ def upgrade () -> None :
34+ _create_sync_log ()
35+ _create_ds_agency_link ()
36+ _migrate_agency_ids_to_ds_agency_link ()
37+ remove_id_column_from_agencies ()
38+ rename_agency_id_to_id ()
39+ _rename_existing_tables_to_ds_app_format ()
40+ _alter_ds_app_link_data_source_table ()
41+ _alter_ds_app_link_meta_url_table ()
42+ _add_flag_deletion_tables ()
43+ _add_last_synced_at_columns ()
44+ _add_link_table_modification_triggers ()
45+ _add_updated_at_to_optional_data_source_metadata_table ()
46+ _update_sync_tasks ()
47+ _alter_agency_jurisdiction_type_column ()
48+ _add_updated_at_to_url_record_type_table ()
49+ _add_updated_at_trigger_to_url_optional_data_source_metadata ()
50+ _add_data_portal_type_other_to_ds_optional_metadata ()
51+
52+ def _add_updated_at_trigger_to_url_optional_data_source_metadata ():
53+ create_updated_at_trigger (
54+ "url_optional_data_source_metadata"
55+ )
56+
57+ def _add_updated_at_to_url_record_type_table ():
58+ op .add_column (
59+ 'url_record_type' ,
60+ updated_at_column ()
61+ )
62+ create_updated_at_trigger (
63+ "url_record_type"
64+ )
65+
66+
67+
68+ def _alter_agency_jurisdiction_type_column ():
69+ op .alter_column (
70+ 'agencies' ,
71+ 'jurisdiction_type' ,
72+ nullable = False ,
73+ )
74+
75+
76+ def _update_sync_tasks ():
77+
78+ # Drop Views
79+ op .execute ("drop view url_task_count_1_day" )
80+ op .execute ("drop view url_task_count_1_week" )
81+ op .execute ("drop materialized view url_status_mat_view" )
82+
83+
84+
85+ targets : list [tuple [str , str ]] = [
86+ ('tasks' , 'task_type' ),
87+ ('url_task_error' , 'task_type' )
88+ ]
89+
90+ remove_enum_value (
91+ enum_name = "task_type" ,
92+ value_to_remove = "Sync Agencies" ,
93+ targets = targets
94+ )
95+ remove_enum_value (
96+ enum_name = "task_type" ,
97+ value_to_remove = "Sync Data Sources" ,
98+ targets = targets
99+ )
100+ new_enum_values : list [str ] = [
101+ "Sync Agencies Add" ,
102+ "Sync Agencies Update" ,
103+ "Sync Agencies Delete" ,
104+ "Sync Data Sources Add" ,
105+ "Sync Data Sources Update" ,
106+ "Sync Data Sources Delete" ,
107+ "Sync Meta URLs Add" ,
108+ "Sync Meta URLs Update" ,
109+ "Sync Meta URLs Delete" ,
110+ ]
111+ for enum_value in new_enum_values :
112+ op .execute (f"ALTER TYPE task_type ADD VALUE '{ enum_value } ';" )
113+
114+ # Recreate Views
115+ op .execute ("""
116+ create view url_task_count_1_day(task_type, count) as
117+ SELECT
118+ t.task_type,
119+ count(ltu.url_id) AS count
120+ FROM
121+ tasks t
122+ JOIN link_task_urls ltu
123+ ON ltu.task_id = t.id
124+ WHERE
125+ t.updated_at > (now() - '1 day'::interval)
126+ GROUP BY
127+ t.task_type;
128+ """ )
129+
130+ op .execute ("""
131+ create view url_task_count_1_week(task_type, count) as
132+ SELECT
133+ t.task_type,
134+ count(ltu.url_id) AS count
135+ FROM
136+ tasks t
137+ JOIN link_task_urls ltu
138+ ON ltu.task_id = t.id
139+ WHERE
140+ t.updated_at > (now() - '7 days'::interval)
141+ GROUP BY
142+ t.task_type;
143+ """ )
144+
145+ op .execute (
146+ """
147+ CREATE MATERIALIZED VIEW url_status_mat_view as
148+ with
149+ urls_with_relevant_errors as (
150+ select
151+ ute.url_id
152+ from
153+ url_task_error ute
154+ where
155+ ute.task_type in (
156+ 'Screenshot',
157+ 'HTML',
158+ 'URL Probe'
159+ )
160+ )
161+ , status_text as (
162+ select
163+ u.id as url_id,
164+ case
165+ when (
166+ -- Validated as not relevant, individual record, or not found
167+ fuv.type in ('not relevant', 'individual record', 'not found')
168+ ) Then 'Accepted'
169+ when (
170+ (fuv.type = 'data source' and uds.url_id is null)
171+ OR
172+ (fuv.type = 'meta url' and udmu.url_id is null)
173+ ) Then 'Awaiting Submission'
174+ when (
175+ (fuv.type = 'data source' and uds.url_id is not null)
176+ OR
177+ (fuv.type = 'meta url' and udmu.url_id is not null)
178+ ) Then 'Submitted'
179+ when (
180+ -- Has compressed HTML
181+ uch.url_id is not null
182+ AND
183+ -- Has web metadata
184+ uwm.url_id is not null
185+ AND
186+ -- Has screenshot
187+ us.url_id is not null
188+ ) THEN 'Community Labeling'
189+ when uwre.url_id is not null then 'Error'
190+ ELSE 'Intake'
191+ END as status
192+
193+ from
194+ urls u
195+ left join urls_with_relevant_errors uwre
196+ on u.id = uwre.url_id
197+ left join url_screenshot us
198+ on u.id = us.url_id
199+ left join url_compressed_html uch
200+ on u.id = uch.url_id
201+ left join url_web_metadata uwm
202+ on u.id = uwm.url_id
203+ left join flag_url_validated fuv
204+ on u.id = fuv.url_id
205+ left join ds_app_link_meta_url udmu
206+ on u.id = udmu.url_id
207+ left join ds_app_link_data_source uds
208+ on u.id = uds.url_id
209+ )
210+ select
211+ url_id,
212+ status,
213+ CASE status
214+ WHEN 'Intake' THEN 100
215+ WHEN 'Error' THEN 110
216+ WHEN 'Community Labeling' THEN 200
217+ WHEN 'Accepted' THEN 300
218+ WHEN 'Awaiting Submission' THEN 380
219+ WHEN 'Submitted' THEN 390
220+ ELSE -1
221+ END as code
222+ from status_text
223+ """
224+ )
225+
226+
22227def last_synced_at_column ():
23228 return sa .Column (
24229 'last_synced_at' ,
@@ -35,25 +240,56 @@ def _add_link_table_modification_triggers():
35240 RETURNS trigger
36241 LANGUAGE plpgsql AS $$
37242 BEGIN
38- -- UNION to cover INSERT/UPDATE (NEW TABLE) and DELETE (OLD TABLE)
39- UPDATE urls u
40- SET updated_at = clock_timestamp() -- better than now() for long txns
41- FROM (
42- SELECT DISTINCT url_id FROM newtab
43- UNION
44- SELECT DISTINCT url_id FROM oldtab
45- ) AS hit
46- WHERE u.id = hit.url_id;
243+ IF TG_OP = 'INSERT' THEN
244+ EXECUTE $q$
245+ UPDATE urls u
246+ SET updated_at = clock_timestamp()
247+ FROM (SELECT DISTINCT url_id FROM newtab) AS hit
248+ WHERE u.id = hit.url_id
249+ $q$;
250+
251+ ELSIF TG_OP = 'DELETE' THEN
252+ EXECUTE $q$
253+ UPDATE urls u
254+ SET updated_at = clock_timestamp()
255+ FROM (SELECT DISTINCT url_id FROM oldtab) AS hit
256+ WHERE u.id = hit.url_id
257+ $q$;
258+
259+ ELSE -- UPDATE
260+ EXECUTE $q$
261+ UPDATE urls u
262+ SET updated_at = clock_timestamp()
263+ FROM (
264+ SELECT DISTINCT url_id FROM newtab
265+ UNION
266+ SELECT DISTINCT url_id FROM oldtab
267+ ) AS hit
268+ WHERE u.id = hit.url_id
269+ $q$;
270+ END IF;
47271
48272 RETURN NULL; -- statement-level trigger
49273 END $$;
50274
51275 -- statement-level trigger with transition tables
52- CREATE TRIGGER trg_link_touch_parent
53- AFTER INSERT OR UPDATE OR DELETE ON link_parent_child
276+ CREATE TRIGGER trg_link_urls_agency_touch_url_ins
277+ AFTER INSERT ON link_urls_agency
278+ REFERENCING NEW TABLE AS newtab
279+ FOR EACH STATEMENT
280+ EXECUTE FUNCTION touch_url_from_agency_link();
281+
282+ CREATE TRIGGER trg_link_urls_agency_touch_url_upd
283+ AFTER UPDATE ON link_urls_agency
54284 REFERENCING NEW TABLE AS newtab OLD TABLE AS oldtab
55285 FOR EACH STATEMENT
56- EXECUTE FUNCTION touch_parent_from_link();
286+ EXECUTE FUNCTION touch_url_from_agency_link();
287+
288+ CREATE TRIGGER trg_link_urls_agency_touch_url_del
289+ AFTER DELETE ON link_urls_agency
290+ REFERENCING OLD TABLE AS oldtab
291+ FOR EACH STATEMENT
292+ EXECUTE FUNCTION touch_url_from_agency_link();
57293
58294 """ )
59295
@@ -65,26 +301,56 @@ def _add_link_table_modification_triggers():
65301 LANGUAGE plpgsql AS
66302 $$
67303 BEGIN
68- -- UNION to cover INSERT/UPDATE (NEW TABLE) and DELETE (OLD TABLE)
69- UPDATE agencies a
70- SET updated_at = clock_timestamp() -- better than now() for long txns
71- FROM (SELECT DISTINCT agency_id
72- FROM newtab
73- UNION
74- SELECT DISTINCT agency_id
75- FROM oldtab) AS hit
76- WHERE a.id = hit.agency_id;
77-
78- RETURN NULL; -- statement-level trigger
304+ IF TG_OP = 'INSERT' THEN
305+ EXECUTE $q$
306+ UPDATE agencies a
307+ SET updated_at = clock_timestamp()
308+ FROM (SELECT DISTINCT agency_id FROM newtab) AS hit
309+ WHERE a.id = hit.agency_id
310+ $q$;
311+
312+ ELSIF TG_OP = 'DELETE' THEN
313+ EXECUTE $q$
314+ UPDATE agencies a
315+ SET updated_at = clock_timestamp()
316+ FROM (SELECT DISTINCT agency_id FROM oldtab) AS hit
317+ WHERE a.id = hit.agency_id
318+ $q$;
319+
320+ ELSE -- UPDATE
321+ EXECUTE $q$
322+ UPDATE agencies a
323+ SET updated_at = clock_timestamp()
324+ FROM (
325+ SELECT DISTINCT agency_id FROM newtab
326+ UNION
327+ SELECT DISTINCT agency_id FROM oldtab
328+ ) AS hit
329+ WHERE a.id = hit.agency_id
330+ $q$;
331+ END IF;
332+
333+ RETURN NULL; -- statement-level trigger
79334 END
80335 $$;
81336
82337 -- statement-level trigger with transition tables
83- CREATE TRIGGER trg_link_touch_parent
84- AFTER INSERT OR UPDATE OR DELETE
85- ON link_agencies_locations
86- REFERENCING NEW TABLE AS newtab OLD TABLE AS oldtab
87- FOR EACH STATEMENT
338+ CREATE TRIGGER trg_link_agencies_locations_touch_agencies_ins
339+ AFTER INSERT ON link_agencies_locations
340+ REFERENCING NEW TABLE AS newtab
341+ FOR EACH STATEMENT
342+ EXECUTE FUNCTION touch_agency_from_location_link();
343+
344+ CREATE TRIGGER trg_link_agencies_locations_touch_agencies_upd
345+ AFTER UPDATE ON link_agencies_locations
346+ REFERENCING NEW TABLE AS newtab OLD TABLE AS oldtab
347+ FOR EACH STATEMENT
348+ EXECUTE FUNCTION touch_agency_from_location_link();
349+
350+ CREATE TRIGGER trg_link_agencies_locations_touch_agencies_del
351+ AFTER DELETE ON link_agencies_locations
352+ REFERENCING OLD TABLE AS oldtab
353+ FOR EACH STATEMENT
88354 EXECUTE FUNCTION touch_agency_from_location_link();
89355 """
90356 )
@@ -93,19 +359,7 @@ def _add_link_table_modification_triggers():
93359
94360
95361
96- def upgrade () -> None :
97- _create_sync_log ()
98- _create_ds_agency_link ()
99- _migrate_agency_ids_to_ds_agency_link ()
100- remove_id_column_from_agencies ()
101- rename_agency_id_to_id ()
102- _rename_existing_tables_to_ds_app_format ()
103- _alter_ds_app_link_data_source_table ()
104- _alter_ds_app_link_meta_url_table ()
105- _add_flag_deletion_tables ()
106- _add_last_synced_at_columns ()
107- _add_link_table_modification_triggers ()
108- _add_updated_at_to_optional_data_source_metadata_table ()
362+
109363
110364def _add_updated_at_to_optional_data_source_metadata_table ():
111365 op .add_column (
0 commit comments