1- // Boston Income-Restricted Housing loader. Boston's open-data portal
2- // runs CKAN, not Socrata, so this loader uses the datastore_search API:
3- //
4- // https://data.boston.gov/api/3/action/datastore_search?resource_id=<id>
1+ // Boston Income-Restricted Housing loader.
52//
63// node scripts/load-bos.mjs
74//
8- // HEADS UP: Boston rotates the resource id on every dataset revision, so
9- // confirm the current one before running. The "Income-Restricted Housing"
10- // dataset landing page is:
11- // https://data.boston.gov/dataset/income-restricted-housing
12- // Click the resource on that page and copy the resource id from the URL.
13- // Then set BOS_RESOURCE_ID in .env.local, or hardcode it in RESOURCE_ID
14- // below.
5+ // Boston publishes the inventory in two places. The data.boston.gov CKAN
6+ // resource is purely tabular (no coordinates), so it can't be mapped.
7+ // We instead use the ArcGIS feature service "Parcels with Income-
8+ // Restricted Units" published by BPDA, which carries per-parcel
9+ // geometry. Each parcel is a polygon; we collapse it to a centroid for
10+ // the point geom column.
1511//
16- // Field mapping is best-effort against the columns Boston typically
17- // exposes. If your run drops a lot of rows, log a sample feature and
18- // adjust the keys in toProject().
12+ // Field truncations are real (Affordabl, Residenti, Stage_of, Complianc)
13+ // — ArcGIS shapefile-origin layers cap field names at 10 chars.
1914
2015import { config as loadDotenv } from "dotenv" ;
2116import pg from "pg" ;
@@ -29,9 +24,8 @@ if (!process.env.DATABASE_URL) {
2924}
3025
3126const CITY_ID = "bos" ;
32- const RESOURCE_ID =
33- process . env . BOS_RESOURCE_ID || "e7b3a8d8-b7f4-4c4f-8a1f-2b1c1d2e3f4a" ; // placeholder; replace
34- const ENDPOINT = "https://data.boston.gov/api/3/action/datastore_search" ;
27+ const ENDPOINT =
28+ "https://services.arcgis.com/sFnw0xNflSi8J0uh/arcgis/rest/services/Parcels_with_Income_Restricted_Units/FeatureServer/0/query" ;
3529
3630const { Pool } = pg ;
3731const pool = new Pool ( {
@@ -50,67 +44,104 @@ function num(v) {
/**
 * Download every feature from the ArcGIS feature-service query endpoint
 * held in ENDPOINT.
 *
 * Pages through the layer with resultOffset / resultRecordCount and keeps
 * requesting until the server stops setting `exceededTransferLimit` or
 * returns an empty page.
 *
 * @returns {Promise<object[]>} The raw entries of `json.features` from all
 *   pages, in the order they were received.
 * @throws {Error} If a response has a non-OK HTTP status, or if the JSON
 *   body carries an `error` object.
 */
async function fetchAll() {
  const out = [];
  let offset = 0;
  while (true) {
    const params = new URLSearchParams({
      where: "1=1",
      outFields: "*",
      // Ask for WGS84 lon/lat regardless of the layer's native projection.
      outSR: "4326",
      returnGeometry: "true",
      f: "json",
      resultOffset: String(offset),
      resultRecordCount: "2000",
    });
    const resp = await fetch(`${ENDPOINT}?${params}`);
    if (!resp.ok) throw new Error(`arcgis ${resp.status}: ${resp.statusText}`);
    const json = await resp.json();
    // ArcGIS REST services commonly report query failures as HTTP 200 with
    // an `error` object in the body. Without this check a failed query
    // looks like an empty layer and the loader would quietly load nothing.
    if (json.error) throw new Error(`arcgis: ${JSON.stringify(json.error)}`);
    const feats = json.features ?? [];
    out.push(...feats);
    // The empty-page guard prevents an infinite loop if the server keeps
    // reporting exceededTransferLimit without returning any rows.
    if (!json.exceededTransferLimit || feats.length === 0) break;
    offset += feats.length;
  }
  return out;
}
7067
71- function toProject ( r , i ) {
72- // Boston's column names are inconsistent across vintages. Try a few
73- // plausible spellings and fall back to null.
74- const lat = parseFloat ( r . lat ?? r . latitude ?? r . y ?? r . LATITUDE ) ;
75- const lng = parseFloat ( r . lng ?? r . lon ?? r . longitude ?? r . x ?? r . LONGITUDE ) ;
76- if ( ! Number . isFinite ( lat ) || ! Number . isFinite ( lng ) ) return null ;
/**
 * Collapse a polygon's rings to a single representative point.
 *
 * Only the first ring is considered (taken to be the outer boundary). The
 * result is the plain average of that ring's vertices, which is good
 * enough for urban parcels that are small and mostly convex.
 *
 * @param {Array<Array<[number, number]>>|null|undefined} rings Polygon
 *   rings as arrays of [x, y] vertices.
 * @returns {{lng: number, lat: number}|null} The averaged point (x as lng,
 *   y as lat), or null when there is no ring or no usable vertex.
 */
function centroid(rings) {
  if (!rings?.length) return null;
  const ring = rings[0];
  if (!ring?.length) return null;

  // Keep only well-formed vertices with finite coordinates.
  const pts = ring.filter(
    (p) => Array.isArray(p) && Number.isFinite(p[0]) && Number.isFinite(p[1]),
  );

  // Polygon rings are normally closed: the last vertex repeats the first.
  // Averaging it too would count that corner twice and pull the point
  // toward it, so drop the duplicate before summing.
  if (pts.length > 1) {
    const [firstX, firstY] = pts[0];
    const [lastX, lastY] = pts[pts.length - 1];
    if (firstX === lastX && firstY === lastY) pts.pop();
  }
  if (pts.length === 0) return null;

  let sx = 0;
  let sy = 0;
  for (const [x, y] of pts) {
    sx += x;
    sy += y;
  }
  return { lng: sx / pts.length, lat: sy / pts.length };
}
87+
88+ function toProject ( feat ) {
89+ const a = feat . attributes || { } ;
90+ const c = centroid ( feat . geometry ?. rings ) ;
91+ if ( ! c ) return null ;
7792
78- const totalUnits = num (
79- r . total_units ?? r . units ?? r . total_unit_count ?? r . income_restricted_units ,
80- ) ;
81- const isRental =
82- String ( r . tenure || r . type || "" ) . toLowerCase ( ) . includes ( "rent" ) ||
83- String ( r . tenure || r . type || "" ) . toLowerCase ( ) . includes ( "rental" ) ;
93+ // The full address field includes neighborhood/state/zip too; pull
94+ // those out into their own columns where possible.
95+ const addressRaw = ( a . Address || "" ) . trim ( ) ;
96+ // "25 AMORY ST, Boston, MA, 02119" -> ["25 AMORY ST","Boston","MA","02119"]
97+ const parts = addressRaw . split ( "," ) . map ( ( s ) => s . trim ( ) ) . filter ( Boolean ) ;
98+ const street = parts [ 0 ] || null ;
99+ const zip = parts [ parts . length - 1 ] && / ^ \d { 5 } $ / . test ( parts [ parts . length - 1 ] )
100+ ? parts [ parts . length - 1 ]
101+ : null ;
102+
103+ const totalUnits = num ( a . Residenti ) ;
104+ const affordable = num ( a . Affordabl ) ;
105+ // Compliance text like "Rental Unit Group (R)" vs "Ownership ..."
106+ const isRental = String ( a . Complianc || "" ) . toLowerCase ( ) . includes ( "rent" ) ;
107+
108+ const name = a . Owner_1 && a . Owner_1 . trim ( )
109+ ? `${ street ?? "(parcel)" } (${ a . Owner_1 . trim ( ) } )`
110+ : street || "(parcel)" ;
84111
85112 return {
86113 city_id : CITY_ID ,
87- // CKAN _id is per-row but stable; fall back to running index.
88- external_id : String ( r . _id ?? r . id ?? r . parcel_id ?? `bos-${ i } ` ) ,
89- name : ( r . project_name || r . name || r . development || r . address || "(unnamed)" ) . toString ( ) . trim ( ) ,
90- address : r . address || r . street_address || null ,
91- borough : r . neighborhood || r . planning_district || null ,
92- neighborhood : r . neighborhood || null ,
93- postcode : r . zip || r . zip_code || null ,
94- council_district : r . city_council_district || r . council_district || null ,
114+ external_id : String ( a . Parcel_ID ?? a . FID ) ,
115+ name,
116+ address : street ,
117+ borough : null ,
118+ neighborhood : null ,
119+ postcode : zip ,
120+ council_district : null ,
95121 community_board : null ,
96- construction_type : r . development_type || r . project_type || null ,
122+ // Stage_of: e.g. "Under Construction", "Permitted", " " etc.
123+ construction_type :
124+ a . Stage_of && a . Stage_of . trim ( ) && a . Stage_of !== "None"
125+ ? a . Stage_of . trim ( )
126+ : null ,
97127 extended_affordability : false ,
98128 prevailing_wage : false ,
99129 start_date : null ,
100- completion_date : r . year_built ? ` ${ r . year_built } -01-01` : null ,
130+ completion_date : null ,
101131 buildings_count : 1 ,
102- lat,
103- lng,
104- units_total : totalUnits ,
105- units_counted : totalUnits ,
106- units_rental : isRental ? totalUnits : 0 ,
107- units_homeownership : isRental ? 0 : totalUnits ,
132+ lat : c . lat ,
133+ lng : c . lng ,
134+ // Counted units = affordable units; total = whole-building unit count.
135+ units_total : totalUnits || affordable ,
136+ units_counted : affordable ,
137+ units_rental : isRental ? affordable : 0 ,
138+ units_homeownership : isRental ? 0 : affordable ,
108139 units_extremely_low : 0 ,
109140 units_very_low : 0 ,
110141 units_low : 0 ,
111142 units_moderate : 0 ,
112143 units_middle : 0 ,
113- units_other_income : totalUnits ,
144+ units_other_income : affordable ,
114145 units_studio : 0 ,
115146 units_1br : 0 ,
116147 units_2br : 0 ,
@@ -144,12 +175,8 @@ INSERT INTO projects (
144175ON CONFLICT (city_id, external_id) DO UPDATE SET
145176 name = EXCLUDED.name,
146177 address = EXCLUDED.address,
147- borough = EXCLUDED.borough,
148- neighborhood = EXCLUDED.neighborhood,
149178 postcode = EXCLUDED.postcode,
150- council_district = EXCLUDED.council_district,
151179 construction_type = EXCLUDED.construction_type,
152- completion_date = EXCLUDED.completion_date,
153180 geom = EXCLUDED.geom,
154181 units_total = EXCLUDED.units_total,
155182 units_counted = EXCLUDED.units_counted,
@@ -173,7 +200,7 @@ async function ensureCity(client) {
173200 42.3601 ,
174201 - 71.0589 ,
175202 12 ,
176- "Analyze Boston: Income-Restricted Housing " ,
203+ "BPDA Parcels with Income-Restricted Units " ,
177204 "https://data.boston.gov/dataset/income-restricted-housing" ,
178205 ] ,
179206 ) ;
@@ -183,11 +210,11 @@ async function main() {
183210 const client = await pool . connect ( ) ;
184211 try {
185212 await ensureCity ( client ) ;
186- console . log ( ` fetching Boston records from CKAN resource ${ RESOURCE_ID } ...` ) ;
187- const records = await fetchAll ( ) ;
188- console . log ( `got ${ records . length } records ` ) ;
189- const projects = records . map ( toProject ) . filter ( Boolean ) ;
190- console . log ( `upserting ${ projects . length } with valid coordinates ...` ) ;
213+ console . log ( " fetching Boston BPDA parcels ..." ) ;
214+ const features = await fetchAll ( ) ;
215+ console . log ( `got ${ features . length } features ` ) ;
216+ const projects = features . map ( toProject ) . filter ( Boolean ) ;
217+ console . log ( `upserting ${ projects . length } parcels with usable geometry ...` ) ;
191218
192219 await client . query ( "BEGIN" ) ;
193220 for ( const p of projects ) {
0 commit comments