Skip to content

Commit 7b0b045

Browse files
committed
fix austin field mappings, swap boston loader to BPDA arcgis layer
Austin's affordable housing inventory uses mfi_20/mfi_30/...mfi_120 for income tiers, project_id for the stable key, address/zip flat fields, and US-format affordability_start_date / _expiration_date. I'd guessed several of these wrong. With the corrected mapping the loader pulls 2,418 projects. Boston's CKAN income-restricted-housing resource has no coordinates at all — just neighborhood + zip + unit counts. Useless for mapping. The BPDA "Parcels with Income-Restricted Units" feature service publishes the same inventory as polygons with addresses and owners. The loader now hits that, computes a parcel centroid for the geom column, and pulls in 3,071 parcels. Field names are shapefile-truncated (Affordabl, Residenti, Stage_of) so the mapping is explicit. Signed-off-by: Charlie Tonneslan <cst0520@gmail.com>
1 parent 19e6c9e commit 7b0b045

2 files changed

Lines changed: 142 additions & 95 deletions

File tree

scripts/load-aus.mjs

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -55,51 +55,71 @@ async function fetchAll() {
5555
return out;
5656
}
5757

58+
/**
 * Normalise a date value from Austin's dataset to an ISO `YYYY-MM-DD` string.
 *
 * Recognised inputs, tried in this order:
 *   1. US month-day-year, `-` or `/` separated, 1- or 2-digit month/day
 *      ("12-31-2024", "1-5-2024", "12/31/2024").
 *   2. Anything that starts with `YYYY-MM-DD` (a time suffix is ignored).
 *   3. Whatever `new Date()` can parse, read back with local-time getters.
 *
 * @param {*} s - Raw field value; any falsy value yields null.
 * @returns {string|null} `YYYY-MM-DD`, or null when the value is empty,
 *   unparseable, or not a real calendar date (e.g. month 13, Feb 30).
 */
function parseDate(s) {
  if (!s) return null;
  const str = String(s).trim();

  // 12-21-2000 style (US). Handled explicitly so these never reach the
  // engine-specific Date parser below.
  const us = /^(\d{1,2})[-/](\d{1,2})[-/](\d{4})$/.exec(str);
  if (us) return toIsoDay(Number(us[3]), Number(us[1]), Number(us[2]));

  // already ISO (possibly followed by a time component)
  const iso = /^(\d{4})-(\d{2})-(\d{2})/.exec(str);
  if (iso) return toIsoDay(Number(iso[1]), Number(iso[2]), Number(iso[3]));

  const d = new Date(str);
  if (Number.isNaN(d.getTime())) return null;
  // Non-ISO strings are parsed as local time, so read the calendar day back
  // with the local getters. Going through toISOString() would convert to UTC
  // and can move the date to the previous day in zones ahead of UTC.
  return toIsoDay(d.getFullYear(), d.getMonth() + 1, d.getDate());
}

/**
 * Format year/month/day as `YYYY-MM-DD` after checking it is a real date.
 *
 * @param {number} year - Full year, 100-9999.
 * @param {number} month - 1-based month.
 * @param {number} day - Day of month.
 * @returns {string|null} The formatted date, or null when the parts do not
 *   form a valid calendar date.
 */
function toIsoDay(year, month, day) {
  if (!(year >= 100 && year <= 9999)) return null;
  // Date.UTC silently rolls overflow (month 13, day 45) into later dates, so
  // a round-trip mismatch means the input was not a real calendar date.
  const probe = new Date(Date.UTC(year, month - 1, day));
  const isReal =
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day;
  if (!isReal) return null;
  const yyyy = String(year).padStart(4, "0");
  const mm = String(month).padStart(2, "0");
  const dd = String(day).padStart(2, "0");
  return `${yyyy}-${mm}-${dd}`;
}
71+
5872
/**
 * Map one row of Austin's affordable housing inventory onto the shape of a
 * `projects` record.
 *
 * @param {object} r - Raw inventory row (flat fields such as `project_id`,
 *   `address`, `zip`, `mfi_20` ... `mfi_120`).
 * @param {number} i - Row index; only used to build a fallback external id
 *   when the row has neither `project_id` nor `contract_id`.
 * @returns {object|null} The project record, or null when the row has no
 *   usable latitude/longitude.
 */
function toProject(r, i) {
  const lat = parseFloat(r.latitude);
  const lng = parseFloat(r.longitude);
  if (!Number.isFinite(lat) || !Number.isFinite(lng)) return null;

  // Austin records both total and affordable counts. We use affordable
  // since this is the affordable-housing inventory; total includes
  // market-rate units in mixed-income developments.
  const totalUnits = num(r.affordable_units ?? r.total_units);
  const isRental = String(r.housing_tenure || "").toLowerCase().includes("rent");

  // Trim each candidate BEFORE choosing, so a whitespace-only project_name
  // falls back to the address instead of producing an empty name.
  const name =
    [r.project_name, r.address]
      .map((candidate) => String(candidate ?? "").trim())
      .find((candidate) => candidate.length > 0) ?? "(unnamed)";

  return {
    city_id: CITY_ID,
    external_id: String(r.project_id || r.contract_id || `aus-${i}`),
    name,
    address: r.address || null,
    borough: r.council_district ? `District ${r.council_district}` : null,
    neighborhood: null,
    postcode: r.zip || null,
    council_district: r.council_district ? String(r.council_district) : null,
    community_board: null,
    construction_type: r.unit_type || null,
    // Missing, empty and non-numeric periods all compare false here
    // (Number(null) and Number("") are 0, Number(undefined) is NaN).
    extended_affordability: Number(r.affordability_period) >= 40,
    prevailing_wage: false,
    // Austin tracks affordability_start_date (when the regulatory
    // period begins, usually placement-in-service). Storing as
    // start_date lets the existing trends + expiring views work.
    start_date: parseDate(r.affordability_start_date),
    // affordability_expiration_date isn't a "completion" but it's the
    // closest thing to one we get here; storing it lets the +30y heuristic
    // in /api/expiring at least be in the right ballpark for Austin,
    // and the per-project page will show the real expiration date.
    completion_date: parseDate(r.affordability_expiration_date),
    buildings_count: 1,
    lat,
    lng,
    units_total: totalUnits,
    units_counted: totalUnits,
    units_rental: isRental ? totalUnits : 0,
    units_homeownership: isRental ? 0 : totalUnits,
    // The mfi_* columns are folded into the five income-band columns.
    units_extremely_low: num(r.mfi_20) + num(r.mfi_30),
    units_very_low: num(r.mfi_40) + num(r.mfi_50),
    units_low: num(r.mfi_60) + num(r.mfi_65) + num(r.mfi_70) + num(r.mfi_80),
    units_moderate: num(r.mfi_100),
    units_middle: num(r.mfi_120),
    units_other_income: 0,
    // No bedroom-mix fields are mapped for Austin; all counts are zero.
    units_studio: 0,
    units_1br: 0,
    units_2br: 0,
    units_3br: 0,
    units_4plus_br: 0,
  };
}
105125

scripts/load-bos.mjs

Lines changed: 95 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,16 @@
1-
// Boston Income-Restricted Housing loader. Boston's open-data portal
2-
// runs CKAN, not Socrata, so this loader uses the datastore_search API:
3-
//
4-
// https://data.boston.gov/api/3/action/datastore_search?resource_id=<id>
1+
// Boston Income-Restricted Housing loader.
52
//
63
// node scripts/load-bos.mjs
74
//
8-
// HEADS UP: Boston rotates the resource id on every dataset revision, so
9-
// confirm the current one before running. The "Income-Restricted Housing"
10-
// dataset landing page is:
11-
// https://data.boston.gov/dataset/income-restricted-housing
12-
// Click the resource on that page and copy the resource id from the URL.
13-
// Then set BOS_RESOURCE_ID in .env.local, or hardcode it in RESOURCE_ID
14-
// below.
5+
// Boston publishes the inventory in two places. The data.boston.gov CKAN
6+
// resource is purely tabular (no coordinates), so it can't be mapped.
7+
// We instead use the ArcGIS feature service "Parcels with Income-
8+
// Restricted Units" published by BPDA, which carries per-parcel
9+
// geometry. Each parcel is a polygon; we collapse it to a centroid for
10+
// the point geom column.
1511
//
16-
// Field mapping is best-effort against the columns Boston typically
17-
// exposes. If your run drops a lot of rows, log a sample feature and
18-
// adjust the keys in toProject().
12+
// Field truncations are real (Affordabl, Residenti, Stage_of, Complianc)
13+
// — ArcGIS shapefile-origin layers cap field names at 10 chars.
1914

2015
import { config as loadDotenv } from "dotenv";
2116
import pg from "pg";
@@ -29,9 +24,8 @@ if (!process.env.DATABASE_URL) {
2924
}
3025

3126
const CITY_ID = "bos";
32-
const RESOURCE_ID =
33-
process.env.BOS_RESOURCE_ID || "e7b3a8d8-b7f4-4c4f-8a1f-2b1c1d2e3f4a"; // placeholder; replace
34-
const ENDPOINT = "https://data.boston.gov/api/3/action/datastore_search";
27+
const ENDPOINT =
28+
"https://services.arcgis.com/sFnw0xNflSi8J0uh/arcgis/rest/services/Parcels_with_Income_Restricted_Units/FeatureServer/0/query";
3529

3630
const { Pool } = pg;
3731
const pool = new Pool({
@@ -50,67 +44,104 @@ function num(v) {
5044
/**
 * Download every feature from the ArcGIS feature-service layer at ENDPOINT.
 *
 * Pages through the layer with `resultOffset`, asking for up to 2000
 * features per request in WGS84 (`outSR=4326`) with geometry included, and
 * keeps going for as long as the response sets `exceededTransferLimit`.
 *
 * @returns {Promise<object[]>} All features, in the order they were returned.
 * @throws {Error} On a non-2xx HTTP status, or when the response body carries
 *   an `error` object.
 */
async function fetchAll() {
  const out = [];
  let offset = 0;
  while (true) {
    const params = new URLSearchParams({
      where: "1=1",
      outFields: "*",
      outSR: "4326",
      returnGeometry: "true",
      f: "json",
      resultOffset: String(offset),
      resultRecordCount: "2000",
    });
    const resp = await fetch(`${ENDPOINT}?${params}`);
    if (!resp.ok) throw new Error(`arcgis ${resp.status}: ${resp.statusText}`);
    const json = await resp.json();
    // NOTE(review): ArcGIS REST services commonly report failures as HTTP 200
    // with an `error` object in the body. Without this check such a response
    // looks like "zero features" and the loader would succeed with no data.
    if (json.error) throw new Error(`arcgis: ${JSON.stringify(json.error)}`);
    const feats = json.features ?? [];
    out.push(...feats);
    // Advance by what was actually received; the empty-page guard prevents
    // an endless loop if the flag stays set while no features come back.
    if (!json.exceededTransferLimit || feats.length === 0) break;
    offset += feats.length;
  }
  return out;
}
7067

71-
function toProject(r, i) {
72-
// Boston's column names are inconsistent across vintages. Try a few
73-
// plausible spellings and fall back to null.
74-
const lat = parseFloat(r.lat ?? r.latitude ?? r.y ?? r.LATITUDE);
75-
const lng = parseFloat(r.lng ?? r.lon ?? r.longitude ?? r.x ?? r.LONGITUDE);
76-
if (!Number.isFinite(lat) || !Number.isFinite(lng)) return null;
68+
/**
 * Collapse a polygon to a single representative point.
 *
 * Only the first ring is used. The result is the area-weighted centroid of
 * that ring, so it is not skewed by how densely an edge was digitised or by
 * the closing vertex that repeats the first one. Rings with fewer than three
 * distinct vertices, zero area, or a numerically implausible result fall
 * back to the plain average of the vertices.
 *
 * @param {number[][][]|null|undefined} rings - Polygon rings, each a list of
 *   `[x, y]` pairs.
 * @returns {{lng: number, lat: number}|null} The point (x as `lng`, y as
 *   `lat`), or null when there is no usable vertex.
 */
function centroid(rings) {
  const ring = rings?.[0];
  if (!Array.isArray(ring)) return null;

  // Ignore malformed vertices rather than throwing on them.
  const pts = ring.filter(
    (p) => Array.isArray(p) && Number.isFinite(p[0]) && Number.isFinite(p[1]),
  );
  // A closed ring repeats its first vertex at the end; counting it twice
  // would bias the vertex average toward that corner.
  if (pts.length > 1) {
    const first = pts[0];
    const last = pts[pts.length - 1];
    if (first[0] === last[0] && first[1] === last[1]) pts.pop();
  }
  const n = pts.length;
  if (n === 0) return null;

  // Vertex mean (the fallback) and bounding box (the sanity check) in one pass.
  let sumX = 0;
  let sumY = 0;
  let minX = Infinity;
  let maxX = -Infinity;
  let minY = Infinity;
  let maxY = -Infinity;
  for (const [x, y] of pts) {
    sumX += x;
    sumY += y;
    if (x < minX) minX = x;
    if (x > maxX) maxX = x;
    if (y < minY) minY = y;
    if (y > maxY) maxY = y;
  }
  const mean = { lng: sumX / n, lat: sumY / n };
  if (n < 3) return mean;

  // Shoelace centroid, computed relative to the first vertex. Parcel extents
  // are tiny compared with the absolute lon/lat values, so working in
  // absolute coordinates would lose most of the precision to cancellation.
  const [originX, originY] = pts[0];
  let twiceArea = 0;
  let momentX = 0;
  let momentY = 0;
  for (let k = 0; k < n; k += 1) {
    const next = pts[(k + 1) % n];
    const x0 = pts[k][0] - originX;
    const y0 = pts[k][1] - originY;
    const x1 = next[0] - originX;
    const y1 = next[1] - originY;
    const cross = x0 * y1 - x1 * y0;
    twiceArea += cross;
    momentX += (x0 + x1) * cross;
    momentY += (y0 + y1) * cross;
  }
  if (twiceArea === 0) return mean;

  const lng = originX + momentX / (3 * twiceArea);
  const lat = originY + momentY / (3 * twiceArea);
  // A true centroid always lies inside the ring's bounding box; anything else
  // (near-degenerate or self-intersecting ring) is untrustworthy.
  const isPlausible = lng >= minX && lng <= maxX && lat >= minY && lat <= maxY;
  return isPlausible ? { lng, lat } : mean;
}
87+
88+
function toProject(feat) {
89+
const a = feat.attributes || {};
90+
const c = centroid(feat.geometry?.rings);
91+
if (!c) return null;
7792

78-
const totalUnits = num(
79-
r.total_units ?? r.units ?? r.total_unit_count ?? r.income_restricted_units,
80-
);
81-
const isRental =
82-
String(r.tenure || r.type || "").toLowerCase().includes("rent") ||
83-
String(r.tenure || r.type || "").toLowerCase().includes("rental");
93+
// The full address field includes neighborhood/state/zip too; pull
94+
// those out into their own columns where possible.
95+
const addressRaw = (a.Address || "").trim();
96+
// "25 AMORY ST, Boston, MA, 02119" -> ["25 AMORY ST","Boston","MA","02119"]
97+
const parts = addressRaw.split(",").map((s) => s.trim()).filter(Boolean);
98+
const street = parts[0] || null;
99+
const zip = parts[parts.length - 1] && /^\d{5}$/.test(parts[parts.length - 1])
100+
? parts[parts.length - 1]
101+
: null;
102+
103+
const totalUnits = num(a.Residenti);
104+
const affordable = num(a.Affordabl);
105+
// Compliance text like "Rental Unit Group (R)" vs "Ownership ..."
106+
const isRental = String(a.Complianc || "").toLowerCase().includes("rent");
107+
108+
const name = a.Owner_1 && a.Owner_1.trim()
109+
? `${street ?? "(parcel)"} (${a.Owner_1.trim()})`
110+
: street || "(parcel)";
84111

85112
return {
86113
city_id: CITY_ID,
87-
// CKAN _id is per-row but stable; fall back to running index.
88-
external_id: String(r._id ?? r.id ?? r.parcel_id ?? `bos-${i}`),
89-
name: (r.project_name || r.name || r.development || r.address || "(unnamed)").toString().trim(),
90-
address: r.address || r.street_address || null,
91-
borough: r.neighborhood || r.planning_district || null,
92-
neighborhood: r.neighborhood || null,
93-
postcode: r.zip || r.zip_code || null,
94-
council_district: r.city_council_district || r.council_district || null,
114+
external_id: String(a.Parcel_ID ?? a.FID),
115+
name,
116+
address: street,
117+
borough: null,
118+
neighborhood: null,
119+
postcode: zip,
120+
council_district: null,
95121
community_board: null,
96-
construction_type: r.development_type || r.project_type || null,
122+
// Stage_of: e.g. "Under Construction", "Permitted", " " etc.
123+
construction_type:
124+
a.Stage_of && a.Stage_of.trim() && a.Stage_of !== "None"
125+
? a.Stage_of.trim()
126+
: null,
97127
extended_affordability: false,
98128
prevailing_wage: false,
99129
start_date: null,
100-
completion_date: r.year_built ? `${r.year_built}-01-01` : null,
130+
completion_date: null,
101131
buildings_count: 1,
102-
lat,
103-
lng,
104-
units_total: totalUnits,
105-
units_counted: totalUnits,
106-
units_rental: isRental ? totalUnits : 0,
107-
units_homeownership: isRental ? 0 : totalUnits,
132+
lat: c.lat,
133+
lng: c.lng,
134+
// Counted units = affordable units; total = whole-building unit count.
135+
units_total: totalUnits || affordable,
136+
units_counted: affordable,
137+
units_rental: isRental ? affordable : 0,
138+
units_homeownership: isRental ? 0 : affordable,
108139
units_extremely_low: 0,
109140
units_very_low: 0,
110141
units_low: 0,
111142
units_moderate: 0,
112143
units_middle: 0,
113-
units_other_income: totalUnits,
144+
units_other_income: affordable,
114145
units_studio: 0,
115146
units_1br: 0,
116147
units_2br: 0,
@@ -144,12 +175,8 @@ INSERT INTO projects (
144175
ON CONFLICT (city_id, external_id) DO UPDATE SET
145176
name = EXCLUDED.name,
146177
address = EXCLUDED.address,
147-
borough = EXCLUDED.borough,
148-
neighborhood = EXCLUDED.neighborhood,
149178
postcode = EXCLUDED.postcode,
150-
council_district = EXCLUDED.council_district,
151179
construction_type = EXCLUDED.construction_type,
152-
completion_date = EXCLUDED.completion_date,
153180
geom = EXCLUDED.geom,
154181
units_total = EXCLUDED.units_total,
155182
units_counted = EXCLUDED.units_counted,
@@ -173,7 +200,7 @@ async function ensureCity(client) {
173200
42.3601,
174201
-71.0589,
175202
12,
176-
"Analyze Boston: Income-Restricted Housing",
203+
"BPDA Parcels with Income-Restricted Units",
177204
"https://data.boston.gov/dataset/income-restricted-housing",
178205
],
179206
);
@@ -183,11 +210,11 @@ async function main() {
183210
const client = await pool.connect();
184211
try {
185212
await ensureCity(client);
186-
console.log(`fetching Boston records from CKAN resource ${RESOURCE_ID}...`);
187-
const records = await fetchAll();
188-
console.log(`got ${records.length} records`);
189-
const projects = records.map(toProject).filter(Boolean);
190-
console.log(`upserting ${projects.length} with valid coordinates...`);
213+
console.log("fetching Boston BPDA parcels...");
214+
const features = await fetchAll();
215+
console.log(`got ${features.length} features`);
216+
const projects = features.map(toProject).filter(Boolean);
217+
console.log(`upserting ${projects.length} parcels with usable geometry...`);
191218

192219
await client.query("BEGIN");
193220
for (const p of projects) {

0 commit comments

Comments
 (0)