Skip to content

Commit 7b60aeb

Browse files
committed
Add Postgres FDW Large queries
Currently duplicates the regular Postgres queries but replaces some of the table names to use the ClickHouse materialized views. Update the `SelectArea()` function to enable the medium/large buttons for both FDW and ClickHouse, and update `getQuery()` to fetch the large FDW queries when wanted. Add DDL to `schemas/postgres.sql` to create the `clickhouse_fdw` extension and import the tables from the demo database so that they're accessible to HouseClick. This won't work without `clickhouse_fdw`, so add a placeholder to `README.md` for forthcoming installation instructions. While at it, remove some trailing spaces from files and restore the `EXTRACT()` function to the Postgres queries, since it was fixed by ClickHouse/pg_clickhouse#31.
1 parent bcbc782 commit 7b60aeb

5 files changed

Lines changed: 178 additions & 32 deletions

File tree

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ After the script execution, the data will be available in the `data/uk_house_lis
118118

119119
# Load data into PostgreSQL
120120

121+
## Install clickhouse_fdw
122+
123+
TK
124+
121125
## Create tables
122126

123127
Execute this command to create the tables:

app/components/analytics/SelectArea.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ export default function SelectArea({ postCode, district, town, database, dataset
305305
<button
306306
disabled={database === 'postgres'}
307307
className={`flex grow items-center gap-1 px-3 py-1
308-
${dataset === 'large' && database === 'clickhouse'
308+
${dataset === 'large' && database !== 'postgres'
309309
? 'bg-[#2D2D2D] rounded-lg border border-[#FAFF69] text-white'
310310
: 'border rounded-md bg-transparent border-transparent'}
311311
${database === 'postgres' ? 'opacity-50 cursor-not-allowed' : 'hover:cursor-pointer'}

app/lib/analytics.ts

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { createClient, DataFormat } from '@clickhouse/client'
22
import { Pool } from 'pg';
33
import { AnalyticFilter } from "@/lib/types";
4-
import { postgreSQLQueries, clickHouseQueries, clickHouseQueriesLarge } from "@/lib/analytics_queries";
4+
import { postgreSQLQueries, postgresFDWQueriesLarge, clickHouseQueries, clickHouseQueriesLarge } from "@/lib/analytics_queries";
55
import { getDbSelection, getDatasetSelection } from './db-context'
66

77
const clickhouse = createClient({
@@ -85,7 +85,7 @@ export async function getHouseSalesComparison({ town, district, postcode }: { to
8585

8686
export async function getPriceEvolution({ town, district, postcode }: { town: string, district: string, postcode: string }) {
8787
const query = getQuery('priceOverTime');
88-
88+
8989
const condition = buildCondition(town, district, postcode);
9090
const data = await runQuery('priceOverTime', query(condition))
9191
return data;
@@ -175,7 +175,7 @@ export async function getPriceByType( { town, district, postcode }: { town: stri
175175

176176

177177
export async function getHouseSoldOverTime({ town, district, postcode }: { town: string, district: string, postcode: string }) {
178-
178+
179179
let conditions = [];
180180
let column = null
181181
let queryName = 'soldOverTimeNoFilter'
@@ -206,14 +206,14 @@ async function runQuery(name: string, query: string) {
206206
const dataset = getDatasetSelection()
207207
const start_time = performance.now();
208208
let queryString = query
209-
209+
210210
// Create a promise that will reject after the timeout period
211211
const timeout = (ms: number) => {
212212
return new Promise((_, reject) => {
213213
setTimeout(() => reject(new Error(`Query execution timed out after ${ms/1000} seconds`)), ms);
214214
});
215215
};
216-
216+
217217
// Create the actual query promise
218218
const executeQuery = async () => {
219219
let rawResults: Record<string, any>[];
@@ -226,7 +226,7 @@ async function runQuery(name: string, query: string) {
226226
})
227227
rawResults = await results.json();
228228
} else if (database === 'postgres' || database === 'fdw') {
229-
const schema = database === 'fdw' ? 'fdw' : 'public'
229+
const schema = database === 'fdw' ? 'uk' : 'public'
230230
queryString = queryString.replace(/FROM\s+uk/, `FROM ${schema}.uk`)
231231
try {
232232
const result = await pool.query(queryString);
@@ -239,7 +239,7 @@ async function runQuery(name: string, query: string) {
239239
}
240240
return rawResults;
241241
};
242-
242+
243243
let rawResults: Record<string, any>[];
244244
try {
245245
// Race the query execution against the timeout
@@ -252,7 +252,7 @@ async function runQuery(name: string, query: string) {
252252
console.error(`Error executing query "${name}":`, error.message);
253253
throw error;
254254
}
255-
255+
256256
const endTime = performance.now();
257257
const elapsedTime = endTime - start_time;
258258
console.log(`Database: ${database} query: ${name} Execution time: ${elapsedTime} ms`)
@@ -267,15 +267,22 @@ function getQuery(queryName: string) {
267267
if (database === 'clickhouse') {
268268
if (dataset === 'large') {
269269
const query = clickHouseQueriesLarge[queryName]
270-
return query;
271-
} else {
270+
return query;
271+
} else {
272272
const query = clickHouseQueries[queryName]
273-
return query;
274-
}
275-
276-
} else if (database === 'postgres' || database === 'fdw') {
273+
return query;
274+
}
275+
} else if (database === 'postgres') {
277276
const query = postgreSQLQueries[queryName]
278277
return query;
278+
} else if (database === 'fdw') {
279+
if (dataset === 'large') {
280+
const query = postgresFDWQueriesLarge[queryName]
281+
return query;
282+
} else {
283+
const query = postgreSQLQueries[queryName]
284+
return query;
285+
}
279286
} else {
280287
throw new Error('Unsupported database type')
281288
}

app/lib/analytics_queries.ts

Lines changed: 136 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,11 @@ export const postgreSQLQueries: Record<string, Function> = {
3232
priceIncrease: (condition: string) => `SELECT
3333
round(avg(price) FILTER (WHERE ${condition})) AS filter_avg,
3434
round(avg(price)) AS avg,
35-
date_part('year', date) AS year
35+
EXTRACT(YEAR FROM date) AS year
3636
FROM uk_price_paid
3737
GROUP BY year
3838
ORDER BY year ASC;`,
39-
getRanks: (condition: string, quantiles: string) => `SELECT
39+
getRanks: (condition: string, quantiles: string) => `SELECT
4040
percentile_cont(ARRAY[${quantiles}]) WITHIN GROUP (ORDER BY price) AS quantiles,
4141
round(avg(price) FILTER (WHERE ${condition})) AS filtered_avg,
4242
round(avg(price)) AS avg
@@ -54,7 +54,7 @@ export const postgreSQLQueries: Record<string, Function> = {
5454
round(count(*)::numeric / COUNT(DISTINCT ${column}))::int AS count
5555
FROM uk_price_paid
5656
GROUP BY type;`,
57-
soldByPeriod: (condition: string) => `SELECT
57+
soldByPeriod: (condition: string) => `SELECT
5858
count(*) FILTER (WHERE date >= current_date - INTERVAL '6 months') AS "6",
5959
count(*) FILTER (WHERE date >= current_date - INTERVAL '12 months' AND date < current_date - INTERVAL '6 months') AS "12",
6060
count(*) FILTER (WHERE date >= current_date - INTERVAL '18 months' AND date < current_date - INTERVAL '12 months') AS "18",
@@ -76,17 +76,17 @@ export const postgreSQLQueries: Record<string, Function> = {
7676
FROM uk_price_paid
7777
GROUP BY type;`,
7878
salesByDayPreviousYear: (condition: string) => `SELECT
79-
date_part('year', date) AS year,
79+
EXTRACT(YEAR FROM date) AS year,
8080
date_trunc('day', date) AS day,
8181
count(*) AS c
8282
FROM uk_price_paid
8383
WHERE ${condition}
8484
AND date >= date_trunc('year', current_date) - INTERVAL '1 year'
8585
AND date < date_trunc('year', current_date)
8686
GROUP BY year, day
87-
ORDER BY year ASC, day ASC;`,
87+
ORDER BY year ASC, day ASC;`,
8888
salesByDayCurrentYear: (condition: string) => `SELECT
89-
date_part('year', date) AS year,
89+
EXTRACT(YEAR FROM date) AS year,
9090
date_trunc('day', date) AS day,
9191
count(*) AS c
9292
FROM uk_price_paid
@@ -111,15 +111,134 @@ export const postgreSQLQueries: Record<string, Function> = {
111111
round(percentile_cont(0.99) WITHIN GROUP (ORDER BY price)) AS max_price
112112
FROM uk_price_paid
113113
WHERE date > (current_date - interval '18 months') AND ${condition}`,
114-
getPopularTowns: (district: string) => `SELECT
114+
getPopularTowns: (district: string) => `SELECT
115115
town, count(*) as popularity FROM uk_price_paid WHERE district = '${district}' GROUP BY district, town ORDER BY popularity DESC LIMIT 10`,
116-
getPopularDistricts: () => `SELECT
116+
getPopularDistricts: () => `SELECT
117117
district, count(*) as popularity FROM uk_price_paid GROUP BY district ORDER BY popularity DESC LIMIT 10`,
118-
getPopularPostcodes: (town: string, district: string) => `SELECT
118+
getPopularPostcodes: (town: string, district: string) => `SELECT
119119
postcode1, count(*) as popularity FROM uk_price_paid WHERE town = '${town}' AND district = '${district}' AND postcode1 != '' GROUP BY district, town, postcode1 ORDER BY popularity DESC LIMIT 10`,
120120
getHouseSales: (condition: string) => `SELECT count(1) FILTER (WHERE ${condition})::int AS area_count, count(*) as national_count FROM uk_price_paid`,
121121
}
122122

123+
export const postgresFDWQueriesLarge: Record<string, Function> = {
124+
soldOverTime: (condition: string, column: string) => `SELECT
125+
year,
126+
COUNT(count_state) FILTER (WHERE ${condition})::INT AS filtered_count,
127+
ROUND(COUNT(count_state)::NUMERIC / COUNT(DISTINCT ${column}))::INT AS count
128+
FROM houseclick.uk_price_paid_yearly_agg
129+
GROUP BY year
130+
ORDER BY year ASC;`,
131+
soldOverTimeNoFilter: () => `SELECT
132+
DATE_TRUNC('year', date)::date AS year,
133+
ROUND(COUNT(*)::NUMERIC / COUNT(DISTINCT town))::INT AS count
134+
FROM uk_price_paid_synthetic
135+
GROUP BY year
136+
ORDER BY year ASC;`,
137+
priceOverTime: (condition: string) => `SELECT
138+
date_trunc('month', date)::date AS month,
139+
round(avg(price) FILTER (WHERE ${condition})) AS filter_price,
140+
round(avg(price)) AS avg
141+
FROM uk_price_paid_synthetic
142+
GROUP BY month
143+
ORDER BY month ASC;`,
144+
stats: (condition: string) => `SELECT
145+
avg(price) AS avg,
146+
percentile_cont(0.5) WITHIN GROUP (ORDER BY price) AS median,
147+
percentile_cont(0.95) WITHIN GROUP (ORDER BY price) AS "95th",
148+
percentile_cont(0.99) WITHIN GROUP (ORDER BY price) AS "99th",
149+
count(*) AS sold
150+
FROM uk_price_paid_synthetic
151+
WHERE date > current_date - INTERVAL '6 months' AND ${condition};`,
152+
priceIncrease: (condition: string) => `SELECT
153+
round(avg(avg_price_state) FILTER (WHERE ${condition})) AS filter_avg,
154+
round(avg(avg_price_state)) AS avg,
155+
year
156+
FROM houseclick.uk_price_paid_yearly_agg
157+
GROUP BY year
158+
ORDER BY year ASC;`,
159+
getRanks: (condition: string, quantiles: string) => `SELECT
160+
percentile_cont(ARRAY[${quantiles}]) WITHIN GROUP (ORDER BY price) AS quantiles,
161+
round(avg(price) FILTER (WHERE ${condition})) AS filtered_avg,
162+
round(avg(price)) AS avg
163+
FROM uk_price_paid_synthetic
164+
WHERE date > current_date - INTERVAL '6 months';`,
165+
numberByDuration: (condition: string) => `SELECT
166+
duration as name,
167+
count(1) FILTER (WHERE ${condition})::int AS value
168+
FROM uk_price_paid_synthetic
169+
WHERE duration = 'freehold' OR duration = 'leasehold'
170+
GROUP BY duration;`,
171+
numberByType: (condition: string, column: string) => `SELECT
172+
type,
173+
count(1) FILTER (WHERE ${condition})::int AS filtered_count,
174+
round(count(*)::numeric / COUNT(DISTINCT ${column}))::int AS count
175+
FROM houseclick.uk_price_paid_type_agg
176+
GROUP BY type;`,
177+
soldByPeriod: (condition: string) => `SELECT
178+
count(*) FILTER (WHERE date >= current_date - INTERVAL '6 months') AS "6",
179+
count(*) FILTER (WHERE date >= current_date - INTERVAL '12 months' AND date < current_date - INTERVAL '6 months') AS "12",
180+
count(*) FILTER (WHERE date >= current_date - INTERVAL '18 months' AND date < current_date - INTERVAL '12 months') AS "18",
181+
count(*) FILTER (WHERE date >= current_date - INTERVAL '24 months' AND date < current_date - INTERVAL '18 months') AS "24"
182+
FROM uk_price_paid_synthetic
183+
WHERE ${condition};`,
184+
priceByType: (condition: string) => `SELECT
185+
type,
186+
round(min(min_price_state)) + 100 AS min,
187+
round(min(min_price_state) FILTER (WHERE ${condition})) AS min_filtered,
188+
round(max(max_price_state)) AS max,
189+
round(max(max_price_state) FILTER (WHERE ${condition})) AS max_filtered,
190+
round(percentile_cont(0.5) WITHIN GROUP (ORDER BY quantile_price_state)) AS median,
191+
round(percentile_cont(0.5) WITHIN GROUP (ORDER BY quantile_price_state) FILTER (WHERE ${condition})) AS median_filtered,
192+
round(percentile_cont(0.25) WITHIN GROUP (ORDER BY quantile_price_state)) AS "25th",
193+
round(percentile_cont(0.25) WITHIN GROUP (ORDER BY quantile_price_state) FILTER (WHERE ${condition})) AS "25th_filtered",
194+
round(percentile_cont(0.75) WITHIN GROUP (ORDER BY quantile_price_state)) AS "75th",
195+
round(percentile_cont(0.75) WITHIN GROUP (ORDER BY quantile_price_state) FILTER (WHERE ${condition})) AS "75th_filtered"
196+
FROM houseclick.uk_price_paid_type_agg
197+
GROUP BY type;`,
198+
salesByDayPreviousYear: (condition: string) => `SELECT
199+
EXTRACT(YEAR FROM date) AS year,
200+
date_trunc('day', date) AS day,
201+
count(*) AS c
202+
FROM uk_price_paid_synthetic
203+
WHERE ${condition}
204+
AND date >= date_trunc('year', current_date) - INTERVAL '1 year'
205+
AND date < date_trunc('year', current_date)
206+
GROUP BY year, day
207+
ORDER BY year ASC, day ASC;`,
208+
salesByDayCurrentYear: (condition: string) => `SELECT
209+
EXTRACT(YEAR FROM date) AS year,
210+
date_trunc('day', date) AS day,
211+
count(*) AS c
212+
FROM uk_price_paid_synthetic
213+
WHERE ${condition}
214+
AND date >= date_trunc('year', current_date)
215+
GROUP BY year, day
216+
ORDER BY year ASC, day ASC;`,
217+
soldByDuration: (condition: string) => `SELECT
218+
duration AS name,
219+
count(*) AS value
220+
FROM uk_price_paid_synthetic
221+
WHERE duration != 'unknown' AND ${condition}
222+
GROUP BY duration;`,
223+
soldByDurationNoFilter: (condition: string) => `SELECT
224+
duration AS name,
225+
count(*) AS value
226+
FROM uk_price_paid_synthetic
227+
WHERE duration != 'unknown'
228+
GROUP BY duration;`,
229+
getMinMax: (condition: string) => `SELECT
230+
round(percentile_cont(0.01) WITHIN GROUP (ORDER BY price)) AS min_price,
231+
round(percentile_cont(0.99) WITHIN GROUP (ORDER BY price)) AS max_price
232+
FROM uk_price_paid_synthetic
233+
WHERE date > (current_date - interval '18 months') AND ${condition}`,
234+
getPopularTowns: (district: string) => `SELECT
235+
town, count(*) as popularity FROM uk_price_paid_synthetic WHERE district = '${district}' GROUP BY district, town ORDER BY popularity DESC LIMIT 10`,
236+
getPopularDistricts: () => `SELECT
237+
district, count(*) as popularity FROM uk_price_paid_synthetic GROUP BY district ORDER BY popularity DESC LIMIT 10`,
238+
getPopularPostcodes: (town: string, district: string) => `SELECT
239+
postcode1, count(*) as popularity FROM uk_price_paid_synthetic WHERE town = '${town}' AND district = '${district}' AND postcode1 != '' GROUP BY district, town, postcode1 ORDER BY popularity DESC LIMIT 10`,
240+
getHouseSales: (condition: string) => `SELECT count(1) FILTER (WHERE ${condition})::int AS area_count, count(*) as national_count FROM uk_price_paid_synthetic`,
241+
}
123242

124243
export const clickHouseQueries: Record<string, Function> = {
125244
soldOverTime: (condition: string, column: string) => `SELECT
@@ -200,7 +319,7 @@ export const clickHouseQueries: Record<string, Function> = {
200319
day
201320
ORDER BY
202321
year ASC WITH FILL,
203-
day ASC WITH FILL FROM toUnixTimestamp(CAST(toStartOfYear(now() - toIntervalYear(1)), 'DateTime')) TO toUnixTimestamp(CAST(toStartOfYear(now()), 'DateTime')) STEP toIntervalDay(1)`,
322+
day ASC WITH FILL FROM toUnixTimestamp(CAST(toStartOfYear(now() - toIntervalYear(1)), 'DateTime')) TO toUnixTimestamp(CAST(toStartOfYear(now()), 'DateTime')) STEP toIntervalDay(1)`,
204323
salesByDayCurrentYear: (condition: string) => `SELECT
205324
toYear(date) AS year,
206325
toStartOfDay(date) AS day,
@@ -220,11 +339,11 @@ export const clickHouseQueries: Record<string, Function> = {
220339
round(quantile(0.99)(price)) AS \`max_price\`
221340
FROM uk.uk_price_paid
222341
WHERE date > (now() - toIntervalMonth(18)) AND ${condition}`,
223-
getPopularTowns: (district: string) => `SELECT
342+
getPopularTowns: (district: string) => `SELECT
224343
town, count() as popularity FROM uk.uk_price_paid WHERE district = '${district}' GROUP BY town ORDER BY popularity DESC LIMIT 10`,
225-
getPopularDistricts: () => `SELECT
344+
getPopularDistricts: () => `SELECT
226345
district, count() as popularity FROM uk.uk_price_paid GROUP BY district ORDER BY popularity DESC LIMIT 10`,
227-
getPopularPostcodes: (town: string, district: string) => `SELECT
346+
getPopularPostcodes: (town: string, district: string) => `SELECT
228347
postcode1, count() as popularity FROM uk.uk_price_paid WHERE town = '${town}' AND district = '${district}' AND postcode1 != '' GROUP BY postcode1 ORDER BY popularity DESC LIMIT 10`,
229348
getPriceEvolution: (condition: string) => `SELECT
230349
toStartOfMonth(date) AS month,
@@ -315,7 +434,7 @@ export const clickHouseQueriesLarge: Record<string, Function> = {
315434
day
316435
ORDER BY
317436
year ASC WITH FILL,
318-
day ASC WITH FILL FROM toUnixTimestamp(CAST(toStartOfYear(now() - toIntervalYear(1)), 'DateTime')) TO toUnixTimestamp(CAST(toStartOfYear(now()), 'DateTime')) STEP toIntervalDay(1)`,
437+
day ASC WITH FILL FROM toUnixTimestamp(CAST(toStartOfYear(now() - toIntervalYear(1)), 'DateTime')) TO toUnixTimestamp(CAST(toStartOfYear(now()), 'DateTime')) STEP toIntervalDay(1)`,
319438
salesByDayCurrentYear: (condition: string) => `SELECT
320439
toYear(date) AS year,
321440
toStartOfDay(date) AS day,
@@ -335,11 +454,11 @@ export const clickHouseQueriesLarge: Record<string, Function> = {
335454
round(quantile(0.99)(price)) AS \`max_price\`
336455
FROM uk.uk_price_paid_synthetic
337456
WHERE date > (now() - toIntervalMonth(18)) AND ${condition}`,
338-
getPopularTowns: (district: string) => `SELECT
457+
getPopularTowns: (district: string) => `SELECT
339458
town, count() as popularity FROM uk.uk_price_paid_synthetic WHERE district = '${district}' GROUP BY town ORDER BY popularity DESC LIMIT 10`,
340-
getPopularDistricts: () => `SELECT
459+
getPopularDistricts: () => `SELECT
341460
district, count() as popularity FROM uk.uk_price_paid_synthetic GROUP BY district ORDER BY popularity DESC LIMIT 10`,
342-
getPopularPostcodes: (town: string, district: string) => `SELECT
461+
getPopularPostcodes: (town: string, district: string) => `SELECT
343462
postcode1, count() as popularity FROM uk.uk_price_paid_synthetic WHERE town = '${town}' AND district = '${district}' AND postcode1 != '' GROUP BY postcode1 ORDER BY popularity DESC LIMIT 10`,
344463
getPriceEvolution: (condition: string) => `SELECT
345464
toStartOfMonth(date) AS month,

0 commit comments

Comments
 (0)