Skip to content

Commit 4126e2c

Browse files
committed
database/bulk-delete: delete many rows without overwhelming the DB
1 parent af28b7f commit 4126e2c

File tree

2 files changed

+60
-30
lines changed

2 files changed

+60
-30
lines changed

src/packages/database/postgres/bulk-delete.test.ts

+25-3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ describe("bulk delete", () => {
2020
const p = getPool();
2121
const project_id = uuid();
2222
const N = 2000;
23+
24+
// extra entry, which has to remain
25+
const other = uuid();
26+
await p.query(
27+
"INSERT INTO project_log (id, project_id, time) VALUES($1::UUID, $2::UUID, $3::TIMESTAMP)",
28+
[other, uuid(), new Date()],
29+
);
30+
2331
for (let i = 0; i < N; i++) {
2432
await p.query(
2533
"INSERT INTO project_log (id, project_id, time) VALUES($1::UUID, $2::UUID, $3::TIMESTAMP)",
@@ -33,17 +41,31 @@ describe("bulk delete", () => {
3341
);
3442
expect(num1.rows[0].num).toEqual(N);
3543

36-
await bulk_delete({
44+
const res = await bulk_delete({
3745
table: "project_log",
3846
field: "project_id",
3947
value: project_id,
40-
limit: 100,
48+
limit: 128,
4149
});
4250

51+
// if this ever fails, the "ret.rowCount" value is inaccurate.
52+
// This must be replaced by "RETURNING 1" in the query and a "SELECT COUNT(*) ..." and so on.
53+
// (and not only here, but everywhere in the code base)
54+
expect(res.rowsDeleted).toEqual(N);
55+
expect(res.durationS).toBeGreaterThan(0.01);
56+
expect(res.totalPgTimeS).toBeGreaterThan(0.001);
57+
expect(res.totalWaitS).toBeGreaterThan(0.001);
58+
expect((res.totalPgTimeS * 10) / res.totalWaitS).toBeGreaterThan(0.5);
59+
4360
const num2 = await p.query(
4461
"SELECT COUNT(*)::INT as num FROM project_log WHERE project_id = $1",
4562
[project_id],
4663
);
4764
expect(num2.rows[0].num).toEqual(0);
48-
});
65+
66+
const otherRes = await p.query("SELECT * FROM project_log WHERE id = $1", [
67+
other,
68+
]);
69+
expect(otherRes.rows[0].id).toEqual(other);
70+
}, 10000);
4971
});
+35-27
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,81 @@
1+
// see packages/database/pool/pool.ts for where this name is also hard coded:
2+
process.env.PGDATABASE = "smc_ephemeral_testing_database";
3+
14
import { escapeIdentifier } from "pg";
25

36
import getPool from "@cocalc/database/pool";
47
import { SCHEMA } from "@cocalc/util/schema";
58

69
interface Opts {
7-
table: string;
10+
table: string; // e.g. project_log, etc.
811
field: "project_id" | "account_id"; // for now, we only support a few
12+
id?: string; // default "id", the ID field in the table, which identifies each row uniquely
913
value: string; // a UUID
10-
limit?: number;
14+
limit?: number; // default 1024
15+
maxUtilPct?: number; // 1-99, percent (default 10); values outside this range are rejected
1116
}
1217

1318
type Ret = Promise<{
1419
rowsDeleted: number;
1520
durationS: number;
21+
totalWaitS: number;
22+
totalPgTimeS: number;
1623
}>;
1724

18-
function deleteQuery(table: string, field: string) {
25+
function deleteQuery(table: string, field: string, id: string) {
1926
const T = escapeIdentifier(table);
2027
const F = escapeIdentifier(field);
28+
const ID = escapeIdentifier(id);
2129

2230
return `
2331
DELETE FROM ${T}
24-
WHERE ${F} IN (
25-
SELECT ${F} FROM ${T} WHERE ${F} = $1 LIMIT $2
26-
)
27-
RETURNING 1
28-
`;
32+
WHERE ${ID} IN (
33+
SELECT ${ID} FROM ${T} WHERE ${F} = $1 LIMIT $2
34+
)`;
2935
}
3036

3137
export async function bulk_delete(opts: Opts): Ret {
32-
const { table, field, value } = opts;
33-
let { limit = 1000 } = opts;
38+
const { table, field, value, id = "id", maxUtilPct = 10 } = opts;
39+
let { limit = 1024 } = opts;
3440
// assert table name is a key in SCHEMA
3541
if (!(table in SCHEMA)) {
3642
throw new Error(`table ${table} does not exist`);
3743
}
3844

39-
const q = deleteQuery(table, field);
40-
console.log(q);
41-
console.log(opts);
45+
if (maxUtilPct < 1 || maxUtilPct > 99) {
46+
throw new Error(`maxUtilPct must be between 1 and 99`);
47+
}
4248

49+
const q = deleteQuery(table, field, id);
4350
const pool = getPool();
44-
4551
const start_ts = Date.now();
46-
let rowsDeleted = 0;
4752

53+
let rowsDeleted = 0;
54+
let totalWaitS = 0;
55+
let totalPgTimeS = 0;
4856
while (true) {
4957
const t0 = Date.now();
5058
const ret = await pool.query(q, [value, limit]);
51-
const td = Date.now() - t0;
59+
const dt = (Date.now() - t0) / 1000;
5260
rowsDeleted += ret.rowCount ?? 0;
61+
totalPgTimeS += dt;
5362

54-
// adjust the limit
55-
const next = Math.round(
56-
td > 0.1 ? limit / 2 : td < 0.05 ? limit * 2 : limit,
57-
);
58-
limit = Math.max(1, Math.min(10000, next));
63+
// adjust the limit: we aim to keep the operation between 0.1 and 0.2 secs
64+
const next = dt > 0.2 ? limit / 2 : dt < 0.1 ? limit * 2 : limit;
65+
limit = Math.max(1, Math.min(32768, Math.round(next)));
5966

6067
// wait for a bit, but not more than 1 second ~ this aims for a max utilization of maxUtilPct percent (default 10%)
61-
const wait_ms = Math.min(1000, td * 10);
62-
await new Promise((done) => setTimeout(done, wait_ms));
68+
const waitS = Math.min(1, dt * ((100 - maxUtilPct) / maxUtilPct));
69+
await new Promise((done) => setTimeout(done, 1000 * waitS));
70+
totalWaitS += waitS;
6371

64-
console.log(
65-
`loop: deleted ${ret.rowCount} | wait=${wait_ms} | limit=${limit}`,
66-
);
72+
// console.log(
73+
// `deleted ${ret.rowCount} | dt=${dt} | wait=${waitS} | limit=${limit}`,
74+
// );
6775

6876
if (ret.rowCount === 0) break;
6977
}
7078

7179
const durationS = (Date.now() - start_ts) / 1000;
72-
return { durationS, rowsDeleted };
80+
return { durationS, rowsDeleted, totalWaitS, totalPgTimeS };
7381
}

0 commit comments

Comments
 (0)