Skip to content

Commit 3bcabeb

Browse files
committed
database/cleanup: WIP bulk delete
1 parent b546a4d commit 3bcabeb

File tree

2 files changed

+122
-0
lines changed

2 files changed

+122
-0
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* This file is part of CoCalc: Copyright © 2024 Sagemath, Inc.
3+
* License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
4+
*/
5+
6+
import getPool, { initEphemeralDatabase } from "@cocalc/database/pool";
7+
import { uuid } from "@cocalc/util/misc";
8+
import { bulk_delete } from "./bulk-delete";
9+
10+
// Time budget (ms) granted to the one-off database initialisation below.
const EPHEMERAL_DB_INIT_TIMEOUT_MS = 15000;

/** Initialises the ephemeral test database once, before any test in this file runs. */
async function prepareEphemeralDatabase(): Promise<void> {
  await initEphemeralDatabase();
}

beforeAll(prepareEphemeralDatabase, EPHEMERAL_DB_INIT_TIMEOUT_MS);
13+
14+
/** Ends the shared connection pool once every test in this file has finished. */
async function shutDownPool(): Promise<void> {
  const pool = getPool();
  await pool.end();
}

afterAll(shutDownPool);
17+
18+
describe("bulk delete", () => {
  // bulk_delete pauses between batches on purpose, so give this test more
  // room than the default per-test timeout.
  const TEST_TIMEOUT_MS = 30000;

  test(
    "deleting projects",
    async () => {
      const p = getPool();
      const project_id = uuid();
      const N = 2000;

      // Seed all N rows with a single statement instead of N sequential
      // round-trips; each row still gets its own client-generated UUID.
      const ids = Array.from({ length: N }, () => uuid());
      await p.query(
        "INSERT INTO project_log (id, project_id, time) SELECT unnest($1::UUID[]), $2::UUID, $3::TIMESTAMP",
        [ids, project_id, new Date()],
      );

      const num1 = await p.query(
        "SELECT COUNT(*)::INT as num FROM project_log WHERE project_id = $1",
        [project_id],
      );
      expect(num1.rows[0].num).toEqual(N);

      const { rowsDeleted } = await bulk_delete({
        table: "project_log",
        field: "project_id",
        value: project_id,
        limit: 100,
      });
      // The reported total must account for every seeded row.
      expect(rowsDeleted).toEqual(N);

      const num2 = await p.query(
        "SELECT COUNT(*)::INT as num FROM project_log WHERE project_id = $1",
        [project_id],
      );
      expect(num2.rows[0].num).toEqual(0);
    },
    TEST_TIMEOUT_MS,
  );
});
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import { escapeIdentifier } from "pg";
2+
3+
import getPool from "@cocalc/database/pool";
4+
import { SCHEMA } from "@cocalc/util/schema";
5+
6+
/**
 * Options accepted by bulk_delete.
 */
interface Opts {
  /** Table to delete from; bulk_delete rejects names that are not keys of SCHEMA. */
  table: string;
  /** Column the rows are matched on — for now, only a few are supported. */
  field: "account_id" | "project_id";
  /** Value the column must equal (a UUID). */
  value: string;
  /**
   * Initial LIMIT handed to the delete query. bulk_delete falls back to 1000
   * when omitted and adjusts the value between iterations.
   */
  limit?: number;
}
12+
13+
/** Summary that bulk_delete resolves to once it has finished. */
interface BulkDeleteResult {
  /** Sum of the row counts reported by the individual delete queries. */
  rowsDeleted: number;
  /** Wall-clock time of the whole operation, in seconds. */
  durationS: number;
}

type Ret = Promise<BulkDeleteResult>;
17+
18+
// Bounds for the number of rows a single DELETE statement may remove.
const MIN_LIMIT = 1;
const MAX_LIMIT = 10000;
// Default batch size when the caller does not specify one.
const DEFAULT_LIMIT = 1000;
// A batch slower than SLOW_BATCH_MS halves the limit, one faster than
// FAST_BATCH_MS doubles it (both in milliseconds, like Date.now() deltas).
const SLOW_BATCH_MS = 100;
const FAST_BATCH_MS = 50;
// Never pause longer than this (milliseconds) between two batches.
const MAX_WAIT_MS = 1000;

/** Rounds `limit` to an integer within [MIN_LIMIT, MAX_LIMIT]. */
function clampLimit(limit: number): number {
  return Math.max(MIN_LIMIT, Math.min(MAX_LIMIT, Math.round(limit)));
}

/**
 * Computes the batch size for the next iteration from the previous one and
 * the time (in milliseconds) the previous batch took.
 */
function nextBatchLimit(limit: number, elapsedMs: number): number {
  if (elapsedMs > SLOW_BATCH_MS) return clampLimit(limit / 2);
  if (elapsedMs < FAST_BATCH_MS) return clampLimit(limit * 2);
  return clampLimit(limit);
}

/**
 * Builds the parameterised statement that deletes at most `$2` rows of
 * `table` whose `field` equals `$1`.
 *
 * The inner SELECT must return a per-row identifier. Selecting `field` itself
 * (as an earlier version did) yields the same value for every row, so the
 * outer DELETE would match all rows and the LIMIT would have no effect.
 * PostgreSQL's `ctid` system column identifies the physical row and exists on
 * every table, so no knowledge of the primary key is needed.
 *
 * No RETURNING clause: the driver reports the number of deleted rows via
 * `rowCount`, so there is no need to ship one result row per deleted row.
 */
function deleteQuery(table: string, field: string): string {
  const T = escapeIdentifier(table);
  const F = escapeIdentifier(field);

  return `
DELETE FROM ${T}
WHERE ctid IN (
    SELECT ctid FROM ${T} WHERE ${F} = $1 LIMIT $2
)
`;
}

/**
 * Deletes all rows of `opts.table` where `opts.field` equals `opts.value`,
 * in batches, pausing between batches to keep database utilisation low.
 *
 * The batch size starts at `opts.limit` (default 1000, clamped to
 * [1, 10000]) and is halved or doubled depending on how long the previous
 * batch took.
 *
 * @returns the total number of rows deleted and the elapsed time in seconds.
 * @throws Error if `opts.table` is not a key of SCHEMA or `opts.limit` is NaN.
 */
export async function bulk_delete(opts: Opts): Ret {
  const { table, field, value } = opts;
  // assert table name is a key in SCHEMA
  if (!(table in SCHEMA)) {
    throw new Error(`table ${table} does not exist`);
  }
  if (Number.isNaN(opts.limit)) {
    throw new Error("limit must be a number");
  }
  // A limit of 0 would delete nothing and end the loop immediately; a
  // negative one makes the query fail. Clamp to a sane range instead.
  let limit = clampLimit(opts.limit ?? DEFAULT_LIMIT);

  const q = deleteQuery(table, field);
  // TODO: WIP diagnostics; switch to the project logger once one is wired in.
  console.log(q);
  console.log(opts);

  const pool = getPool();

  const start_ts = Date.now();
  let rowsDeleted = 0;

  while (true) {
    const t0 = Date.now();
    const ret = await pool.query(q, [value, limit]);
    const td = Date.now() - t0; // milliseconds
    const deleted = ret.rowCount ?? 0;
    rowsDeleted += deleted;

    // adjust the limit for the next batch
    limit = nextBatchLimit(limit, td);

    // wait for a bit, but not more than 1 second ~ this aims for a max
    // utilization of roughly 10%. Nothing to wait for after the last batch.
    const wait_ms = deleted === 0 ? 0 : Math.min(MAX_WAIT_MS, td * 10);

    console.log(
      `loop: deleted ${ret.rowCount} | wait=${wait_ms} | limit=${limit}`,
    );

    if (deleted === 0) break;

    await new Promise((done) => setTimeout(done, wait_ms));
  }

  const durationS = (Date.now() - start_ts) / 1000;
  return { durationS, rowsDeleted };
}

0 commit comments

Comments
 (0)