@@ -9,11 +9,12 @@ Code related to permanently deleting projects.

 import getLogger from "@cocalc/backend/logger";
 import getPool from "@cocalc/database/pool";
-import { callback2 } from "@cocalc/util/async-utils";
-import { PostgreSQL } from "./types";
-import { minutes_ago } from "@cocalc/util/misc";
 import { getServerSettings } from "@cocalc/database/settings";
+import { callback2 } from "@cocalc/util/async-utils";
 import { KUCALC_ON_PREMISES } from "@cocalc/util/db-schema/site-defaults";
+import { minutes_ago } from "@cocalc/util/misc";
+import { bulk_delete } from "./bulk-delete";
+import { PostgreSQL } from "./types";

 const log = getLogger("db:delete-projects");

@@ -59,8 +60,9 @@ async function get_account_id(
 }

 /*
-This deletes all projects older than the given number of days, from the perspective of a user.
-Another task has to run to actually get rid of the data, etc.
+This removes all users from all projects that are marked as deleted and older than the given number of days.
+In particular, those users are no longer able to access the project.
+The "cleanup_old_projects_data" function has to run to actually get rid of the data, etc.
 */
 export async function unlink_old_deleted_projects(
   db: PostgreSQL,
@@ -70,7 +72,7 @@ export async function unlink_old_deleted_projects(
     query: "UPDATE projects",
     set: { users: null },
     where: [
-      "deleted = true",
+      "deleted = true",
       "users IS NOT NULL",
       `last_edited <= NOW() - '${age_d} days'::INTERVAL`,
     ],
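
For orientation, the two exported functions are meant to work as a pair: unlink_old_deleted_projects only detaches users, and cleanup_old_projects_data later removes the actual data. A minimal usage sketch, assuming a periodic maintenance task calls them with the defaults visible in this file; the wrapper function, its import path, and the exact argument meanings are illustrative assumptions, not part of this commit:

```ts
import { PostgreSQL } from "./types";
import {
  unlink_old_deleted_projects,
  cleanup_old_projects_data,
} from "./delete-projects"; // assumed path of the file shown in this diff

// Illustrative maintenance pass (not part of the commit): first unlink users
// from projects that are deleted and untouched for ~30 days, then clean up
// their data, bounded to roughly one hour of work.
export async function deletedProjectsMaintenance(db: PostgreSQL) {
  await unlink_old_deleted_projects(db, 30); // assumed: second argument is age_d in days
  await cleanup_old_projects_data(db, 60); // second argument is max_run_m in minutes
}
```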
@@ -83,27 +85,32 @@ FROM projects as p
   INNER JOIN syncstrings as s
     ON p.project_id = s.project_id
 WHERE p.deleted = true
+  AND users IS NULL
   AND p.state ->> 'state' != 'deleted'
+ORDER BY
+  p.project_id, s.string_id
 `;

 /*
-This is more thorough than the above. It issues actual delete operations on data of projects marked as deleted.
+This more thorough delete procedure comes after the above.
+It issues actual delete operations on the data of projects marked as deleted.
 When done, it sets the state.state to "deleted".

 The operations involves deleting all syncstrings of that project (and associated with that, patches),
-and only for on-prem setups, it also deletes all the data stored in the project on disk.
+and only for on-prem setups, it also deletes all the data stored in the project on disk and in various tables.

-This function is called every couple of hours. Hence ensure it does not run longer than the given max_run_m time (minutes)
+This function is called every couple of hours. Hence it checks that it does not run longer than the given max_run_m time (minutes).
 */
 export async function cleanup_old_projects_data(
   db: PostgreSQL,
-  delay_ms = 50,
   max_run_m = 60,
 ) {
   const settings = await getServerSettings();
   const on_prem = settings.kucalc === KUCALC_ON_PREMISES;
+  const L0 = log.extend("cleanup_old_projects_data");
+  const L = L0.debug;

-  log.debug("cleanup_old_projects_data", { delay_ms, max_run_m, on_prem });
+  log.debug("cleanup_old_projects_data", { max_run_m, on_prem });
   const start_ts = new Date();

   const pool = getPool();
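
The commit also moves this function onto a namespaced child logger. A small sketch of the pattern, assuming getLogger returns a debug-style logger whose .extend() appends a suffix to the namespace (so messages appear under "db:delete-projects:cleanup_old_projects_data", and one level deeper per project); the concrete project_id below is a hypothetical placeholder:

```ts
import getLogger from "@cocalc/backend/logger";

const log = getLogger("db:delete-projects");

// Child logger for this function (assumes debug-style hierarchical namespaces).
const L0 = log.extend("cleanup_old_projects_data");
const L = L0.debug;
L("starting", { max_run_m: 60, on_prem: false });

// One further level per project, so log lines can be filtered by project_id.
const project_id = "00000000-0000-0000-0000-000000000000"; // hypothetical id
const L2 = L0.extend(project_id).debug;
L2(`deleting all shared files in project ${project_id}`);
```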
@@ -115,34 +122,95 @@ export async function cleanup_old_projects_data(
   for (const row of rows) {
     const { project_id, string_id } = row;
     if (start_ts < minutes_ago(max_run_m)) {
-      log.debug(
-        `cleanup_old_projects_data: too much time elapsed, breaking after ${num} syncstrings`,
-      );
+      L(`too much time elapsed, breaking after ${num} syncstrings`);
       break;
     }

-    log.debug(
-      `cleanup_old_projects_data: deleting syncstring ${project_id}/${string_id}`,
-    );
+    L(`deleting syncstring ${project_id}/${string_id}`);
     num += 1;
     await callback2(db.delete_syncstring, { string_id });

-    // wait for the given amount of delay_ms millio seconds
-    await new Promise((done) => setTimeout(done, delay_ms));
+    // wait a bit after deleting syncstrings, e.g. to let the standby db catch up
+    await new Promise((done) => setTimeout(done, 100));

+    // Q_CLEANUP_SYNCSTRINGS orders by project_id, hence we trigger project specific actions when the id changes
     if (pid != project_id) {
       pid = project_id;
+      const L2 = L0.extend(project_id).debug;
+
       if (on_prem) {
-        log.debug(
-          `cleanup_old_projects_data: deleting project data in ${project_id}`,
-        );
+        L2(`cleanup_old_projects_data for project_id=${project_id}`);
         // TODO: this only works on-prem, and requires the project files to be mounted

-        log.debug(`deleting all shared files in project ${project_id}`);
+        L2(`deleting all shared files in project ${project_id}`);
         // TODO: do it directly like above, and also get rid of all those shares in the database
+
+        const delPublicPaths = await bulk_delete({
+          table: "public_paths",
+          field: "project_id",
+          value: project_id,
+        });
+        L2(`deleted public_paths ${delPublicPaths.rowsDeleted} entries`);
+
+        const delProjectLog = await bulk_delete({
+          table: "project_log",
+          field: "project_id",
+          value: project_id,
+        });
+        L2(`deleted project_log ${delProjectLog.rowsDeleted} entries`);
+
+        const delFileUse = await bulk_delete({
+          table: "file_use",
+          field: "project_id",
+          value: project_id,
+        });
+        L2(`deleted file_use ${delFileUse.rowsDeleted} entries`);
+
+        const delAccessLog = await bulk_delete({
+          table: "file_access_log",
+          field: "project_id",
+          value: project_id,
+        });
+        L2(`deleted file_access_log ${delAccessLog.rowsDeleted} entries`);
+
+        const delJupyterApiLog = await bulk_delete({
+          table: "jupyter_api_log",
+          field: "project_id",
+          value: project_id,
+        });
+        L2(`deleted jupyter_api_log ${delJupyterApiLog.rowsDeleted} entries`);
+
+        for (const field of [
+          "target_project_id",
+          "source_project_id",
+        ] as const) {
+          const delCopyPaths = await bulk_delete({
+            table: "copy_paths",
+            field,
+            value: project_id,
+          });
+          L2(`deleted copy_paths/${field} ${delCopyPaths.rowsDeleted} entries`);
+        }
+
+        const delListings = await bulk_delete({
+          table: "listings",
+          field: "project_id",
+          id: "project_id", // TODO listings has a more complex ID, is this a problem?
+          value: project_id,
+        });
+        L2(`deleted ${delListings.rowsDeleted} listings`);
+
+        const delInviteTokens = await bulk_delete({
+          table: "project_invite_tokens",
+          field: "project_id",
+          value: project_id,
+          id: "token",
+        });
+        L2(`deleted ${delInviteTokens.rowsDeleted} entries`);
       }

       // now, that we're done with that project, mark it as state.state ->> 'deleted'
+      // in addition to the flag "deleted = true"
       await callback2(db.set_project_state, {
         project_id,
         state: "deleted",
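
The bulk_delete helper used throughout the on-prem branch is imported from "./bulk-delete", but its implementation is not part of this diff. A minimal sketch of the interface the call sites above assume: a table name, the column to filter on, the value to match, an optional id naming the key column, and a result exposing rowsDeleted. This is an assumption derived from the usage in this diff, not the actual module:

```ts
// Sketch of the bulk_delete interface assumed by the call sites above.
// The real implementation lives in ./bulk-delete and is not shown here;
// presumably it deletes rows in batches to keep individual statements small.
interface BulkDeleteOpts {
  table: string; // e.g. "project_log"
  field: string; // column to filter on, e.g. "project_id"
  value: string; // value to match, e.g. a project_id
  id?: string; // key column used for batching, e.g. "token" for project_invite_tokens
}

interface BulkDeleteResult {
  rowsDeleted: number; // total number of rows removed
}

declare function bulk_delete(opts: BulkDeleteOpts): Promise<BulkDeleteResult>;
```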