Skip to content

Commit bd4780f

Browse files
committed
db-dump: set sequence values when importing a database dump
By default, the import script recreates the database schema, which includes creating new sequences with zero values. This results in the lazy crates.io developer occasionally receiving obscure errors when inserting records into tables that use sequences, often not on the first or second insert, because the IDs in the database dump are not always contiguous. Rather than dumping the real sequence values from the database, we can just recreate them based on the maximum ID in each table. This works well enough, and means we don't have to tinker with the export script or ship extra data. This commit only configures the database tables that actually include data in the database dump. There are other sequences, but since those tables won't have data imported, it doesn't matter if they remain zero after import.
1 parent 560dbfe commit bd4780f

File tree

4 files changed

+48
-3
lines changed

4 files changed

+48
-3
lines changed

crates/crates_io_database_dump/src/configuration.rs

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use serde::Deserialize;
1+
use serde::{Deserialize, Serialize};
22
use std::collections::{BTreeMap, VecDeque};
33

44
/// An enum indicating whether a column is included in the database dumps.
@@ -15,7 +15,9 @@ pub enum ColumnVisibility {
1515
/// and should list all tables the current table refers to with foreign key
1616
/// constraints on public columns. The `filter` field is a valid SQL expression
1717
/// used in a `WHERE` clause to filter the rows of the table. The `columns`
18-
/// field maps column names to their respective visibilities.
18+
/// field maps column names to their respective visibilities. The `sequence`
19+
/// field, if present, defines the sequence used by the table when generating
20+
/// IDs, along with the ID column.
1921
#[derive(Clone, Debug, Default, Deserialize)]
2022
pub struct TableConfig {
2123
#[serde(default)]
@@ -24,6 +26,13 @@ pub struct TableConfig {
2426
pub columns: BTreeMap<String, ColumnVisibility>,
2527
#[serde(default)]
2628
pub column_defaults: BTreeMap<String, String>,
29+
pub sequence: Option<SequenceConfig>,
30+
}
31+
32+
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
33+
pub struct SequenceConfig {
34+
pub column: String,
35+
pub name: String,
2736
}
2837

2938
/// Maps table names to the respective configurations. Used to load `dump-db.toml`.

crates/crates_io_database_dump/src/dump-db.toml

+21
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ description = "public"
4848
crates_cnt = "public"
4949
created_at = "public"
5050
path = "public"
51+
[categories.sequence]
52+
column = "id"
53+
name = "categories_id_seq"
5154

5255
[crate_downloads.columns]
5356
crate_id = "public"
@@ -87,6 +90,9 @@ textsearchable_index_col = "private" # This is Postgres specific and can be derived
8790
repository = "public"
8891
max_upload_size = "public"
8992
max_features = "public"
93+
[crates.sequence]
94+
column = "id"
95+
name = "packages_id_seq"
9096

9197
[crates_categories]
9298
dependencies = ["categories", "crates"]
@@ -130,6 +136,9 @@ features = "public"
130136
target = "public"
131137
kind = "public"
132138
explicit_name = "public"
139+
[dependencies.sequence]
140+
column = "id"
141+
name = "dependencies_id_seq"
133142

134143
[__diesel_schema_migrations.columns]
135144
version = "private"
@@ -152,6 +161,9 @@ id = "public"
152161
keyword = "public"
153162
crates_cnt = "public"
154163
created_at = "public"
164+
[keywords.sequence]
165+
column = "id"
166+
name = "keywords_id_seq"
155167

156168
[metadata.columns]
157169
total_downloads = "public"
@@ -186,6 +198,9 @@ github_id = "public"
186198
name = "public"
187199
avatar = "public"
188200
org_id = "public"
201+
[teams.sequence]
202+
column = "id"
203+
name = "teams_id_seq"
189204

190205
[users]
191206
filter = """
@@ -207,6 +222,9 @@ is_admin = "private"
207222
publish_notifications = "private"
208223
[users.column_defaults]
209224
gh_access_token = "''"
225+
[users.sequence]
226+
column = "id"
227+
name = "users_id_seq"
210228

211229
[version_downloads]
212230
dependencies = ["versions"]
@@ -253,6 +271,9 @@ documentation = "public"
253271
repository = "public"
254272
categories = "public"
255273
keywords = "public"
274+
[versions.sequence]
275+
column = "id"
276+
name = "versions_id_seq"
256277

257278
[versions_published_by.columns]
258279
version_id = "private"

crates/crates_io_database_dump/src/dump-import.sql.j2

+13
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,19 @@ BEGIN;
2929
{% for cd in table.column_defaults %}
3030
ALTER TABLE "{{table.name}}" ALTER COLUMN "{{cd.column}}" DROP DEFAULT;
3131
{%- endfor %}
32+
{%- endfor %}
33+
34+
-- Set sequence values.
35+
{% for table in tables -%}
36+
{% if table.sequence %}
37+
SELECT setval(
38+
'{{table.sequence.name}}',
39+
COALESCE(
40+
(SELECT MAX("{{table.sequence.column}}") FROM "{{table.name}}")::BIGINT,
41+
1
42+
)
43+
);
44+
{% endif %}
3245
{%- endfor %}
3346

3447
-- Reenable triggers on each table.

crates/crates_io_database_dump/src/gen_scripts.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::configuration::{ColumnVisibility, TableConfig, VisibilityConfig};
1+
use crate::configuration::{ColumnVisibility, SequenceConfig, TableConfig, VisibilityConfig};
22
use anyhow::Context;
33
use serde::Serialize;
44
use std::{fs::File, path::Path};
@@ -18,6 +18,7 @@ struct HandlebarsTableContext<'a> {
1818
filter: Option<String>,
1919
columns: String,
2020
column_defaults: Vec<ColumnDefault<'a>>,
21+
sequence: Option<&'a SequenceConfig>,
2122
}
2223

2324
#[derive(Debug, Serialize)]
@@ -52,6 +53,7 @@ impl TableConfig {
5253
filter,
5354
columns,
5455
column_defaults,
56+
sequence: self.sequence.as_ref(),
5557
})
5658
}
5759
}

0 commit comments

Comments
 (0)