Skip to content

Commit a6eba0e

Browse files
craig[bot]kev-cao
andcommitted
Merge #155263
155263: roachtest: deflake mixed-version backup/restore roachtest r=msbutler a=kev-cao As part of the 25.4 migration, all descriptors are rewritten to use the new serialization format. If this occurs during a restore, the restore will fail due to version mismatches. This test deflakes our mixed-version backup-restore roachtests to rerun the restore if it encounters that error. Fixes: #154611 Fixes: #154850 Release note: None Co-authored-by: Kevin Cao <[email protected]>
2 parents 73c10e8 + 96b12d7 commit a6eba0e

File tree

2 files changed

+43
-7
lines changed

2 files changed

+43
-7
lines changed

pkg/cmd/roachtest/tests/backup_restore_roundtrip.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,10 @@ func backupRestoreRoundTrip(
221221

222222
t.L().Printf("verifying backup %d", i+1)
223223
// Verify content in backups.
224-
err = d.verifyBackupCollection(ctx, t.L(), testRNG, collection, true /* checkFiles */, true /* internalSystemJobs */)
224+
err = d.verifyBackupCollection(
225+
ctx, t.L(), testRNG, collection,
226+
true /* checkFiles */, true /* internalSystemJobs */, nil, /* mvHelper */
227+
)
225228
if err != nil {
226229
return err
227230
}
@@ -538,7 +541,8 @@ func testOnlineRestoreRecovery(ctx context.Context, t test.Test, c cluster.Clust
538541

539542
t.L().Printf("performing online restore of backup")
540543
if _, _, err := d.runRestore(
541-
ctx, t.L(), testRNG, collection, false /* checkFiles */, true, /* internalSystemJobs */
544+
ctx, t.L(), testRNG, collection,
545+
false /* checkFiles */, true /* internalSystemJobs */, nil, /* mvHelper */
542546
); err != nil {
543547
return err
544548
}

pkg/cmd/roachtest/tests/mixed_version_backup.go

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ import (
5353
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
5454
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
5555
"github.com/cockroachdb/errors"
56+
"github.com/cockroachdb/version"
5657
)
5758

5859
const (
@@ -1267,8 +1268,9 @@ func (d *BackupRestoreTestDriver) verifyBackupCollection(
12671268
bc *backupCollection,
12681269
checkFiles bool,
12691270
internalSystemJobs bool,
1271+
mvHelper *mixedversion.Helper,
12701272
) error {
1271-
restoredTables, _, err := d.runRestore(ctx, l, rng, bc, checkFiles, internalSystemJobs)
1273+
restoredTables, _, err := d.runRestore(ctx, l, rng, bc, checkFiles, internalSystemJobs, mvHelper)
12721274
if err != nil {
12731275
return fmt.Errorf("error restoring backup: %w", err)
12741276
}
@@ -1306,6 +1308,7 @@ func (d *BackupRestoreTestDriver) runRestore(
13061308
bc *backupCollection,
13071309
checkFiles bool,
13081310
internalSystemJobs bool,
1311+
mvHelper *mixedversion.Helper,
13091312
) ([]string, string, error) {
13101313
restoreStmt, restoredTables, restoreDB := d.buildRestoreStatement(bc)
13111314
if _, isTableBackup := bc.btype.(*tableBackup); isTableBackup {
@@ -1327,9 +1330,34 @@ func (d *BackupRestoreTestDriver) runRestore(
13271330
if err := d.testUtils.QueryRow(ctx, rng, restoreStmt).Scan(&jobID); err != nil {
13281331
return nil, "", fmt.Errorf("backup %s: error in restore statement: %w", bc.name, err)
13291332
}
1330-
if err := d.testUtils.waitForJobSuccess(ctx, l, rng, jobID, internalSystemJobs); err != nil {
1331-
return nil, "", err
1333+
if jobErr := d.testUtils.waitForJobSuccess(ctx, l, rng, jobID, internalSystemJobs); jobErr != nil {
1334+
if mvHelper == nil {
1335+
return nil, "", jobErr
1336+
}
1337+
v25_4 := version.MajorVersion{Year: 25, Ordinal: 4}
1338+
isUpgradeTo25_4 := mvHelper.System.ToVersion.Major().Equals(v25_4) &&
1339+
mvHelper.System.FromVersion.Major().LessThan(v25_4)
1340+
if !isUpgradeTo25_4 {
1341+
return nil, "", jobErr
1342+
}
1343+
// In the 25.4 upgrade, all descriptors are rewritten in the migration to use
1344+
// the new serialization format. If this upgrade occurs in the middle of the
1345+
// restore, we may encounter a version mismatch error. As a short-term fix for
1346+
// this test flake, we retry the restore if we encounter this error.
1347+
if !strings.Contains(jobErr.Error(), "version mismatch for descriptor") {
1348+
return nil, "", jobErr
1349+
}
1350+
l.Printf(
1351+
"encountered version mismatch error due to mixed-version upgrade, retrying restore: %v", jobErr,
1352+
)
1353+
if err := d.testUtils.QueryRow(ctx, rng, restoreStmt).Scan(&jobID); err != nil {
1354+
return nil, "", fmt.Errorf("backup %s: error in restore statement: %w", bc.name, err)
1355+
}
1356+
if jobErr = d.testUtils.waitForJobSuccess(ctx, l, rng, jobID, internalSystemJobs); jobErr != nil {
1357+
return nil, "", jobErr
1358+
}
13321359
}
1360+
13331361
return restoredTables, restoreDB, nil
13341362
}
13351363

@@ -2748,7 +2776,9 @@ func (mvb *mixedVersionBackup) verifySomeBackups(
27482776

27492777
for _, collection := range toBeRestored {
27502778
l.Printf("mixed-version: verifying %s", collection.name)
2751-
if err := mvb.backupRestoreTestDriver.verifyBackupCollection(ctx, l, rng, collection, checkFiles, internalSystemJobs); err != nil {
2779+
if err := mvb.backupRestoreTestDriver.verifyBackupCollection(
2780+
ctx, l, rng, collection, checkFiles, internalSystemJobs, h,
2781+
); err != nil {
27522782
return errors.Wrap(err, "mixed-version")
27532783
}
27542784
}
@@ -2828,7 +2858,9 @@ func (mvb *mixedVersionBackup) verifyAllBackups(
28282858
return
28292859
}
28302860

2831-
if err := mvb.backupRestoreTestDriver.verifyBackupCollection(ctx, l, rng, collection, checkFiles, internalSystemJobs); err != nil {
2861+
if err := mvb.backupRestoreTestDriver.verifyBackupCollection(
2862+
ctx, l, rng, collection, checkFiles, internalSystemJobs, h,
2863+
); err != nil {
28322864
err := errors.Wrapf(err, "%s", v)
28332865
l.Printf("restore error: %v", err)
28342866
// Attempt to collect logs and debug.zip at the time of this

0 commit comments

Comments
 (0)