2 changes: 1 addition & 1 deletion .github/workflows/catch_flaky.yml
@@ -80,7 +80,7 @@ jobs:
       tt version
   - name: Setup luatest
-    run: tt rocks install luatest
+    run: tt rocks install luatest 1.4.1

   - run: cmake .
   - run: make test-flaky
2 changes: 1 addition & 1 deletion .github/workflows/fast_testing.yml
@@ -107,7 +107,7 @@ jobs:
       tt version
   - name: Setup luatest
-    run: tt rocks install luatest
+    run: tt rocks install luatest 1.4.1

   - run: cmake .
   - run: make test-force
2 changes: 1 addition & 1 deletion .github/workflows/reusable_testing.yml
@@ -44,7 +44,7 @@ jobs:
       tt version
   - name: Setup luatest
-    run: tt rocks install luatest
+    run: tt rocks install luatest 1.4.1

   - run: cmake .
   - run: make test-force
5 changes: 4 additions & 1 deletion test/instances/storage.lua
@@ -146,6 +146,9 @@ local function bucket_recovery_wait()
         if index_has(status_index, vconst.BUCKET.RECEIVING) then
             error('Still have RECEIVING buckets')
         end
+        if index_has(status_index, vconst.BUCKET.READONLY) then
+            error('Still have READONLY buckets')
+        end
     end)
 end

@@ -163,7 +166,7 @@ end

 local function bucket_recovery_continue()
     vshard.storage.internal.errinj.ERRINJ_RECOVERY_PAUSE = false
-    vshard.storage.garbage_collector_wakeup()
+    vshard.storage.recovery_wakeup()
 end

 local function wal_sync()
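The new READONLY check mirrors the existing waits on SENDING/RECEIVING buckets. For context, a minimal sketch of what an `index_has` helper like the one used above could look like (the name comes from the diff; this implementation is an assumption, not the file's actual code):

```lua
-- Hypothetical helper: true if any bucket currently has the given
-- status in the _bucket space's status index (assumed semantics).
local function index_has(index, status)
    -- index:count(key) is a standard Tarantool index method.
    return index:count(status) > 0
end
```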
22 changes: 22 additions & 0 deletions test/luatest_helpers/vtest.lua
@@ -498,6 +498,26 @@ local function cluster_rebalancer_enable(g)
     t.assert_equals(err, nil, 'rebalancer enable')
 end

+--
+-- Disable recovery on all masters.
+--
+local function cluster_recovery_pause(g)
+    local _, err = cluster_exec_each_master(g, function()
+        _G.bucket_recovery_pause()
+    end)
+    t.assert_equals(err, nil, 'cluster recovery pause')
+end
+
+--
+-- Enable recovery on all masters.
+--
+local function cluster_recovery_continue(g)
+    local _, err = cluster_exec_each_master(g, function()
+        _G.bucket_recovery_continue()
+    end)
+    t.assert_equals(err, nil, 'cluster recovery continue')
+end
+
 --
 -- Wait vclock sync in each replicaset between all its replicas.
 --
@@ -870,6 +890,8 @@ return {
     cluster_bootstrap = cluster_bootstrap,
     cluster_rebalancer_disable = cluster_rebalancer_disable,
     cluster_rebalancer_enable = cluster_rebalancer_enable,
+    cluster_recovery_pause = cluster_recovery_pause,
+    cluster_recovery_continue = cluster_recovery_continue,
     cluster_wait_vclock_all = cluster_wait_vclock_all,
     cluster_wait_fullsync = cluster_wait_fullsync,
     cluster_rebalancer_find = cluster_rebalancer_find,
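These helpers pair naturally: a test pauses recovery cluster-wide, arranges some in-flight bucket state on purpose, then resumes it. A hedged usage sketch — the test group, test body, and scenario are made up for illustration; only the two vtest helpers come from this diff:

```lua
-- Hypothetical test flow using the new vtest helpers.
local t = require('luatest')
local vtest = require('test.luatest_helpers.vtest')

local test_group = t.group('recovery_pause_demo')

test_group.test_recovery_sees_stuck_bucket = function(g)
    -- Freeze recovery on every master so the intermediate bucket
    -- state created below is not repaired from under the test.
    vtest.cluster_recovery_pause(g)
    -- ... put a bucket into SENDING/RECEIVING on purpose here ...
    -- Resume recovery and let it clean up the stuck bucket.
    vtest.cluster_recovery_continue(g)
end
```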
63 changes: 3 additions & 60 deletions test/rebalancer/bucket_ref.result
@@ -271,7 +271,7 @@ fiber.sleep(0.2)
 vshard.storage.buckets_info(1)
 ---
 - 1:
-    status: active
+    status: readonly
     rw_lock: true
     id: 1
     ref_rw: 1
@@ -292,11 +292,8 @@ vshard.storage.buckets_info(1)
     id: 1
 ...
 --
--- Rebalancer takes buckets starting from the minimal id. If a
--- bucket with that ID is locked, it should try another. The case
--- makes bucket with minimal ID locked for RW requests. The only
--- function taking the lock is bucket_send, so to test that a
--- manual bucket_send is called before rebalancer.
+-- Cancel during bucket_send. In that case all the locks should
+-- be freed, obviously.
 --
 vshard.storage.rebalancer_enable()
 ---
@@ -327,12 +324,6 @@ function keep_ref(id) \
 end
 ---
 ...
-fiber_to_ref = fiber.create(keep_ref, 1)
----
-...
-while vshard.storage.buckets_info(1)[1].ref_rw ~= 1 do fiber.sleep(0.01) end
----
-...
 -- Now bucket_send on that bucket blocks.
 function do_send(id) \
     send_result = { \
@@ -342,54 +333,6 @@ function do_send(id) \
 end
 ---
 ...
-fiber_to_lock = fiber.create(do_send, 1)
----
-...
-while not vshard.storage.buckets_info(1)[1].rw_lock do fiber.sleep(0.01) end
----
-...
-cfg.sharding[util.replicasets[1]].weight = 99
----
-...
-cfg.sharding[util.replicasets[2]].weight = 101
----
-...
-cfg.rebalancer_disbalance_threshold = 0
----
-...
-vshard.storage.cfg(cfg, box.info.uuid)
----
-...
-wait_rebalancer_state('The cluster is balanced ok', test_run)
----
-...
--- Cleanup after the test.
-keep_lock = false
----
-...
-while not send_result do fiber.sleep(0.01) end
----
-...
-send_result
----
-- - true
-...
-cfg.sharding[util.replicasets[1]].weight = nil
----
-...
-cfg.sharding[util.replicasets[2]].weight = nil
----
-...
-vshard.storage.cfg(cfg, box.info.uuid)
----
-...
-wait_rebalancer_state('The cluster is balanced ok', test_run)
----
-...
---
--- Cancel during bucket_send. In that case all the locks should
--- be freed, obviously.
---
 keep_lock = true
 ---
 ...
33 changes: 2 additions & 31 deletions test/rebalancer/bucket_ref.test.lua
@@ -91,11 +91,8 @@ _ = test_run:switch('box_1_a')
 vshard.storage.buckets_info(1)

 --
--- Rebalancer takes buckets starting from the minimal id. If a
--- bucket with that ID is locked, it should try another. The case
--- makes bucket with minimal ID locked for RW requests. The only
--- function taking the lock is bucket_send, so to test that a
--- manual bucket_send is called before rebalancer.
+-- Cancel during bucket_send. In that case all the locks should
+-- be freed, obviously.
 --
 vshard.storage.rebalancer_enable()
 _ = test_run:switch('box_2_a')
@@ -112,39 +109,13 @@ function keep_ref(id) \
 end \
     vshard.storage.bucket_unrefrw(1) \
 end
-fiber_to_ref = fiber.create(keep_ref, 1)
-while vshard.storage.buckets_info(1)[1].ref_rw ~= 1 do fiber.sleep(0.01) end
-
 -- Now bucket_send on that bucket blocks.
 function do_send(id) \
     send_result = { \
         vshard.storage.bucket_send(id, util.replicasets[2], \
                                    {timeout = 9999999}) \
     } \
 end
-fiber_to_lock = fiber.create(do_send, 1)
-while not vshard.storage.buckets_info(1)[1].rw_lock do fiber.sleep(0.01) end
-
-
-cfg.sharding[util.replicasets[1]].weight = 99
-cfg.sharding[util.replicasets[2]].weight = 101
-cfg.rebalancer_disbalance_threshold = 0
-vshard.storage.cfg(cfg, box.info.uuid)
-wait_rebalancer_state('The cluster is balanced ok', test_run)
-
--- Cleanup after the test.
-keep_lock = false
-while not send_result do fiber.sleep(0.01) end
-send_result
-cfg.sharding[util.replicasets[1]].weight = nil
-cfg.sharding[util.replicasets[2]].weight = nil
-vshard.storage.cfg(cfg, box.info.uuid)
-wait_rebalancer_state('The cluster is balanced ok', test_run)
-
---
--- Cancel during bucket_send. In that case all the locks should
--- be freed, obviously.
---
 keep_lock = true
 fiber_to_ref = fiber.create(keep_ref, 1)
 while vshard.storage.buckets_info(1)[1].ref_rw ~= 1 do fiber.sleep(0.01) end
2 changes: 1 addition & 1 deletion test/rebalancer/rebalancer.result
@@ -360,7 +360,7 @@ test_run:switch('box_1_a')
 vshard.storage.rebalancer_disable()
 ---
 ...
-wait_bucket_is_collected(100)
+for i = 91, 100 do wait_bucket_is_collected(i) end
 ---
 ...
 vshard.storage.bucket_force_create(91, 10)
2 changes: 1 addition & 1 deletion test/rebalancer/rebalancer.test.lua
@@ -181,7 +181,7 @@ util.map_bucket_protection(test_run, {REPLICASET_1}, true)
 --
 test_run:switch('box_1_a')
 vshard.storage.rebalancer_disable()
-wait_bucket_is_collected(100)
+for i = 91, 100 do wait_bucket_is_collected(i) end
 vshard.storage.bucket_force_create(91, 10)
 space = box.space.test
 space:replace{1, 91}
4 changes: 2 additions & 2 deletions test/rebalancer/receiving_bucket.result
@@ -228,7 +228,7 @@ vshard.storage.internal.errinj.ERRINJ_LAST_RECEIVE_DELAY = true
 _ = test_run:switch('box_2_a')
 ---
 ...
-_, err = vshard.storage.bucket_send(101, util.replicasets[1], {timeout = 0.1})
+_, err = vshard.storage.bucket_send(101, util.replicasets[1], {timeout = 1})
Collaborator review comment on this line:
My only comment: it seems wrong that we use one timeout for sync + data send. Sync is entirely bottlenecked by replication and doesn't depend on bucket size. OTOH, the data send can take considerably longer, and it does depend on the bucket size.

It seems logical and convenient to add a new option, sync_timeout, used to wait for the sync, while timeout would be used for the sending.

Alternatively, to make it more explicit, we could introduce 3 options: sync_timeout + send_timeout + timeout.

  • If sync_timeout and timeout are specified, we calculate send_timeout = timeout - sync_timeout.
  • If send_timeout and timeout are specified, we calculate sync_timeout = timeout - send_timeout.
  • If timeout is not specified, we take it as default, same as now. Then we apply the rules above.
  • If all 3 are specified, we ignore timeout. Or raise an error.

This would let us make the sync timeout in tests very high and the send timeout very small, so the tests can remain fast. (A sketch of these resolution rules follows this file's diff.)

 ---
 ...
 util.is_timeout_error(err)
@@ -308,7 +308,7 @@ while f1:status() ~= 'suspended' do fiber.sleep(0.01) end
 vshard.storage.buckets_info(1)
 ---
 - 1:
-    status: active
+    status: readonly
     rw_lock: true
     id: 1
     ref_rw: 1
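The review comment above proposes splitting bucket_send's single timeout into a sync budget and a send budget. A minimal Lua sketch of the proposed resolution rules — sync_timeout and send_timeout are proposals from the comment, not existing vshard options, and the default constant is made up:

```lua
-- Hypothetical resolution of the three proposed bucket_send options.
-- Returns the sync budget and the send budget in seconds.
local DEFAULT_TIMEOUT = 10 -- assumed default, not vshard's real one

local function resolve_timeouts(opts)
    local sync = opts.sync_timeout
    local send = opts.send_timeout
    local total = opts.timeout
    if sync ~= nil and send ~= nil then
        -- Both budgets are explicit; per the comment, a redundant
        -- total is ignored (raising an error is the stricter option).
        return sync, send
    end
    -- If timeout is not specified, take it as default, same as now.
    total = total or DEFAULT_TIMEOUT
    if sync ~= nil then
        return sync, total - sync
    end
    if send ~= nil then
        return total - send, send
    end
    -- Neither split option given: behave as before, one shared budget.
    return total, total
end
```

This is exactly what lets a test keep the sync budget very high while making the send budget tiny, instead of inflating the single timeout as the diff above does.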
2 changes: 1 addition & 1 deletion test/rebalancer/receiving_bucket.test.lua
@@ -94,7 +94,7 @@ box.space._bucket:get{1}
 _ = test_run:switch('box_1_a')
 vshard.storage.internal.errinj.ERRINJ_LAST_RECEIVE_DELAY = true
 _ = test_run:switch('box_2_a')
-_, err = vshard.storage.bucket_send(101, util.replicasets[1], {timeout = 0.1})
+_, err = vshard.storage.bucket_send(101, util.replicasets[1], {timeout = 1})
 util.is_timeout_error(err)
 wait_bucket_is_collected(101)
 _ = test_run:switch('box_1_a')
4 changes: 4 additions & 0 deletions test/rebalancer/restart_during_rebalancing.result
@@ -306,6 +306,7 @@ vshard.storage.info().bucket
     total: 50
     garbage: 0
     pinned: 0
+    readonly: 0
     sending: 0
 ...
 check_consistency()
@@ -323,6 +324,7 @@ vshard.storage.info().bucket
     total: 50
     garbage: 0
     pinned: 0
+    readonly: 0
     sending: 0
 ...
 check_consistency()
@@ -340,6 +342,7 @@ vshard.storage.info().bucket
     total: 50
     garbage: 0
     pinned: 0
+    readonly: 0
     sending: 0
 ...
 check_consistency()
@@ -357,6 +360,7 @@ vshard.storage.info().bucket
     total: 50
     garbage: 0
     pinned: 0
+    readonly: 0
     sending: 0
 ...
 check_consistency()
8 changes: 5 additions & 3 deletions test/storage-luatest/auto_master_2_2_2_test.lua
@@ -369,7 +369,7 @@ test_group.test_master_discovery_on_disconnect = function(g)
     -- destination's master is disconnected. A master search is
     -- triggered then.
     local ok, err = ivshard.storage.bucket_send(
-        bid, rs_uuid, {timeout = 0.01})
+        bid, rs_uuid, {timeout = 1})
     ilt.assert(not ok)
     ilt.assert_not_equals(err, nil)
     -- Recovery will re-discover the master.
@@ -411,13 +411,15 @@ test_group.test_master_discovery_on_disconnect = function(g)
     g.replica_2_b:update_box_cfg{read_only = false}
     promote_if_needed(g, g.replica_2_b)
     send_bucket_to_new_master(g.replica_1_a, g.replica_2_b)
-    -- Can't GC the bucket until the old master is back. But can send it.
-    g.replica_2_b:exec(bucket_send, {bid, g.replica_1_a:replicaset_uuid()})
+    -- Can't GC the bucket until the old master is back and can't send it.

     -- Restore everything back.
     g.replica_2_a:start()
     vtest.cluster_cfg(g, global_cfg)
     promote_if_needed(g, g.replica_2_a)
+    -- `replica_2_a` should get the bucket from `replica_2_b` to send it.
+    g.replica_2_a:wait_for_vclock_of(g.replica_2_b)
+    g.replica_2_a:exec(bucket_send, {bid, g.replica_1_a:replicaset_uuid()})
     g.replica_2_b:exec(bucket_gc_wait)
     g.replica_2_b:update_box_cfg{read_only = true}
     vtest.cluster_exec_each(g, function()
2 changes: 1 addition & 1 deletion test/storage-luatest/bucket_triggers_test.lua
@@ -493,7 +493,7 @@ test_group.test_bucket_space_reject_bad_replace_on_transition = function(g)
         _bucket:replace{bid, ivconst.BUCKET.ACTIVE}
         internal.is_bucket_protected = true
         -- To be sure that the loops above didn't somehow skip everything.
-        ilt.assert_equals(count, 42, 'transition count')
+        ilt.assert_equals(count, 56, 'transition count')
         _G.bucket_gc_continue()
     end)
     rep_b:wait_vclock_of(rep_a)