Skip to content

Commit e2af447

Browse files
vitalifqyryq
authored and committed
Fix snapshot cleanup on ModifyScheme failures during vector index build (ydb-platform#27454) (ydb-platform#27581)
1 parent 64850d9 commit e2af447

File tree

4 files changed

+117
-2
lines changed

4 files changed

+117
-2
lines changed

ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2675,6 +2675,23 @@ struct TSchemeShard::TIndexBuilder::TTxReplyModify: public TSchemeShard::TIndexB
26752675
case TIndexBuildInfo::EState::CreateBuild:
26762676
case TIndexBuildInfo::EState::LockBuild:
26772677
case TIndexBuildInfo::EState::AlterSequence:
2678+
{
2679+
Y_ENSURE(txId == buildInfo.ApplyTxId);
2680+
2681+
if (record.GetStatus() != NKikimrScheme::StatusAccepted &&
2682+
record.GetStatus() != NKikimrScheme::StatusAlreadyExists) {
2683+
// Otherwise we won't cancel the index build correctly
2684+
buildInfo.ApplyTxId = {};
2685+
buildInfo.ApplyTxStatus = NKikimrScheme::StatusSuccess;
2686+
buildInfo.ApplyTxDone = false;
2687+
} else {
2688+
buildInfo.ApplyTxStatus = record.GetStatus();
2689+
}
2690+
Self->PersistBuildIndexApplyTxStatus(db, buildInfo);
2691+
2692+
ifErrorMoveTo(TIndexBuildInfo::EState::Rejection_Applying);
2693+
break;
2694+
}
26782695
case TIndexBuildInfo::EState::Applying:
26792696
case TIndexBuildInfo::EState::Rejection_Applying:
26802697
{

ydb/core/tx/schemeshard/ut_helpers/helpers.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2871,7 +2871,7 @@ namespace NSchemeShardUT_Private {
28712871
return WaitNextValResult(runtime, sender, expectedStatus);
28722872
}
28732873

2874-
NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId,
2874+
NKikimrMiniKQL::TResult ReadSystemTable(TTestActorRuntime& runtime, ui64 tabletId,
28752875
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns,
28762876
const TString& rangeFlags)
28772877
{
@@ -2886,7 +2886,7 @@ namespace NSchemeShardUT_Private {
28862886
NKikimrProto::EReplyStatus status = LocalMiniKQL(runtime, tabletId, Sprintf(R"((
28872887
(let range '(%s%s))
28882888
(let columns '(%s))
2889-
(let result (SelectRange '__user__%s range columns '()))
2889+
(let result (SelectRange '%s range columns '()))
28902890
(return (AsList (SetResult 'Result result) ))
28912891
))", rangeFlags.data(), keyFmt.data(), columnsFmt.data(), table.data()), result, error);
28922892
UNIT_ASSERT_VALUES_EQUAL_C(status, NKikimrProto::EReplyStatus::OK, error);
@@ -2895,6 +2895,13 @@ namespace NSchemeShardUT_Private {
28952895
return result;
28962896
}
28972897

2898+
// Prepends "__user__" to `table` and forwards every other argument unchanged
// to ReadSystemTable, returning its result.
NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId,
2899+
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns,
2900+
const TString& rangeFlags)
2901+
{
2902+
return ReadSystemTable(runtime, tabletId, "__user__"+table, pk, columns, rangeFlags);
2903+
}
2904+
28982905
TVector<TString> ReadShards(TTestActorRuntime& runtime, ui64 schemeshardId, const TString& table) {
28992906
auto pathDesc = DescribePath(runtime, schemeshardId, table, true, false, true);
29002907
auto tableDesc = pathDesc.GetPathDescription().GetTable();

ydb/core/tx/schemeshard/ut_helpers/helpers.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -718,6 +718,8 @@ namespace NSchemeShardUT_Private {
718718
TTestActorRuntime& runtime, const TString& path,
719719
Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS);
720720

721+
NKikimrMiniKQL::TResult ReadSystemTable(TTestActorRuntime& runtime, ui64 tabletId,
722+
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns, const TString& rangeFlags = "");
721723
NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId,
722724
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns, const TString& rangeFlags = "");
723725

ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp

Lines changed: 89 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
#include <ydb/public/lib/deprecated/kicli/kicli.h>
12
#include <ydb/core/base/table_index.h>
23
#include <ydb/core/protos/schemeshard/operations.pb.h>
34
#include <ydb/core/tx/schemeshard/ut_helpers/helpers.h>
@@ -1710,4 +1711,92 @@ Y_UNIT_TEST_SUITE(VectorIndexBuildTest) {
17101711
}
17111712
}
17121713

1714+
// Scenario: a vector index build whose ESchemeOpInitiateBuildIndexImplTable
// proposal is tampered with so that it requests more partitions than the
// MaxShardsInPath scheme limit. The test expects the build to end up
// STATE_REJECTED, the schemeshard "SnapshotTables" table to be empty afterwards,
// and a second, untampered build of the same index to reach STATE_DONE.
Y_UNIT_TEST(CreateBuildProposeReject) {
1715+
TTestBasicRuntime runtime;
1716+
TTestEnv env(runtime);
1717+
ui64 txId = 100;
1718+
1719+
runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE);
1720+
runtime.SetLogPriority(NKikimrServices::BUILD_INDEX, NLog::PRI_TRACE);
1721+
1722+
// Source table: Uint64 key "id" plus a String "embedding" column to index.
TestCreateTable(runtime, ++txId, "/MyRoot", R"(
1723+
Name: "vectors"
1724+
Columns { Name: "id" Type: "Uint64" }
1725+
Columns { Name: "embedding" Type: "String" }
1726+
KeyColumnNames: [ "id" ]
1727+
)");
1728+
env.TestWaitNotification(runtime, txId);
1729+
1730+
// NOTE(review): globalIndexSettings and kmeansTreeSettings are not referenced
// again anywhere in this test body - confirm whether they are still needed.
NYdb::NTable::TGlobalIndexSettings globalIndexSettings;
1731+
1732+
std::unique_ptr<NYdb::NTable::TKMeansTreeSettings> kmeansTreeSettings;
1733+
{
1734+
Ydb::Table::KMeansTreeSettings proto;
1735+
UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"(
1736+
settings {
1737+
metric: DISTANCE_COSINE
1738+
vector_type: VECTOR_TYPE_FLOAT
1739+
vector_dimension: 1024
1740+
}
1741+
levels: 5
1742+
clusters: 4
1743+
)", &proto));
1744+
using T = NYdb::NTable::TKMeansTreeSettings;
1745+
kmeansTreeSettings = std::make_unique<T>(T::FromProto(proto));
1746+
}
1747+
1748+
// Read the per-path shard limit from the domain's scheme limits so the
// injected partition count below is guaranteed to exceed it.
const auto maxShards = DescribePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/vectors")
1749+
.GetPathDescription().GetDomainDescription().GetSchemeLimits().GetMaxShardsInPath();
1750+
1751+
// Intercept TEvModifySchemeTransaction events and rewrite the first
// transaction in place when it is an InitiateBuildIndexImplTable operation.
// The callback returns false for every event - presumably meaning events
// are never held back, only mutated in flight; confirm against TBlockEvents.
TBlockEvents<TEvSchemeShard::TEvModifySchemeTransaction> blocker(runtime, [&](auto& ev) {
1752+
auto& modifyScheme = *ev->Get()->Record.MutableTransaction(0);
1753+
if (modifyScheme.GetOperationType() == NKikimrSchemeOp::ESchemeOpInitiateBuildIndexImplTable) {
1754+
auto& op = *modifyScheme.MutableCreateTable();
1755+
// make shard count exceed the limit to fail the operation
1756+
op.SetUniformPartitionsCount(maxShards+1);
1757+
}
1758+
return false;
1759+
});
1760+
1761+
// First build attempt: runs with the tampered proposal.
const ui64 buildIndexTx = ++txId;
1762+
AsyncBuildVectorIndex(runtime, buildIndexTx, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/vectors", "index1", {"embedding"});
1763+
1764+
env.TestWaitNotification(runtime, buildIndexTx);
1765+
1766+
{
1767+
// The build must be rejected and its description must carry the
// partition-count error text.
auto buildIndexOperation = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexTx);
1768+
Cout << "BuildIndex 1 " << buildIndexOperation.DebugString() << Endl;
1769+
UNIT_ASSERT_VALUES_EQUAL_C(
1770+
buildIndexOperation.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_REJECTED,
1771+
buildIndexOperation.DebugString()
1772+
);
1773+
UNIT_ASSERT_STRING_CONTAINS(buildIndexOperation.DebugString(), "Invalid partition count specified");
1774+
}
1775+
1776+
// Presumably detaches the interceptor so later proposals pass through
// unmodified - confirm Stop()/Unblock() semantics against TBlockEvents.
blocker.Stop().Unblock();
1777+
1778+
{
1779+
// Read the schemeshard's own "SnapshotTables" table (name used verbatim,
// hence ReadSystemTable rather than ReadTable) and require it to be empty.
auto result = ReadSystemTable(runtime, TTestTxConfig::SchemeShard, "SnapshotTables", {"Id", "TableOwnerId", "TableLocalId"}, {"Id"});
1780+
auto value = NClient::TValue::Create(result);
1781+
auto rowCount = value["Result"]["List"].Size();
1782+
UNIT_ASSERT_VALUES_EQUAL_C(rowCount, 0, "Snapshot is not removed after rejecting index build");
1783+
}
1784+
1785+
// The next index build should succeed
1786+
1787+
const ui64 buildIndexTx2 = ++txId;
1788+
AsyncBuildVectorIndex(runtime, buildIndexTx2, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/vectors", "index1", {"embedding"});
1789+
env.TestWaitNotification(runtime, buildIndexTx2);
1790+
1791+
{
1792+
// Same index name and column as the rejected attempt; this one must finish.
auto buildIndexOperation = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexTx2);
1793+
Cout << "BuildIndex 2 " << buildIndexOperation.DebugString() << Endl;
1794+
UNIT_ASSERT_VALUES_EQUAL_C(
1795+
buildIndexOperation.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_DONE,
1796+
buildIndexOperation.DebugString()
1797+
);
1798+
}
1799+
1800+
}
1801+
17131802
}

0 commit comments

Comments
 (0)