Skip to content

Release node name on graceful shutdown (Issue #9734) #10367

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5e9a1a5
Release node name on graceful shutdown
stanislav-shchetinin Oct 13, 2024
aaa6763
fixed .proto
stanislav-shchetinin Oct 13, 2024
550e326
DB update
stanislav-shchetinin Oct 13, 2024
19910f0
clean up
stanislav-shchetinin Oct 14, 2024
092d8cb
refactoring: DbUpdateSlotIndexToNull
stanislav-shchetinin Oct 14, 2024
9f222c6
flag processing && refactoring
stanislav-shchetinin Oct 14, 2024
f14d78e
refactoring
stanislav-shchetinin Oct 14, 2024
dc5a14d
added test
stanislav-shchetinin Oct 15, 2024
f2d1373
refactoring test
stanislav-shchetinin Oct 15, 2024
95afac3
update test (timeout now)
stanislav-shchetinin Oct 16, 2024
431ecbb
clean up
stanislav-shchetinin Oct 16, 2024
1370b75
fixed test
stanislav-shchetinin Oct 16, 2024
863700e
refactoring
stanislav-shchetinin Oct 17, 2024
18dbe1c
fill event
stanislav-shchetinin Oct 17, 2024
28a4dce
Merge branch 'main' into node-name
stanislav-shchetinin Oct 17, 2024
6b69770
nodeId for GracefulShutdown
stanislav-shchetinin Oct 18, 2024
150a9f8
clean up
stanislav-shchetinin Oct 18, 2024
cc5e3cd
graceful unused
stanislav-shchetinin Oct 18, 2024
5988593
dynamic
stanislav-shchetinin Oct 18, 2024
1b09df5
graceful shutdown fix
stanislav-shchetinin Nov 1, 2024
c82bfe1
SendData
stanislav-shchetinin Nov 1, 2024
1704068
Merge branch 'main' into node-name
stanislav-shchetinin Nov 1, 2024
97e633a
Merge branch 'main' into node-name
stanislav-shchetinin Nov 27, 2024
b924f97
fix
stanislav-shchetinin Nov 28, 2024
e6017a3
fix
stanislav-shchetinin Nov 28, 2024
ae2a927
fix
stanislav-shchetinin Nov 28, 2024
13e2190
fix
stanislav-shchetinin Nov 28, 2024
e288386
fix
stanislav-shchetinin Nov 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions ydb/core/driver_lib/run/run.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@

#include <ydb/core/mind/local.h>
#include <ydb/core/mind/tenant_pool.h>
#include <ydb/core/mind/node_broker.h>
#include <ydb/core/base/hive.h>

#include <ydb/core/base/tablet_resolver.h>
Expand Down Expand Up @@ -1772,6 +1773,27 @@ void TKikimrRunner::KikimrStart() {
void TKikimrRunner::KikimrStop(bool graceful) {
Y_UNUSED(graceful);

bool enableReleaseNodeNameOnGracefulShutdown = AppData->FeatureFlags.GetEnableReleaseNodeNameOnGracefulShutdown();

if (enableReleaseNodeNameOnGracefulShutdown) {
using namespace NKikimr::NNodeBroker;
using TEvent = TEvNodeBroker::TEvGracefulShutdownRequest;

const ui32 nodeId = ActorSystem->NodeId;
bool isDynamicNode = AppData->DynamicNameserviceConfig->MinDynamicNodeId <= nodeId;

if (isDynamicNode) {
NTabletPipe::TClientConfig pipeConfig;
pipeConfig.RetryPolicy = {.RetryLimitCount = 10};
auto pipe = NTabletPipe::CreateClient({}, MakeNodeBrokerID(), pipeConfig);
TActorId nodeBrokerPipe = ActorSystem->Register(pipe);
THolder<TEvent> event = MakeHolder<TEvent>();
event->Record.SetNodeId(nodeId);

NTabletPipe::SendData({}, nodeBrokerPipe, event.Release());
}
}

if (EnabledGrpcService) {
ActorSystem->Send(new IEventHandle(NGRpcService::CreateGrpcPublisherServiceActorId(), {}, new TEvents::TEvPoisonPill));
}
Expand Down
20 changes: 20 additions & 0 deletions ydb/core/mind/node_broker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,18 @@ void TNodeBroker::DbUpdateNodeLocation(const TNodeInfo &node,
db.Table<T>().Key(node.NodeId).Update<T::Location>(node.Location.GetSerializedLocation());
}

void TNodeBroker::DbReleaseSlotIndex(const TNodeInfo &node,
TTransactionContext &txc)
{

LOG_DEBUG_S(TActorContext::AsActorContext(), NKikimrServices::NODE_BROKER,
"Release slot index (" << node.SlotIndex << ") node " << node.IdString() );
NIceDb::TNiceDb db(txc.DB);
using T = Schema::Nodes;
db.Table<T>().Key(node.NodeId)
.UpdateToNull<T::SlotIndex>();
}

void TNodeBroker::DbUpdateNodeAuthorizedByCertificate(const TNodeInfo &node,
TTransactionContext &txc)
{
Expand Down Expand Up @@ -1016,6 +1028,14 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev,
ctx.RegisterWithSameMailbox(new TResolveTenantActor(ev, SelfId()));
}

// Entry point for TEvGracefulShutdownRequest: traces the request, bumps the
// cumulative request counter and hands the event over to a
// TTxGracefulShutdown transaction.
void TNodeBroker::Handle(TEvNodeBroker::TEvGracefulShutdownRequest::TPtr &ev,
                         const TActorContext &ctx)
{
    const auto &request = ev->Get()->Record;
    LOG_TRACE_S(ctx, NKikimrServices::NODE_BROKER,
                "Handle TEvNodeBroker::TEvGracefulShutdownRequest"
                << ": request# " << request.ShortDebugString());

    auto &cumulative = TabletCounters->Cumulative();
    cumulative[COUNTER_GRACEFUL_SHUTDOWN_REQUESTS].Increment(1);

    ITransaction *tx = CreateTxGracefulShutdown(ev);
    ProcessTx(tx, ctx);
}

void TNodeBroker::Handle(TEvNodeBroker::TEvExtendLeaseRequest::TPtr &ev,
const TActorContext &ctx)
{
Expand Down
14 changes: 14 additions & 0 deletions ydb/core/mind/node_broker.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ struct TEvNodeBroker {
EvSetConfigRequest,
EvSetConfigResponse,

// decommission
EvGracefulShutdownRequest,
EvGracefulShutdownResponse,

// TODO: remove
// internal
//EvNodeExpire = EvListNodes + 512,
Expand Down Expand Up @@ -125,6 +129,11 @@ struct TEvNodeBroker {
EvRegistrationRequest> {
};

// Event wrapping the NKikimrNodeBroker::TGracefulShutdownRequest protobuf
// record; identified by the EvGracefulShutdownRequest event id.
struct TEvGracefulShutdownRequest : public TEventPB<TEvGracefulShutdownRequest,
NKikimrNodeBroker::TGracefulShutdownRequest,
EvGracefulShutdownRequest> {
};

struct TEvExtendLeaseRequest : public TEventPB<TEvExtendLeaseRequest,
NKikimrNodeBroker::TExtendLeaseRequest,
EvExtendLeaseRequest> {
Expand Down Expand Up @@ -156,6 +165,11 @@ struct TEvNodeBroker {
EvRegistrationResponse> {
};

// Event wrapping the NKikimrNodeBroker::TGracefulShutdownResponse protobuf
// record; identified by the EvGracefulShutdownResponse event id.
struct TEvGracefulShutdownResponse : public TEventPB<TEvGracefulShutdownResponse,
NKikimrNodeBroker::TGracefulShutdownResponse,
EvGracefulShutdownResponse> {
};

struct TEvExtendLeaseResponse : public TEventPB<TEvExtendLeaseResponse,
NKikimrNodeBroker::TExtendLeaseResponse,
EvExtendLeaseResponse> {
Expand Down
64 changes: 64 additions & 0 deletions ydb/core/mind/node_broker__graceful_shutdown.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include "node_broker_impl.h"

#include <ydb/core/protos/counters_node_broker.pb.h>

namespace NKikimr::NNodeBroker {

using namespace NKikimrNodeBroker;

// Transaction executed for TEvGracefulShutdownRequest.
//
// Looks up the node named in the request and, if it currently holds a slot
// index, returns that index to the pool of the node's serviced subdomain,
// nulls the SlotIndex column in the local database and clears the in-memory
// value. The reply (TEvGracefulShutdownResponse) is sent to the request sender
// from Complete().
//
// Status codes:
//   OK    - the node is known; its slot index is released (or was already
//           absent, which makes a repeated request a harmless no-op).
//   ERROR - the node id is not present in Self->Nodes.
class TNodeBroker::TTxGracefulShutdown : public TTransactionBase<TNodeBroker> {
public:
    TTxGracefulShutdown(TNodeBroker *self, TEvNodeBroker::TEvGracefulShutdownRequest::TPtr &ev)
        : TBase(self)
        , Event(ev)
    {
    }

    TTxType GetTxType() const override { return TXTYPE_GRACESFUL_SHUTDOWN; }

    // Builds the response and applies the slot release. Always returns true:
    // the transaction never asks to be restarted.
    bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
    {
        const auto& rec = Event->Get()->Record;
        const auto nodeId = rec.GetNodeId();

        LOG_DEBUG_S(ctx, NKikimrServices::NODE_BROKER,
                    "TTxGracefulShutdown Execute. Graceful Shutdown request from " << nodeId << " ");

        Response = MakeHolder<TEvNodeBroker::TEvGracefulShutdownResponse>();
        const auto it = Self->Nodes.find(nodeId);

        if (it == Self->Nodes.end()) {
            Response->Record.MutableStatus()->SetCode(TStatus::ERROR);
            Response->Record.MutableStatus()->SetReason(TStringBuilder() << "Cannot find node " << nodeId);
            return true;
        }

        auto& node = it->second;

        // A node may have no slot index (e.g. it was already released by an
        // earlier shutdown request). Calling value() on an empty optional
        // would fail, so release only when there is something to release.
        if (node.SlotIndex.has_value()) {
            Self->SlotIndexesPools[node.ServicedSubDomain].Release(node.SlotIndex.value());
            Self->DbReleaseSlotIndex(node, txc);
            node.SlotIndex.reset();
        }

        Response->Record.MutableStatus()->SetCode(TStatus::OK);
        return true;
    }

    // Sends the response prepared in Execute() back to the request sender and
    // notifies the tablet that the transaction has finished.
    void Complete(const TActorContext &ctx) override
    {
        LOG_DEBUG(ctx, NKikimrServices::NODE_BROKER, "TTxGracefulShutdown Complete");
        ctx.Send(Event->Sender, Response.Release());
        Self->TxCompleted(this, ctx);
    }

private:
    // Original request; kept to read the record and the sender for the reply.
    TEvNodeBroker::TEvGracefulShutdownRequest::TPtr Event;
    // Reply built in Execute() and released to the actor system in Complete().
    THolder<TEvNodeBroker::TEvGracefulShutdownResponse> Response;
};

// Factory for the graceful-shutdown transaction bound to this tablet and the
// given request event.
ITransaction *TNodeBroker::CreateTxGracefulShutdown(TEvNodeBroker::TEvGracefulShutdownRequest::TPtr &ev)
{
    ITransaction *tx = new TTxGracefulShutdown(this, ev);
    return tx;
}

} // NKikimr::NNodeBroker
8 changes: 7 additions & 1 deletion ydb/core/mind/node_broker_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ class TNodeBroker : public TActor<TNodeBroker>
class TTxInitScheme;
class TTxLoadState;
class TTxRegisterNode;
class TTxGracefulShutdown;
class TTxUpdateConfig;
class TTxUpdateConfigSubscription;
class TTxUpdateEpoch;
Expand All @@ -156,6 +157,7 @@ class TNodeBroker : public TActor<TNodeBroker>
ITransaction *CreateTxInitScheme();
ITransaction *CreateTxLoadState();
ITransaction *CreateTxRegisterNode(TEvPrivate::TEvResolvedRegistrationRequest::TPtr &ev);
ITransaction *CreateTxGracefulShutdown(TEvNodeBroker::TEvGracefulShutdownRequest::TPtr &ev);
ITransaction *CreateTxUpdateConfig(TEvConsole::TEvConfigNotificationRequest::TPtr &ev);
ITransaction *CreateTxUpdateConfig(TEvNodeBroker::TEvSetConfigRequest::TPtr &ev);
ITransaction *CreateTxUpdateConfigSubscription(TEvConsole::TEvReplaceConfigSubscriptionsResponse::TPtr &ev);
Expand Down Expand Up @@ -202,6 +204,7 @@ class TNodeBroker : public TActor<TNodeBroker>
HFuncTraced(TEvNodeBroker::TEvListNodes, Handle);
HFuncTraced(TEvNodeBroker::TEvResolveNode, Handle);
HFuncTraced(TEvNodeBroker::TEvRegistrationRequest, Handle);
HFuncTraced(TEvNodeBroker::TEvGracefulShutdownRequest, Handle);
HFuncTraced(TEvNodeBroker::TEvExtendLeaseRequest, Handle);
HFuncTraced(TEvNodeBroker::TEvCompactTables, Handle);
HFuncTraced(TEvNodeBroker::TEvGetConfigRequest, Handle);
Expand Down Expand Up @@ -288,9 +291,10 @@ class TNodeBroker : public TActor<TNodeBroker>
TTransactionContext &txc);
void DbUpdateNodeLocation(const TNodeInfo &node,
TTransactionContext &txc);
void DbReleaseSlotIndex(const TNodeInfo &node,
TTransactionContext &txc);
void DbUpdateNodeAuthorizedByCertificate(const TNodeInfo &node,
TTransactionContext &txc);

void Handle(TEvConsole::TEvConfigNotificationRequest::TPtr &ev,
const TActorContext &ctx);
void Handle(TEvConsole::TEvReplaceConfigSubscriptionsResponse::TPtr &ev,
Expand All @@ -301,6 +305,8 @@ class TNodeBroker : public TActor<TNodeBroker>
const TActorContext &ctx);
void Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev,
const TActorContext &ctx);
void Handle(TEvNodeBroker::TEvGracefulShutdownRequest::TPtr &ev,
const TActorContext &ctx);
void Handle(TEvNodeBroker::TEvExtendLeaseRequest::TPtr &ev,
const TActorContext &ctx);
void Handle(TEvNodeBroker::TEvCompactTables::TPtr &ev,
Expand Down
40 changes: 40 additions & 0 deletions ydb/core/mind/node_broker_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,26 @@ void CheckRegistration(TTestActorRuntime &runtime,
false, path, Nothing(), name);
}

// Builds a TEvGracefulShutdownRequest whose record carries the given node id.
THolder<TEvNodeBroker::TEvGracefulShutdownRequest>
MakeEventGracefulShutdown (ui32 nodeId)
{
    using TRequest = TEvNodeBroker::TEvGracefulShutdownRequest;

    auto request = MakeHolder<TRequest>();
    request->Record.SetNodeId(nodeId);
    return request;
}

// Sends a graceful-shutdown request for nodeId to the node broker through a
// pipe, waits for the TEvGracefulShutdownResponse and asserts that its status
// code is OK.
void CheckGracefulShutdown(TTestActorRuntime &runtime,
                           TActorId sender,
                           ui32 nodeId)
{
    using TResponse = TEvNodeBroker::TEvGracefulShutdownResponse;

    auto request = MakeEventGracefulShutdown(nodeId);
    runtime.SendToPipe(MakeNodeBrokerID(), sender, request.Release(), 0, GetPipeConfigWithRetries());

    TAutoPtr<IEventHandle> handle;
    auto reply = runtime.GrabEdgeEventRethrow<TResponse>(handle);

    const auto &status = reply->Record.GetStatus();
    UNIT_ASSERT_VALUES_EQUAL(status.GetCode(), TStatus::OK);
}

NKikimrNodeBroker::TEpoch GetEpoch(TTestActorRuntime &runtime,
TActorId sender)
{
Expand Down Expand Up @@ -1795,4 +1815,24 @@ Y_UNIT_TEST_SUITE(TSlotIndexesPoolTest) {
}
}

Y_UNIT_TEST_SUITE(GracefulShutdown) {
    // Registers a node that is expected to get the name "slot-0", reports its
    // graceful shutdown, then registers a second node and expects it to be
    // given "slot-0" as well.
    Y_UNIT_TEST(TTxGracefulShutdown) {
        TTestBasicRuntime runtime(8, false);
        Setup(runtime, 4);
        const TActorId edge = runtime.AllocateEdgeActor();

        const auto epoch = GetEpoch(runtime, edge);

        // First node takes the slot name.
        CheckRegistration(runtime, edge, "host1", 1001, "host1.yandex.net", "1.2.3.4",
                          1, 2, 3, 4, TStatus::OK, NODE1, epoch.GetNextEnd(),
                          false, DOMAIN_NAME, {}, "slot-0");

        // Shutdown of the first node must succeed.
        CheckGracefulShutdown(runtime, edge, NODE1);

        // A different host is now expected to receive the same slot name.
        CheckRegistration(runtime, edge, "host2", 1001, "host2.yandex.net", "1.2.3.5",
                          1, 2, 3, 5, TStatus::OK, NODE2, epoch.GetNextEnd(),
                          false, DOMAIN_NAME, {}, "slot-0");
    }
}

} // NKikimr
1 change: 1 addition & 0 deletions ydb/core/mind/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ SRCS(
lease_holder.h
local.cpp
local.h
node_broker__graceful_shutdown.cpp
node_broker.cpp
node_broker.h
node_broker_impl.h
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/protos/counters_node_broker.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ enum ECumulativeCounters {
COUNTER_RESOLVE_NODE_REQUESTS = 1 [(CounterOpts) = {Name: "ResolveNodeRequests"}];
COUNTER_REGISTRATION_REQUESTS = 2 [(CounterOpts) = {Name: "RegistrationRequests"}];
COUNTER_EXTEND_LEASE_REQUESTS = 3 [(CounterOpts) = {Name: "ExtendLeaseRequests"}];
COUNTER_GRACEFUL_SHUTDOWN_REQUESTS = 4 [(CounterOpts) = {Name: "GracefulShutdownRequests"}];
}

enum EPercentileCounters {
Expand Down Expand Up @@ -52,4 +53,5 @@ enum ETxTypes {
TXTYPE_UPDATE_CONFIG = 4 [(TxTypeOpts) = {Name: "TTxUpdateConfig"}];
TXTYPE_UPDATE_CONFIG_SUBSCRIPTION = 5 [(TxTypeOpts) = {Name: "TTxUpdateConfigSubscription"}];
TXTYPE_UPDATE_EPOCH = 6 [(TxTypeOpts) = {Name: "TTxUpdateEpoch"}];
TXTYPE_GRACESFUL_SHUTDOWN = 7 [(TxTypeOpts) = {Name: "TTxGracefulShutdown"}];
}
1 change: 1 addition & 0 deletions ydb/core/protos/feature_flags.proto
Original file line number Diff line number Diff line change
Expand Up @@ -177,4 +177,5 @@ message TFeatureFlags {
optional bool EnableDriveSerialsDiscovery = 152 [default = false];
optional bool EnableSeparateDiskSpaceQuotas = 153 [default = false];
optional bool EnableAntlr4Parser = 154 [default = false];
optional bool EnableReleaseNodeNameOnGracefulShutdown = 155 [default = false];
}
8 changes: 8 additions & 0 deletions ydb/core/protos/node_broker.proto
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ message TRegistrationResponse {
optional uint64 ScopePathId = 4;
}

// Record carried by TEvNodeBroker::TEvGracefulShutdownRequest.
message TGracefulShutdownRequest {
// Id of the node that is shutting down.
optional uint32 NodeId = 1;
}

// Record carried by TEvNodeBroker::TEvGracefulShutdownResponse.
message TGracefulShutdownResponse {
// Outcome of the request; the node broker sets OK, or ERROR with a reason
// when the node id is unknown.
optional TStatus Status = 1;
}

message TExtendLeaseRequest {
optional uint32 NodeId = 1;
}
Expand Down
Loading