Skip to content

Commit dc9a67b

Browse files
committed
issue-4548: fix filestore-server crashing while processing a nullptr event in TIndexTabletActor::StateZombie
1 parent 0b1dad2 commit dc9a67b

File tree

3 files changed

+217
-0
lines changed

3 files changed

+217
-0
lines changed

cloud/storage/core/libs/throttling/tablet_throttler.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ class TTabletThrottler final
6262

6363
void OnShutDown(const NActors::TActorContext&) override
6464
{
65+
if (PostponedQueueFlushInProgress) {
66+
// We are already in the process of flushing postponed requests
67+
return;
68+
}
6569
PostponedQueueFlushScheduled = false;
6670

6771
while (PostponedRequests.size()) {
@@ -71,6 +75,8 @@ class TTabletThrottler final
7175
TAutoPtr<NActors::IEventHandle> ev = x.Event.release();
7276
Owner.Receive(ev);
7377

78+
// When shutting down, we do not expect that the actor will try to
79+
// schedule flushing again
7480
Y_ABORT_UNLESS(!PostponedQueueFlushScheduled);
7581
PostponedRequests.pop_front();
7682
}
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
#include "tablet_throttler.h"
2+
3+
#include "tablet_throttler_logger.h"
4+
#include "tablet_throttler_policy.h"
5+
6+
#include <cloud/storage/core/libs/actors/helpers.h>
7+
#include <cloud/storage/core/libs/common/context.h>
8+
9+
#include <contrib/ydb/core/testlib/basics/runtime.h>
10+
#include <contrib/ydb/library/actors/core/actor.h>
11+
#include <contrib/ydb/library/actors/core/hfunc.h>
12+
13+
#include <library/cpp/testing/unittest/registar.h>
14+
15+
namespace NCloud {
16+
17+
////////////////////////////////////////////////////////////////////////////////
18+
19+
namespace {
20+
21+
////////////////////////////////////////////////////////////////////////////////
22+
23+
using namespace NActors;
24+
25+
class TSampleActorWithThrottler final: public TActor<TSampleActorWithThrottler>
26+
{
27+
public:
28+
TSampleActorWithThrottler()
29+
: TActor(&TThis::StateWork)
30+
{}
31+
32+
void ResetThrottler(ITabletThrottlerPtr throttler)
33+
{
34+
Throttler = std::move(throttler);
35+
}
36+
37+
STRICT_STFUNC(
38+
StateWork, HFunc(NActors::TEvents::TEvWakeup, HandleWakeUp);
39+
HFunc(NActors::TEvents::TEvFlushLog, HandleFlush));
40+
41+
STRICT_STFUNC(
42+
StateZombie, HFunc(NActors::TEvents::TEvWakeup, RejectRequest);)
43+
44+
void HandleWakeUp(
45+
const NActors::TEvents::TEvWakeup::TPtr& ev,
46+
const NActors::TActorContext& ctx)
47+
{
48+
if (RequestsCount++ == 0) {
49+
// The first request should be postponed
50+
51+
auto callContext =
52+
MakeIntrusive<TCallContextBase>(static_cast<ui64>(0));
53+
auto requestInfo = TThrottlingRequestInfo{};
54+
55+
Throttler->Throttle(
56+
ctx,
57+
callContext,
58+
requestInfo,
59+
[ev]() -> NActors::IEventHandlePtr
60+
{ return NActors::IEventHandlePtr(ev.Release()); },
61+
"TestMethod");
62+
} else {
63+
Become(&TThis::StateZombie);
64+
65+
Throttler->OnShutDown(ctx);
66+
}
67+
}
68+
69+
void HandleFlush(
70+
const NActors::TEvents::TEvFlushLog::TPtr& ev,
71+
const NActors::TActorContext& ctx)
72+
{
73+
Y_UNUSED(ev);
74+
Throttler->StartFlushing(ctx);
75+
}
76+
77+
static void RejectRequest(
78+
const NActors::TEvents::TEvWakeup::TPtr& ev,
79+
const NActors::TActorContext& ctx)
80+
{
81+
auto response = std::make_unique<NActors::TEvents::TEvActorDied>();
82+
NCloud::Reply(ctx, *ev, std::move(response));
83+
}
84+
85+
private:
86+
ITabletThrottlerPtr Throttler;
87+
88+
ui64 RequestsCount = 0;
89+
};
90+
91+
/////////////////////////////////////////////////////////////////////////////
92+
93+
struct TTabletThrottlerLoggerStub: public ITabletThrottlerLogger
94+
{
95+
void LogRequestPostponedBeforeSchedule(
96+
const NActors::TActorContext& ctx,
97+
TCallContextBase& callContext,
98+
TDuration delay,
99+
const char* methodName) const override
100+
{
101+
Y_UNUSED(ctx, callContext, delay, methodName);
102+
}
103+
104+
void LogRequestPostponedAfterSchedule(
105+
const NActors::TActorContext& ctx,
106+
TCallContextBase& callContext,
107+
ui32 postponedCount,
108+
const char* methodName) const override
109+
{
110+
Y_UNUSED(ctx, callContext, postponedCount, methodName);
111+
}
112+
113+
void LogRequestAdvanced(
114+
const NActors::TActorContext& ctx,
115+
TCallContextBase& callContext,
116+
const char* methodName,
117+
ui32 opType,
118+
TDuration delay) const override
119+
{
120+
Y_UNUSED(ctx, callContext, methodName, opType, delay);
121+
}
122+
};
123+
124+
//////////////////////////////////////////////////////////////////////////////
125+
126+
struct TTabletThrottlerPolicyAlwaysPostpone: public ITabletThrottlerPolicy
127+
{
128+
bool TryPostpone(
129+
TInstant ts,
130+
const TThrottlingRequestInfo& requestInfo) override
131+
{
132+
Y_UNUSED(ts, requestInfo);
133+
return true;
134+
}
135+
136+
TMaybe<TDuration> SuggestDelay(
137+
TInstant ts,
138+
TDuration queueTime,
139+
const TThrottlingRequestInfo& requestInfo) override
140+
{
141+
Y_UNUSED(ts, queueTime, requestInfo);
142+
return TDuration::Seconds(1);
143+
}
144+
145+
void OnPostponedEvent(
146+
TInstant ts,
147+
const TThrottlingRequestInfo& requestInfo) override
148+
{
149+
Y_UNUSED(ts, requestInfo);
150+
}
151+
};
152+
153+
} // namespace
154+
155+
Y_UNIT_TEST_SUITE(TTabletThrottlerTest)
156+
{
157+
/**
158+
* Scenario that caused a crash:
159+
* 1. A request is present in the postponed queue
160+
* 2. Throttler flush is initiated
161+
* 3. During the processing of the postponed queue, an actor shutdown is
162+
* initiated
163+
* 4. During the shutdown, Throttle->OnShutDown is called. It is not
164+
* supposed to process the request that was initially in the postponed
165+
* queue the second time.
166+
*/
167+
Y_UNIT_TEST(ShouldNotFlushNullEventOnShutdown)
168+
{
169+
TTabletThrottlerLoggerStub logger;
170+
TTabletThrottlerPolicyAlwaysPostpone policy;
171+
172+
std::unique_ptr<TSampleActorWithThrottler> actor =
173+
std::make_unique<TSampleActorWithThrottler>();
174+
auto throttler = CreateTabletThrottler(*actor, logger, policy);
175+
actor->ResetThrottler(std::move(throttler));
176+
177+
TTestActorRuntimeBase runtime;
178+
runtime.Initialize();
179+
180+
auto senderId = runtime.AllocateEdgeActor();
181+
182+
auto actorId = runtime.Register(actor.release());
183+
184+
// One request is postponed
185+
runtime.Send(
186+
TAutoPtr<IEventHandle>(new IEventHandle(
187+
actorId,
188+
senderId,
189+
new NActors::TEvents::TEvWakeup())));
190+
191+
// Flush is initiated
192+
runtime.Send(
193+
TAutoPtr<IEventHandle>(new IEventHandle(
194+
actorId,
195+
senderId,
196+
new NActors::TEvents::TEvFlushLog())));
197+
}
198+
}
199+
200+
} // namespace NCloud
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,19 @@
11
UNITTEST_FOR(cloud/storage/core/libs/throttling)
22

3+
INCLUDE(${ARCADIA_ROOT}/cloud/storage/core/tests/recipes/small.inc)
4+
5+
SPLIT_FACTOR(1)
6+
37
SRCS(
48
helpers_ut.cpp
59
leaky_bucket_ut.cpp
10+
tablet_throttler_ut.cpp
11+
)
12+
13+
PEERDIR(
14+
contrib/ydb/core/testlib
15+
contrib/ydb/core/testlib/basics
16+
contrib/ydb/core/testlib/default
617
)
718

819
END()

0 commit comments

Comments
 (0)