Skip to content

Commit ffd016b

Browse files
authored
Fix DiskRegistry-based devices request tracking (#4575)
continue #4381 1. Fixed operationId reusing 2. Fixed missed OnRequestFinished() when operation finished with error
1 parent e3c0833 commit ffd016b

11 files changed

+144
-59
lines changed

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -661,14 +661,15 @@ bool TNonreplicatedPartitionActor::HandleRequests(STFUNC_SIG)
661661
ui64 TNonreplicatedPartitionActor::GenerateOperationId(
662662
size_t deviceRequestsCount)
663663
{
664+
static std::atomic<ui64> DeviceOperationIdGenerator = 1;
665+
666+
const ui64 val = DeviceOperationIdGenerator.fetch_add(deviceRequestsCount);
664667
const ui32 trackingFreq = Config->GetDeviceOperationTrackingFrequency();
665-
const ui64 operationId =
666-
(trackingFreq > 0 && DeviceOperationIdGenerator % trackingFreq == 0)
667-
? DeviceOperationIdGenerator
668-
: 0;
668+
if (trackingFreq > 0 && val % trackingFreq == 0) {
669+
return val;
670+
}
669671

670-
DeviceOperationIdGenerator += deviceRequestsCount;
671-
return operationId;
672+
return 0;
672673
}
673674

674675
////////////////////////////////////////////////////////////////////////////////

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,6 @@ class TNonreplicatedPartitionActor final
8989
GetCycleCount(),
9090
TLogTitle::TPartitionNonrepl{.DiskId = PartConfig->GetName()}};
9191

92-
ui64 DeviceOperationIdGenerator = 1;
93-
9492
public:
9593
TNonreplicatedPartitionActor(
9694
TStorageConfigPtr config,

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_base_request.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ void TDiskAgentBaseRequestActor::OnRequestStarted(
120120
const NActors::TActorContext& ctx,
121121
const TString& deviceUUID,
122122
TDeviceOperationTracker::ERequestType requestType,
123-
ui32 cookie)
123+
size_t requestIndex)
124124
{
125125
if (!DeviceOperationId) {
126126
return;
@@ -130,24 +130,24 @@ void TDiskAgentBaseRequestActor::OnRequestStarted(
130130
TEvVolumePrivate::TEvDiskRegistryDeviceOperationStarted>(
131131
deviceUUID,
132132
requestType,
133-
DeviceOperationId + cookie);
133+
DeviceOperationId + requestIndex);
134134

135-
ctx.Send(VolumeActorId, startEvent.release());
135+
ctx.Send(VolumeActorId, std::move(startEvent));
136136
}
137137

138138
void TDiskAgentBaseRequestActor::OnRequestFinished(
139139
const NActors::TActorContext& ctx,
140-
ui32 cookie)
140+
size_t requestIndex)
141141
{
142142
if (!DeviceOperationId) {
143143
return;
144144
}
145145

146146
auto finishEvent = std::make_unique<
147147
TEvVolumePrivate::TEvDiskRegistryDeviceOperationFinished>(
148-
DeviceOperationId + cookie);
148+
DeviceOperationId + requestIndex);
149149

150-
ctx.Send(VolumeActorId, finishEvent.release());
150+
ctx.Send(VolumeActorId, std::move(finishEvent));
151151
}
152152

153153
void TDiskAgentBaseRequestActor::HandleCancelRequest(
@@ -156,6 +156,12 @@ void TDiskAgentBaseRequestActor::HandleCancelRequest(
156156
{
157157
const auto* msg = ev->Get();
158158

159+
for (size_t requestIndex = 0; requestIndex < DeviceRequests.size();
160+
++requestIndex)
161+
{
162+
OnRequestFinished(ctx, requestIndex);
163+
}
164+
159165
TVector<TString> devices;
160166
for (const auto& request: DeviceRequests) {
161167
devices.push_back(request.Device.GetDeviceUUID());

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_base_request.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,11 @@ class TDiskAgentBaseRequestActor
8484
const NActors::TActorContext& ctx,
8585
const TString& deviceUUID,
8686
TDeviceOperationTracker::ERequestType requestType,
87-
ui32 cookie);
87+
size_t requestIndex);
8888

89-
void OnRequestFinished(const NActors::TActorContext& ctx, ui32 cookie);
89+
void OnRequestFinished(
90+
const NActors::TActorContext& ctx,
91+
size_t requestIndex);
9092

9193
private:
9294
void StateWork(TAutoPtr<NActors::IEventHandle>& ev);

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_checksumblocks.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@ void TDiskAgentChecksumActor::HandleChecksumDeviceBlocksUndelivery(
146146
const TEvDiskAgent::TEvChecksumDeviceBlocksRequest::TPtr& ev,
147147
const TActorContext& ctx)
148148
{
149+
OnRequestFinished(ctx, ev->Cookie);
150+
149151
const auto& device = DeviceRequests[ev->Cookie].Device;
150152
LOG_WARN(
151153
ctx,
@@ -164,6 +166,8 @@ void TDiskAgentChecksumActor::HandleChecksumDeviceBlocksResponse(
164166
{
165167
auto* msg = ev->Get();
166168

169+
OnRequestFinished(ctx, ev->Cookie);
170+
167171
if (HasError(msg->GetError())) {
168172
HandleError(ctx, msg->GetError(), EStatus::Fail);
169173
return;
@@ -174,8 +178,6 @@ void TDiskAgentChecksumActor::HandleChecksumDeviceBlocksResponse(
174178
const auto rangeSize = deviceRequest.DeviceBlockRange.Size() * PartConfig->GetBlockSize();
175179
Checksums[rangeStart] = {msg->Record.GetChecksum(), rangeSize};
176180

177-
OnRequestFinished(ctx, ev->Cookie);
178-
179181
if (++RequestsCompleted < DeviceRequests.size()) {
180182
return;
181183
}

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_readblocks.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ void TDiskAgentReadActor::HandleReadDeviceBlocksUndelivery(
168168
const TEvDiskAgent::TEvReadDeviceBlocksRequest::TPtr& ev,
169169
const TActorContext& ctx)
170170
{
171+
OnRequestFinished(ctx, ev->Cookie);
172+
171173
const auto& device = DeviceRequests[ev->Cookie].Device;
172174
LOG_WARN(
173175
ctx,
@@ -186,6 +188,8 @@ void TDiskAgentReadActor::HandleReadDeviceBlocksResponse(
186188
{
187189
auto* msg = ev->Get();
188190

191+
OnRequestFinished(ctx, ev->Cookie);
192+
189193
if (HasError(msg->GetError())) {
190194
HandleError(ctx, msg->GetError(), EStatus::Fail);
191195
return;
@@ -212,8 +216,6 @@ void TDiskAgentReadActor::HandleReadDeviceBlocksResponse(
212216
}
213217
}
214218

215-
OnRequestFinished(ctx, ev->Cookie);
216-
217219
if (++RequestsCompleted < DeviceRequests.size()) {
218220
return;
219221
}

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_readblocks_local.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ void TDiskAgentReadLocalActor::HandleReadDeviceBlocksUndelivery(
170170
const TEvDiskAgent::TEvReadDeviceBlocksRequest::TPtr& ev,
171171
const TActorContext& ctx)
172172
{
173+
OnRequestFinished(ctx, ev->Cookie);
174+
173175
const auto& device = DeviceRequests[ev->Cookie].Device;
174176
LOG_WARN(
175177
ctx,
@@ -188,6 +190,8 @@ void TDiskAgentReadLocalActor::HandleReadDeviceBlocksResponse(
188190
{
189191
auto* msg = ev->Get();
190192

193+
OnRequestFinished(ctx, ev->Cookie);
194+
191195
if (HasError(msg->GetError())) {
192196
HandleError(ctx, msg->GetError(), EStatus::Fail);
193197
return;
@@ -225,8 +229,6 @@ void TDiskAgentReadLocalActor::HandleReadDeviceBlocksResponse(
225229
}
226230
}
227231

228-
OnRequestFinished(ctx, ev->Cookie);
229-
230232
if (++RequestsCompleted < DeviceRequests.size()) {
231233
return;
232234
}

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_writeblocks.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ void TDiskAgentWriteActor::HandleWriteDeviceBlocksUndelivery(
179179
const TEvDiskAgent::TEvWriteDeviceBlocksRequest::TPtr& ev,
180180
const TActorContext& ctx)
181181
{
182+
OnRequestFinished(ctx, ev->Cookie);
183+
182184
const auto& device = DeviceRequests[ev->Cookie].Device;
183185
LOG_WARN(
184186
ctx,
@@ -197,13 +199,13 @@ void TDiskAgentWriteActor::HandleWriteDeviceBlocksResponse(
197199
{
198200
auto* msg = ev->Get();
199201

202+
OnRequestFinished(ctx, ev->Cookie);
203+
200204
if (HasError(msg->GetError())) {
201205
HandleError(ctx, msg->GetError(), EStatus::Fail);
202206
return;
203207
}
204208

205-
OnRequestFinished(ctx, ev->Cookie);
206-
207209
if (++RequestsCompleted < DeviceRequests.size()) {
208210
return;
209211
}

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_writeblocks_multi_agent.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ void TDiskAgentMultiWriteActor::HandleWriteDeviceBlocksUndelivery(
182182
{
183183
Y_UNUSED(ev);
184184

185+
OnRequestFinished(ctx, ev->Cookie);
186+
185187
LOG_WARN(
186188
ctx,
187189
TBlockStoreComponents::PARTITION_WORKER,
@@ -199,6 +201,8 @@ void TDiskAgentMultiWriteActor::HandleWriteDeviceBlocksResponse(
199201
{
200202
const auto* msg = ev->Get();
201203

204+
OnRequestFinished(ctx, ev->Cookie);
205+
202206
auto replyInconsistentError = [&]()
203207
{
204208
auto response = std::make_unique<
@@ -240,8 +244,6 @@ void TDiskAgentMultiWriteActor::HandleWriteDeviceBlocksResponse(
240244
return;
241245
}
242246

243-
OnRequestFinished(ctx, 0);
244-
245247
Y_DEBUG_ABORT_UNLESS(error.GetCode() == S_OK);
246248
Done(ctx, MakeResponse(std::move(error)), EStatus::Success);
247249
}

cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_zeroblocks.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ void TDiskAgentZeroActor::HandleZeroDeviceBlocksUndelivery(
144144
const TEvDiskAgent::TEvZeroDeviceBlocksRequest::TPtr& ev,
145145
const TActorContext& ctx)
146146
{
147+
OnRequestFinished(ctx, ev->Cookie);
148+
147149
const auto& device = DeviceRequests[ev->Cookie].Device;
148150
LOG_WARN(
149151
ctx,
@@ -162,13 +164,13 @@ void TDiskAgentZeroActor::HandleZeroDeviceBlocksResponse(
162164
{
163165
auto* msg = ev->Get();
164166

167+
OnRequestFinished(ctx, ev->Cookie);
168+
165169
if (HasError(msg->GetError())) {
166170
HandleError(ctx, msg->GetError(), EStatus::Fail);
167171
return;
168172
}
169173

170-
OnRequestFinished(ctx, ev->Cookie);
171-
172174
if (++RequestsCompleted < DeviceRequests.size()) {
173175
return;
174176
}

0 commit comments

Comments
 (0)