Skip to content

Commit 4d4d83d

Browse files
authored
more detailed information about error from blob cache (#16365)
1 parent 5b5dbc5 commit 4d4d83d

File tree

5 files changed

+16
-12
lines changed

5 files changed

+16
-12
lines changed

ydb/core/tx/columnshard/blob_cache.cpp

+9-7
Original file line number | Diff line number | Diff line change
@@ -237,7 +237,7 @@ class TBlobCache: public TActorBootstrapped<TBlobCache> {
237237
if (it != Cache.End()) {
238238
Hits->Inc();
239239
HitsBytes->Add(blobRange.Size);
240-
SendResult(sender, blobRange, NKikimrProto::OK, it.Value(), ctx, true);
240+
SendResult(sender, blobRange, NKikimrProto::OK, it.Value(), {}, ctx, true);
241241
return true;
242242
}
243243

@@ -423,10 +423,10 @@ class TBlobCache: public TActorBootstrapped<TBlobCache> {
423423
}
424424

425425
void SendResult(const TActorId& to, const TBlobRange& blobRange, NKikimrProto::EReplyStatus status,
426-
const TString& data, const TActorContext& ctx, const bool fromCache = false) {
426+
const TString& data, const TString& detailedError, const TActorContext& ctx, const bool fromCache = false) {
427427
LOG_S_DEBUG("Send result: " << blobRange << " to: " << to << " status: " << status);
428428

429-
ctx.Send(to, new TEvBlobCache::TEvReadBlobRangeResult(blobRange, status, data, fromCache));
429+
ctx.Send(to, new TEvBlobCache::TEvReadBlobRangeResult(blobRange, status, data, detailedError, fromCache));
430430
}
431431

432432
void Handle(TEvBlobStorage::TEvGetResult::TPtr& ev, const TActorContext& ctx) {
@@ -436,7 +436,9 @@ class TBlobCache: public TActorBootstrapped<TBlobCache> {
436436
Y_ABORT("Unexpected reply from blobstorage");
437437
}
438438

439+
TString detailedError;
439440
if (ev->Get()->Status != NKikimrProto::EReplyStatus::OK) {
441+
detailedError = ev->Get()->ToString();
440442
AFL_WARN(NKikimrServices::BLOB_CACHE)("fail", ev->Get()->ToString());
441443
ReadSimpleFailedBytes->Add(ev->Get()->ResponseSz);
442444
ReadSimpleFailedCount->Add(1);
@@ -458,14 +460,14 @@ class TBlobCache: public TActorBootstrapped<TBlobCache> {
458460

459461
for (size_t i = 0; i < ev->Get()->ResponseSz; ++i) {
460462
const auto& res = ev->Get()->Responses[i];
461-
ProcessSingleRangeResult(blobRanges[i], readCookie, res.Status, res.Buffer.ConvertToString(), ctx);
463+
ProcessSingleRangeResult(blobRanges[i], readCookie, res.Status, res.Buffer.ConvertToString(), detailedError, ctx);
462464
}
463465

464466
MakeReadRequests(ctx);
465467
}
466468

467469
void ProcessSingleRangeResult(const TBlobRange& blobRange, const ui64 readCookie,
468-
ui32 status, const TString& data, const TActorContext& ctx) noexcept
470+
ui32 status, const TString& data, const TString& detailedError, const TActorContext& ctx) noexcept
469471
{
470472
AFL_DEBUG(NKikimrServices::BLOB_CACHE)("ProcessSingleRangeResult", blobRange);
471473
auto readIt = OutstandingReads.find(blobRange);
@@ -500,7 +502,7 @@ class TBlobCache: public TActorBootstrapped<TBlobCache> {
500502
AFL_DEBUG(NKikimrServices::BLOB_CACHE)("ProcessSingleRangeResult", blobRange)("send_replies", readIt->second.Waiting.size());
501503
// Send results to all waiters
502504
for (const auto& to : readIt->second.Waiting) {
503-
SendResult(to, blobRange, (NKikimrProto::EReplyStatus)status, data, ctx);
505+
SendResult(to, blobRange, (NKikimrProto::EReplyStatus)status, data, detailedError, ctx);
504506
}
505507

506508
OutstandingReads.erase(readIt);
@@ -525,7 +527,7 @@ class TBlobCache: public TActorBootstrapped<TBlobCache> {
525527

526528
for (size_t i = 0; i < blobRanges.size(); ++i) {
527529
Y_ABORT_UNLESS(blobRanges[i].BlobId.GetTabletId() == tabletId);
528-
ProcessSingleRangeResult(blobRanges[i], readCookie, NKikimrProto::EReplyStatus::NOTREADY, {}, ctx);
530+
ProcessSingleRangeResult(blobRanges[i], readCookie, NKikimrProto::EReplyStatus::NOTREADY, {}, {}, ctx);
529531
}
530532
}
531533

ydb/core/tx/columnshard/blob_cache.h

+3-1
Original file line number | Diff line number | Diff line change
@@ -72,14 +72,16 @@ struct TEvBlobCache {
7272
TBlobRange BlobRange;
7373
NKikimrProto::EReplyStatus Status;
7474
TString Data;
75+
TString DetailedError;
7576
const bool FromCache = false;
7677
const TInstant ConstructTime = Now();
7778
const TString DataSourceId;
7879

79-
TEvReadBlobRangeResult(const TBlobRange& blobRange, NKikimrProto::EReplyStatus status, const TString& data, const bool fromCache = false, const TString& dataSourceId = Default<TString>())
80+
TEvReadBlobRangeResult(const TBlobRange& blobRange, NKikimrProto::EReplyStatus status, const TString& data, const TString& detailedError, const bool fromCache = false, const TString& dataSourceId = Default<TString>())
8081
: BlobRange(blobRange)
8182
, Status(status)
8283
, Data(data)
84+
, DetailedError(detailedError)
8385
, FromCache(fromCache)
8486
, DataSourceId(dataSourceId)
8587
{}

ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,11 @@ std::unique_ptr<NActors::IEventBase> TRepliesAdapter::RebuildReplyEvent(std::uni
1616
}
1717
if (ev->IsSuccess()) {
1818
AFL_VERIFY(!!ev->Body)("key", ev->Key)("interval_from", ev->GetReadInterval().first)("interval_to", ev->GetReadInterval().second);
19-
return std::make_unique<NBlobCache::TEvBlobCache::TEvReadBlobRangeResult>(bRange, NKikimrProto::EReplyStatus::OK, ev->Body, false, StorageId);
19+
return std::make_unique<NBlobCache::TEvBlobCache::TEvReadBlobRangeResult>(bRange, NKikimrProto::EReplyStatus::OK, ev->Body, TString{}, false, StorageId);
2020
} else {
2121
AFL_DEBUG(NKikimrServices::TX_TIERING)("event", "s3_request_failed")("request_type", "get_object")(
2222
"exception", ev->GetError().GetExceptionName())("message", ev->GetError().GetMessage())("storage_id", StorageId)("blob", logoBlobId);
23-
return std::make_unique<NBlobCache::TEvBlobCache::TEvReadBlobRangeResult>(bRange, NKikimrProto::EReplyStatus::ERROR, TStringBuilder() << ev->Result, false, StorageId);
23+
return std::make_unique<NBlobCache::TEvBlobCache::TEvReadBlobRangeResult>(bRange, NKikimrProto::EReplyStatus::ERROR, TStringBuilder() << ev->Result, TStringBuilder{} << ev->GetError().GetExceptionName() << ", " << ev->GetError().GetMessage(), false, StorageId);
2424
}
2525
}
2626

ydb/core/tx/columnshard/blobs_reader/actor.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ void TActor::Handle(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult::TPtr& ev)
1515
bool aborted = false;
1616
if (event.Status != NKikimrProto::EReplyStatus::OK) {
1717
WaitingBlobsCount.Sub(Task->GetWaitingRangesCount());
18-
if (!Task->AddError(event.DataSourceId, event.BlobRange, IBlobsReadingAction::TErrorStatus::Fail(event.Status, "cannot get blob: " + event.Data.substr(0, 1024)))) {
18+
if (!Task->AddError(event.DataSourceId, event.BlobRange, IBlobsReadingAction::TErrorStatus::Fail(event.Status, "cannot get blob: " + event.Data.substr(0, 1024) + ", detailed error: " + event.DetailedError))) {
1919
aborted = true;
2020
}
2121
} else {

ydb/core/tx/columnshard/blobs_reader/read_coordinator.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ void TReadCoordinatorActor::Handle(NBlobCache::TEvBlobCache::TEvReadBlobRangeRes
1818
auto tasks = BlobTasks.Extract(event.DataSourceId, event.BlobRange);
1919
for (auto&& i : tasks) {
2020
if (event.Status != NKikimrProto::EReplyStatus::OK) {
21-
i->AddError(event.DataSourceId, event.BlobRange, IBlobsReadingAction::TErrorStatus::Fail(event.Status, "cannot get blob"));
21+
i->AddError(event.DataSourceId, event.BlobRange, IBlobsReadingAction::TErrorStatus::Fail(event.Status, "cannot get blob, detailed error: " + event.DetailedError));
2222
} else {
2323
i->AddData(event.DataSourceId, event.BlobRange, event.Data);
2424
}

0 commit comments

Comments
 (0)