Skip to content

Commit 3719b7a

Browse files
committed
provide a ranking function via an allocation query
1 parent 48b670c commit 3719b7a

File tree

3 files changed

+64
-44
lines changed

3 files changed

+64
-44
lines changed

cloud/blockstore/libs/storage/disk_registry/model/device_list.cpp

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,6 @@ void TDeviceList::MarkDeviceAllocated(const TDiskId& diskId, const TDeviceId& id
428428
AllocatedDevices.emplace(id, diskId);
429429
}
430430

431-
// returns a list of racks sorted by preference and then by occupied space ASC
432-
// then by free space DESC
433-
// the nodes in each rack are sorted by occupied space ASC then by free space
434-
// DESC
435431
auto TDeviceList::SelectRacks(
436432
const TAllocationQuery& query,
437433
const TString& poolName) const -> TVector<TRack>
@@ -446,7 +442,6 @@ auto TDeviceList::SelectRacks(
446442
auto& rack = racks[currentRack];
447443
rack.Id = currentRack;
448444
rack.Nodes.push_back({nodeId, 0, 0});
449-
rack.Preferred = query.PreferredRacks.contains(currentRack);
450445
};
451446

452447
if (!query.NodeIds.empty()) {
@@ -486,59 +481,71 @@ auto TDeviceList::SelectRacks(
486481

487482
TVector<TRack> result;
488483
result.reserve(racks.size());
489-
for (auto& r: racks) {
490-
result.push_back(std::move(r.second));
484+
for (auto& [_, r]: racks) {
485+
if (r.FreeSpace) {
486+
result.push_back(std::move(r));
487+
}
491488
}
492489

493490
return result;
494491
}
495492

493+
// Returns a ranked list of nodes. Sorting is performed in stages:
494+
// - Sort racks: preferred racks first, then by occupied space ASC, then by free space DESC
495+
// - Sort nodes within each rack by occupied space ASC, then by free space DESC
496+
// - Optionally apply custom NodeRankingFunc to all nodes (if set in query)
496497
auto TDeviceList::RankNodes(
497498
const TAllocationQuery& query,
498-
TVector<TRack> racks) const -> TVector<TNodeInfo>
499+
TVector<TRack> racks) const -> TVector<TNodeId>
499500
{
501+
// Sort racks by occupied space ASC, then by free space DESC, then by Id ASC
500502
Sort(
501503
racks,
502-
[](const TRack& l, const TRack& r)
504+
[](const TRack& lhs, const TRack& rhs)
503505
{
504-
return std::tie(r.Preferred, l.OccupiedSpace, r.FreeSpace, l.Id) <
505-
std::tie(l.Preferred, r.OccupiedSpace, l.FreeSpace, r.Id);
506+
return std::tie(lhs.OccupiedSpace, rhs.FreeSpace, lhs.Id) <
507+
std::tie(rhs.OccupiedSpace, lhs.FreeSpace, rhs.Id);
506508
});
507509

508-
TVector<TNodeInfo> nodes;
510+
// Move preferred racks to the start of the list
511+
if (query.PreferredRacks) {
512+
std::stable_partition(
513+
racks.begin(),
514+
racks.end(),
515+
[&](const TRack& rack)
516+
{ return query.PreferredRacks.contains(rack.Id); });
517+
}
518+
519+
TVector<TNodeId> nodeIds;
509520
{
510521
size_t size = 0;
511522
for (const auto& rack: racks) {
512523
size += rack.Nodes.size();
513524
}
514-
nodes.reserve(size);
525+
nodeIds.reserve(size);
515526
}
516527

517-
for (auto& rack: racks) {
528+
for (TRack& rack: racks) {
529+
// Sort rack's nodes by occupied space ASC then by free space DESC
518530
Sort(
519531
rack.Nodes,
520-
[](const TNodeInfo& l, const TNodeInfo& r)
532+
[](const TNodeInfo& lhs, const TNodeInfo& rhs)
521533
{
522-
return std::tie(l.OccupiedSpace, r.FreeSpace) <
523-
std::tie(r.OccupiedSpace, l.FreeSpace);
534+
return std::tie(lhs.OccupiedSpace, rhs.FreeSpace) <
535+
std::tie(rhs.OccupiedSpace, lhs.FreeSpace);
524536
});
525537

526-
nodes.insert(
527-
nodes.end(),
528-
std::make_move_iterator(rack.Nodes.begin()),
529-
std::make_move_iterator(rack.Nodes.end()));
538+
for (const TNodeInfo& node: rack.Nodes) {
539+
nodeIds.push_back(node.NodeId);
540+
}
530541
}
531542

532-
if (query.DownrankedNodeIds) {
533-
// move downranked nodes to the end of the list
534-
std::stable_partition(
535-
nodes.begin(),
536-
nodes.end(),
537-
[&](const TNodeInfo& node)
538-
{ return !query.DownrankedNodeIds.contains(node.NodeId); });
543+
// Get the final order of nodes
544+
if (query.NodeRankingFunc) {
545+
query.NodeRankingFunc(nodeIds);
539546
}
540547

541-
return nodes;
548+
return nodeIds;
542549
}
543550

544551
TVector<TDeviceList::TDeviceRange> TDeviceList::CollectDevices(
@@ -552,8 +559,8 @@ TVector<TDeviceList::TDeviceRange> TDeviceList::CollectDevices(
552559
TVector<TDeviceRange> ranges;
553560
ui64 totalSize = query.GetTotalByteCount();
554561

555-
for (const auto& node: RankNodes(query, SelectRacks(query, poolName))) {
556-
const auto* nodeDevices = NodeDevices.FindPtr(node.NodeId);
562+
for (ui32 nodeId: RankNodes(query, SelectRacks(query, poolName))) {
563+
const auto* nodeDevices = NodeDevices.FindPtr(nodeId);
557564
Y_ABORT_UNLESS(nodeDevices);
558565

559566
// finding free devices belonging to this node that match our
@@ -615,7 +622,7 @@ TVector<TDeviceList::TDeviceRange> TDeviceList::CollectDevices(
615622
}
616623

617624
if (deviceInfo.Range.first != it) {
618-
ranges.emplace_back(node.NodeId, deviceInfo.Range.first, it);
625+
ranges.emplace_back(nodeId, deviceInfo.Range.first, it);
619626
}
620627

621628
if (totalSize == 0) {

cloud/blockstore/libs/storage/disk_registry/model/device_list.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,18 @@
33
#include "public.h"
44

55
#include <cloud/blockstore/libs/storage/protos/disk.pb.h>
6+
67
#include <cloud/storage/core/libs/common/error.h>
78

89
#include <util/datetime/base.h>
9-
#include <util/generic/hash_set.h>
1010
#include <util/generic/hash.h>
11+
#include <util/generic/hash_set.h>
1112
#include <util/generic/string.h>
1213
#include <util/generic/vector.h>
1314

15+
#include <functional>
16+
#include <span>
17+
1418
namespace NCloud::NBlockStore::NStorage {
1519

1620
////////////////////////////////////////////////////////////////////////////////
@@ -38,7 +42,6 @@ class TDeviceList
3842
TVector<TNodeInfo> Nodes;
3943
ui64 FreeSpace = 0;
4044
ui64 OccupiedSpace = 0;
41-
bool Preferred = false;
4245
};
4346

4447
struct TNodeDevices
@@ -71,7 +74,7 @@ class TDeviceList
7174
{
7275
THashSet<TString> ForbiddenRacks;
7376
THashSet<TString> PreferredRacks;
74-
THashSet<ui32> DownrankedNodeIds;
77+
std::function<void (std::span<ui32> nodeIds)> NodeRankingFunc;
7578

7679
ui32 LogicalBlockSize = 0;
7780
ui64 BlockCount = 0;
@@ -181,7 +184,7 @@ class TDeviceList
181184
const TAllocationQuery& query,
182185
const TString& poolName) const;
183186

184-
[[nodiscard]] TVector<TNodeInfo> RankNodes(
187+
[[nodiscard]] TVector<TNodeId> RankNodes(
185188
const TAllocationQuery& query,
186189
TVector<TRack> racks) const;
187190

cloud/blockstore/libs/storage/disk_registry/model/device_list_ut.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,17 @@ Y_UNIT_TEST_SUITE(TDeviceListTest)
508508
return deviceList.AllocateDevices(
509509
"disk",
510510
{
511-
.DownrankedNodeIds = std::move(downrankedNodeIds),
511+
.NodeRankingFunc =
512+
[downrankedNodeIds](std::span<ui32> nodeIds)
513+
{
514+
auto it = std::stable_partition(
515+
nodeIds.begin(),
516+
nodeIds.end(),
517+
[&](ui32 nodeId)
518+
{ return !downrankedNodeIds.contains(nodeId); });
519+
520+
std::sort(it, nodeIds.end());
521+
},
512522
.LogicalBlockSize = DefaultBlockSize,
513523
.BlockCount = n * DefaultBlockCount,
514524
.NodeIds = std::move(nodeIds),
@@ -636,21 +646,21 @@ Y_UNIT_TEST_SUITE(TDeviceListTest)
636646
}
637647

638648
{
639-
// rack3 *[agent5: 6] 6
640-
// rack1 [agent1: 2] 2
649+
// downranked nodes are sorted by nodeId, so agent1 is first
650+
// rack1 *[agent1: 2] 2
651+
// rack3 [agent5: 6] 6
641652

642653
auto devices = allocate(2, {}, downrankedNodes);
643654
UNIT_ASSERT_VALUES_EQUAL(2, devices.size());
644655
for (const auto& d: devices) {
645-
UNIT_ASSERT_VALUES_EQUAL_C("rack3", d.GetRack(), d);
646-
UNIT_ASSERT_VALUES_EQUAL(agent5.GetAgentId(), d.GetAgentId());
647-
UNIT_ASSERT_VALUES_EQUAL(agent5.GetNodeId(), d.GetNodeId());
656+
UNIT_ASSERT_VALUES_EQUAL_C("rack1", d.GetRack(), d);
657+
UNIT_ASSERT_VALUES_EQUAL(agent1.GetAgentId(), d.GetAgentId());
658+
UNIT_ASSERT_VALUES_EQUAL(agent1.GetNodeId(), d.GetNodeId());
648659
}
649660
}
650661

651662
{
652-
// rack3 *[agent5: 4] 4
653-
// rack1 *[agent1: 2] 2
663+
// rack3 [agent5: 6] 6
654664

655665
auto devices = allocate(6, {}, downrankedNodes);
656666
UNIT_ASSERT_VALUES_EQUAL(6, devices.size());

0 commit comments

Comments
 (0)