Skip to content

Commit cbb8b9d

Browse files
committed
provide a ranking function via an allocation query
1 parent 48b670c commit cbb8b9d

File tree

3 files changed

+59
-46
lines changed

3 files changed

+59
-46
lines changed

cloud/blockstore/libs/storage/disk_registry/model/device_list.cpp

Lines changed: 32 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -275,14 +275,11 @@ NProto::TDeviceConfig TDeviceList::AllocateDevice(
275275
const TDiskId& diskId,
276276
const TAllocationQuery& query)
277277
{
278-
for (auto& kv: NodeDevices) {
279-
if (!query.NodeIds.empty() && !query.NodeIds.contains(kv.first)) {
278+
for (auto& [nodeId, nodeDevices]: NodeDevices) {
279+
if (!query.NodeIds.empty() && !query.NodeIds.contains(nodeId)) {
280280
continue;
281281
}
282282

283-
const ui32 nodeId = kv.first;
284-
auto& nodeDevices = kv.second;
285-
286283
const auto& currentRack = nodeDevices.Rack;
287284
auto& devices = nodeDevices.FreeDevices;
288285

@@ -428,10 +425,6 @@ void TDeviceList::MarkDeviceAllocated(const TDiskId& diskId, const TDeviceId& id
428425
AllocatedDevices.emplace(id, diskId);
429426
}
430427

431-
// returns a list of racks sorted by preference and then by occupied space ASC
432-
// then by free space DESC
433-
// the nodes in each rack are sorted by occupied space ASC then by free space
434-
// DESC
435428
auto TDeviceList::SelectRacks(
436429
const TAllocationQuery& query,
437430
const TString& poolName) const -> TVector<TRack>
@@ -446,7 +439,6 @@ auto TDeviceList::SelectRacks(
446439
auto& rack = racks[currentRack];
447440
rack.Id = currentRack;
448441
rack.Nodes.push_back({nodeId, 0, 0});
449-
rack.Preferred = query.PreferredRacks.contains(currentRack);
450442
};
451443

452444
if (!query.NodeIds.empty()) {
@@ -486,59 +478,64 @@ auto TDeviceList::SelectRacks(
486478

487479
TVector<TRack> result;
488480
result.reserve(racks.size());
489-
for (auto& r: racks) {
490-
result.push_back(std::move(r.second));
481+
for (auto& [_, r]: racks) {
482+
if (r.FreeSpace) {
483+
r.Preferred = query.PreferredRacks.contains(r.Id);
484+
result.push_back(std::move(r));
485+
}
491486
}
492487

493488
return result;
494489
}
495490

491+
// Returns a ranked list of nodes. Sorting is performed in stages:
492+
// - Sort racks by preference, occupied space, free space
493+
// - Sort nodes within each rack by occupied space, free space
494+
// - Optionally apply custom NodeRankingFunc to all nodes (if set in query)
496495
auto TDeviceList::RankNodes(
497496
const TAllocationQuery& query,
498-
TVector<TRack> racks) const -> TVector<TNodeInfo>
497+
TVector<TRack> racks) const -> TVector<TNodeId>
499498
{
499+
// Sort racks by preference then by occupied space ASC then by free space
500+
// DESC
500501
Sort(
501502
racks,
502-
[](const TRack& l, const TRack& r)
503+
[&](const TRack& l, const TRack& r)
503504
{
504505
return std::tie(r.Preferred, l.OccupiedSpace, r.FreeSpace, l.Id) <
505506
std::tie(l.Preferred, r.OccupiedSpace, l.FreeSpace, r.Id);
506507
});
507508

508-
TVector<TNodeInfo> nodes;
509+
TVector<TNodeId> nodeIds;
509510
{
510511
size_t size = 0;
511512
for (const auto& rack: racks) {
512513
size += rack.Nodes.size();
513514
}
514-
nodes.reserve(size);
515+
nodeIds.reserve(size);
515516
}
516517

517-
for (auto& rack: racks) {
518+
for (TRack& rack: racks) {
519+
// Sort rack's nodes by occupied space ASC then by free space DESC
518520
Sort(
519521
rack.Nodes,
520-
[](const TNodeInfo& l, const TNodeInfo& r)
522+
[](const TNodeInfo& lhs, const TNodeInfo& rhs)
521523
{
522-
return std::tie(l.OccupiedSpace, r.FreeSpace) <
523-
std::tie(r.OccupiedSpace, l.FreeSpace);
524+
return std::tie(lhs.OccupiedSpace, rhs.FreeSpace) <
525+
std::tie(rhs.OccupiedSpace, lhs.FreeSpace);
524526
});
525527

526-
nodes.insert(
527-
nodes.end(),
528-
std::make_move_iterator(rack.Nodes.begin()),
529-
std::make_move_iterator(rack.Nodes.end()));
528+
for (const TNodeInfo& node: rack.Nodes) {
529+
nodeIds.push_back(node.NodeId);
530+
}
530531
}
531532

532-
if (query.DownrankedNodeIds) {
533-
// move downranked nodes to the end of the list
534-
std::stable_partition(
535-
nodes.begin(),
536-
nodes.end(),
537-
[&](const TNodeInfo& node)
538-
{ return !query.DownrankedNodeIds.contains(node.NodeId); });
533+
// Get the final order of nodes
534+
if (query.NodeRankingFunc) {
535+
query.NodeRankingFunc(nodeIds);
539536
}
540537

541-
return nodes;
538+
return nodeIds;
542539
}
543540

544541
TVector<TDeviceList::TDeviceRange> TDeviceList::CollectDevices(
@@ -552,8 +549,8 @@ TVector<TDeviceList::TDeviceRange> TDeviceList::CollectDevices(
552549
TVector<TDeviceRange> ranges;
553550
ui64 totalSize = query.GetTotalByteCount();
554551

555-
for (const auto& node: RankNodes(query, SelectRacks(query, poolName))) {
556-
const auto* nodeDevices = NodeDevices.FindPtr(node.NodeId);
552+
for (ui32 nodeId: RankNodes(query, SelectRacks(query, poolName))) {
553+
const auto* nodeDevices = NodeDevices.FindPtr(nodeId);
557554
Y_ABORT_UNLESS(nodeDevices);
558555

559556
// finding free devices belonging to this node that match our
@@ -615,7 +612,7 @@ TVector<TDeviceList::TDeviceRange> TDeviceList::CollectDevices(
615612
}
616613

617614
if (deviceInfo.Range.first != it) {
618-
ranges.emplace_back(node.NodeId, deviceInfo.Range.first, it);
615+
ranges.emplace_back(nodeId, deviceInfo.Range.first, it);
619616
}
620617

621618
if (totalSize == 0) {

cloud/blockstore/libs/storage/disk_registry/model/device_list.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,18 @@
33
#include "public.h"
44

55
#include <cloud/blockstore/libs/storage/protos/disk.pb.h>
6+
67
#include <cloud/storage/core/libs/common/error.h>
78

89
#include <util/datetime/base.h>
9-
#include <util/generic/hash_set.h>
1010
#include <util/generic/hash.h>
11+
#include <util/generic/hash_set.h>
1112
#include <util/generic/string.h>
1213
#include <util/generic/vector.h>
1314

15+
#include <functional>
16+
#include <span>
17+
1418
namespace NCloud::NBlockStore::NStorage {
1519

1620
////////////////////////////////////////////////////////////////////////////////
@@ -71,7 +75,7 @@ class TDeviceList
7175
{
7276
THashSet<TString> ForbiddenRacks;
7377
THashSet<TString> PreferredRacks;
74-
THashSet<ui32> DownrankedNodeIds;
78+
std::function<void (std::span<ui32> nodeIds)> NodeRankingFunc;
7579

7680
ui32 LogicalBlockSize = 0;
7781
ui64 BlockCount = 0;
@@ -181,7 +185,7 @@ class TDeviceList
181185
const TAllocationQuery& query,
182186
const TString& poolName) const;
183187

184-
[[nodiscard]] TVector<TNodeInfo> RankNodes(
188+
[[nodiscard]] TVector<TNodeId> RankNodes(
185189
const TAllocationQuery& query,
186190
TVector<TRack> racks) const;
187191

cloud/blockstore/libs/storage/disk_registry/model/device_list_ut.cpp

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -508,7 +508,17 @@ Y_UNIT_TEST_SUITE(TDeviceListTest)
508508
return deviceList.AllocateDevices(
509509
"disk",
510510
{
511-
.DownrankedNodeIds = std::move(downrankedNodeIds),
511+
.NodeRankingFunc =
512+
[downrankedNodeIds](std::span<ui32> nodeIds)
513+
{
514+
auto it = std::stable_partition(
515+
nodeIds.begin(),
516+
nodeIds.end(),
517+
[&](ui32 nodeId)
518+
{ return !downrankedNodeIds.contains(nodeId); });
519+
520+
std::sort(it, nodeIds.end());
521+
},
512522
.LogicalBlockSize = DefaultBlockSize,
513523
.BlockCount = n * DefaultBlockCount,
514524
.NodeIds = std::move(nodeIds),
@@ -636,21 +646,23 @@ Y_UNIT_TEST_SUITE(TDeviceListTest)
636646
}
637647

638648
{
639-
// rack3 *[agent5: 6] 6
640-
// rack1 [agent1: 2] 2
649+
// downranked nodes are sorted by nodeId, so agent1 is selected
650+
// first, even though rack1 has less space than rack3.
651+
652+
// rack1 *[agent1: 2] 2
653+
// rack3 [agent5: 6] 6
641654

642655
auto devices = allocate(2, {}, downrankedNodes);
643656
UNIT_ASSERT_VALUES_EQUAL(2, devices.size());
644657
for (const auto& d: devices) {
645-
UNIT_ASSERT_VALUES_EQUAL_C("rack3", d.GetRack(), d);
646-
UNIT_ASSERT_VALUES_EQUAL(agent5.GetAgentId(), d.GetAgentId());
647-
UNIT_ASSERT_VALUES_EQUAL(agent5.GetNodeId(), d.GetNodeId());
658+
UNIT_ASSERT_VALUES_EQUAL_C("rack1", d.GetRack(), d);
659+
UNIT_ASSERT_VALUES_EQUAL(agent1.GetAgentId(), d.GetAgentId());
660+
UNIT_ASSERT_VALUES_EQUAL(agent1.GetNodeId(), d.GetNodeId());
648661
}
649662
}
650663

651664
{
652-
// rack3 *[agent5: 4] 4
653-
// rack1 *[agent1: 2] 2
665+
// rack3 [agent5: 6] 6
654666

655667
auto devices = allocate(6, {}, downrankedNodes);
656668
UNIT_ASSERT_VALUES_EQUAL(6, devices.size());

0 commit comments

Comments
 (0)