-
Notifications
You must be signed in to change notification settings - Fork 48
Cluster Map Implementation #435
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
0dddef2
cluster map object implementation; refresh cluster map at start of fa…
boda26 b703b40
update cluster map implementation
boda26 983fc0c
separate ip and port in cluster map, remove coordinator in vmsdk
boda26 3e07e03
fix spellchck
boda26 80b8a1e
add cluster map unit tests; refactor cluster map code
boda26 463f1bf
multiple fix
boda26 f074561
restore settings.json
boda26 03c26b2
Update inconsistent flag for duplicate SocketAddress for different no…
boda26 ce6ba1a
multiple fix according to comments; change from ip to primary_endpoin…
boda26 465c45a
fix pointer invalidation issue in btree_map
boda26 005fcec
restore settings.json
boda26 6b7a581
marking GetRandomTargets as const function
boda26 28b0b7a
multiple minor fixes
boda26 085737b
fix clang tidy
boda26 3be207e
add invalid node config for test
boda26 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| #include <thread> | ||
|
|
||
| #include "absl/synchronization/mutex.h" | ||
| #include "cluster_map.h" | ||
| #include "grpcpp/support/status.h" | ||
| #include "src/coordinator/client_pool.h" | ||
| #include "src/metrics.h" | ||
|
|
@@ -30,7 +31,8 @@ namespace valkey_search::query::fanout { | |
|
|
||
| constexpr unsigned kNoValkeyTimeout = 86400000; | ||
|
|
||
| template <typename Request, typename Response, FanoutTargetMode kTargetMode> | ||
| template <typename Request, typename Response, | ||
| vmsdk::cluster_map::FanoutTargetMode kTargetMode> | ||
| class FanoutOperationBase { | ||
| public: | ||
| explicit FanoutOperationBase() = default; | ||
|
|
@@ -43,7 +45,9 @@ class FanoutOperationBase { | |
| blocked_client_->MeasureTimeStart(); | ||
| deadline_tp_ = std::chrono::steady_clock::now() + | ||
| std::chrono::milliseconds(GetTimeoutMs()); | ||
| targets_ = GetTargets(ctx); | ||
| // Ensure cluster map is fresh | ||
| ValkeySearch::Instance().GetOrRefreshClusterMap(ctx); | ||
| targets_ = GetTargets(); | ||
| StartFanoutRound(); | ||
| } | ||
|
|
||
|
|
@@ -85,16 +89,14 @@ class FanoutOperationBase { | |
| } | ||
| } | ||
|
|
||
| std::vector<FanoutSearchTarget> GetTargets(ValkeyModuleCtx* ctx) const { | ||
| return query::fanout::FanoutTemplate::GetTargets(ctx, kTargetMode); | ||
| } | ||
| virtual std::vector<vmsdk::cluster_map::NodeInfo> GetTargets() const = 0; | ||
|
|
||
| void IssueRpc(const FanoutSearchTarget& target, const Request& request, | ||
| unsigned timeout_ms) { | ||
| void IssueRpc(const vmsdk::cluster_map::NodeInfo& target, | ||
| const Request& request, unsigned timeout_ms) { | ||
| coordinator::ClientPool* client_pool_ = | ||
| ValkeySearch::Instance().GetCoordinatorClientPool(); | ||
|
|
||
| if (target.type == FanoutSearchTarget::Type::kLocal) { | ||
| if (target.is_local) { | ||
| vmsdk::RunByMain([this, target, request]() { | ||
| auto [status, resp] = this->GetLocalResponse(request, target); | ||
| if (status.ok()) { | ||
|
|
@@ -110,12 +112,15 @@ class FanoutOperationBase { | |
| this->RpcDone(); | ||
| }); | ||
| } else { | ||
| auto client = client_pool_->GetClient(target.address); | ||
| std::string client_address = absl::StrCat( | ||
| target.socket_address.primary_endpoint, ":", | ||
| coordinator::GetCoordinatorPort(target.socket_address.port)); | ||
| auto client = client_pool_->GetClient(client_address); | ||
| if (!client) { | ||
| ++Metrics::GetStats().info_fanout_fail_cnt; | ||
| VMSDK_LOG_EVERY_N_SEC(WARNING, nullptr, 1) | ||
| << "FANOUT_DEBUG: Found invalid client on target " | ||
| << target.address; | ||
| << client_address; | ||
| this->OnError(grpc::Status(grpc::StatusCode::INTERNAL, ""), | ||
| coordinator::FanoutErrorType::COMMUNICATION_ERROR, | ||
| target); | ||
|
|
@@ -124,14 +129,14 @@ class FanoutOperationBase { | |
| } | ||
| this->InvokeRemoteRpc( | ||
| client.get(), request, | ||
| [this, target](grpc::Status status, Response& resp) { | ||
| [this, target, client_address](grpc::Status status, Response& resp) { | ||
| if (status.ok()) { | ||
| this->OnResponse(resp, target); | ||
| } else { | ||
| ++Metrics::GetStats().info_fanout_fail_cnt; | ||
| VMSDK_LOG_EVERY_N_SEC(WARNING, nullptr, 1) | ||
| << "FANOUT_DEBUG: InvokeRemoteRpc error on target " | ||
| << target.address << ", status code: " << status.error_code() | ||
| << client_address << ", status code: " << status.error_code() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same, here you're passing duplicate information. Imagine if there was a programming error that caused the client_address to not match target->GetAddress(), then the log message would actually hide the problem. |
||
| << ", error message: " << status.error_message(); | ||
| // if grpc failed, the response is invalid, so we need to manually | ||
| // set the error type | ||
|
|
@@ -151,7 +156,7 @@ class FanoutOperationBase { | |
| } | ||
|
|
||
| virtual std::pair<grpc::Status, Response> GetLocalResponse( | ||
| const Request&, [[maybe_unused]] const FanoutSearchTarget&) = 0; | ||
| const Request&, [[maybe_unused]] const vmsdk::cluster_map::NodeInfo&) = 0; | ||
|
|
||
| virtual void InvokeRemoteRpc(coordinator::Client*, const Request&, | ||
| std::function<void(grpc::Status, Response&)>, | ||
|
|
@@ -160,14 +165,15 @@ class FanoutOperationBase { | |
| virtual unsigned GetTimeoutMs() const = 0; | ||
|
|
||
| virtual Request GenerateRequest( | ||
| [[maybe_unused]] const FanoutSearchTarget&) = 0; | ||
| [[maybe_unused]] const vmsdk::cluster_map::NodeInfo&) = 0; | ||
|
|
||
| virtual void OnResponse(const Response&, | ||
| [[maybe_unused]] const FanoutSearchTarget&) = 0; | ||
| virtual void OnResponse( | ||
| const Response&, | ||
| [[maybe_unused]] const vmsdk::cluster_map::NodeInfo&) = 0; | ||
|
|
||
| virtual void OnError(grpc::Status status, | ||
| coordinator::FanoutErrorType error_type, | ||
| const FanoutSearchTarget& target) { | ||
| const vmsdk::cluster_map::NodeInfo& target) { | ||
| absl::MutexLock lock(&mutex_); | ||
| if (error_type == coordinator::FanoutErrorType::INDEX_NAME_ERROR) { | ||
| index_name_error_nodes.push_back(target); | ||
|
|
@@ -205,39 +211,51 @@ class FanoutOperationBase { | |
| // Log index name errors | ||
| if (!index_name_error_nodes.empty()) { | ||
| error_message = "Index name not found."; | ||
| for (const FanoutSearchTarget& target : index_name_error_nodes) { | ||
| if (target.type == FanoutSearchTarget::Type::kLocal) { | ||
| for (const vmsdk::cluster_map::NodeInfo& target : | ||
| index_name_error_nodes) { | ||
| if (target.is_local) { | ||
| VMSDK_LOG_EVERY_N_SEC(WARNING, ctx, 1) | ||
| << INDEX_NAME_ERROR_LOG_PREFIX << "LOCAL NODE"; | ||
| } else { | ||
| VMSDK_LOG_EVERY_N_SEC(WARNING, ctx, 1) | ||
| << INDEX_NAME_ERROR_LOG_PREFIX << target.address; | ||
| << INDEX_NAME_ERROR_LOG_PREFIX | ||
| << absl::StrCat(target.socket_address.primary_endpoint, ":", | ||
| coordinator::GetCoordinatorPort( | ||
| target.socket_address.port)); | ||
| } | ||
| } | ||
| } | ||
| // Log communication errors | ||
| if (!communication_error_nodes.empty()) { | ||
| error_message = "Communication error between nodes found."; | ||
| for (const FanoutSearchTarget& target : communication_error_nodes) { | ||
| if (target.type == FanoutSearchTarget::Type::kLocal) { | ||
| for (const vmsdk::cluster_map::NodeInfo& target : | ||
| communication_error_nodes) { | ||
| if (target.is_local) { | ||
| VMSDK_LOG_EVERY_N_SEC(WARNING, ctx, 1) | ||
| << COMMUNICATION_ERROR_LOG_PREFIX << "LOCAL NODE"; | ||
| } else { | ||
| VMSDK_LOG_EVERY_N_SEC(WARNING, ctx, 1) | ||
| << COMMUNICATION_ERROR_LOG_PREFIX << target.address; | ||
| << COMMUNICATION_ERROR_LOG_PREFIX | ||
| << absl::StrCat(target.socket_address.primary_endpoint, ":", | ||
| coordinator::GetCoordinatorPort( | ||
| target.socket_address.port)); | ||
| } | ||
| } | ||
| } | ||
| // Log inconsistent state errors | ||
| if (!inconsistent_state_error_nodes.empty()) { | ||
| error_message = "Inconsistent index state error found."; | ||
| for (const FanoutSearchTarget& target : inconsistent_state_error_nodes) { | ||
| if (target.type == FanoutSearchTarget::Type::kLocal) { | ||
| for (const vmsdk::cluster_map::NodeInfo& target : | ||
| inconsistent_state_error_nodes) { | ||
| if (target.is_local) { | ||
| VMSDK_LOG_EVERY_N_SEC(WARNING, ctx, 1) | ||
| << INCONSISTENT_STATE_ERROR_LOG_PREFIX << "LOCAL NODE"; | ||
| } else { | ||
| VMSDK_LOG_EVERY_N_SEC(WARNING, ctx, 1) | ||
| << INCONSISTENT_STATE_ERROR_LOG_PREFIX << target.address; | ||
| << INCONSISTENT_STATE_ERROR_LOG_PREFIX | ||
| << absl::StrCat(target.socket_address.primary_endpoint, ":", | ||
| coordinator::GetCoordinatorPort( | ||
| target.socket_address.port)); | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -286,10 +304,10 @@ class FanoutOperationBase { | |
| unsigned outstanding_{0}; | ||
| absl::Mutex mutex_; | ||
| std::unique_ptr<vmsdk::BlockedClient> blocked_client_; | ||
| std::vector<FanoutSearchTarget> index_name_error_nodes; | ||
| std::vector<FanoutSearchTarget> inconsistent_state_error_nodes; | ||
| std::vector<FanoutSearchTarget> communication_error_nodes; | ||
| std::vector<FanoutSearchTarget> targets_; | ||
| std::vector<vmsdk::cluster_map::NodeInfo> index_name_error_nodes; | ||
| std::vector<vmsdk::cluster_map::NodeInfo> inconsistent_state_error_nodes; | ||
| std::vector<vmsdk::cluster_map::NodeInfo> communication_error_nodes; | ||
| std::vector<vmsdk::cluster_map::NodeInfo> targets_; | ||
| std::chrono::steady_clock::time_point deadline_tp_; | ||
| bool timeout_occurred_ = false; | ||
| }; | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
client->GetAddress() ?? If it doesn't have this ability, let's add it.