Skip to content

Commit 9adf20d

Browse files
committed
add caching, cleanup bs
1 parent c6edf49 commit 9adf20d

File tree

8 files changed

+191
-44
lines changed

8 files changed

+191
-44
lines changed

docs/en/operations/external-authenticators/tokens.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ To define an access token processor, add `access_token_processors` section to `c
2424
<gogoogle>
2525
<provider>Google</provider>
2626
<email_filter>^[A-Za-z0-9._%+-]+@example\.com$</email_filter>
27+
<cache_lifetime>600</cache_lifetime>
2728
</gogoogle>
2829
<azuure>
2930
<provider>azure</provider>
@@ -41,10 +42,16 @@ Different providers have different sets of parameters.
4142
**Parameters**
4243

4344
- `provider` -- name of identity provider. Mandatory, case-insensitive. Supported options: "Google", "Azure".
45+
- `cache_lifetime` -- maximum lifetime of cached token (in seconds). Optional, default: 3600.
4446
- `email_filter` -- Regex for validation of user emails. Optional parameter, only for Google IdP.
4547
- `client_id` -- Azure AD (Entra ID) client ID. Optional parameter, only for Azure IdP.
4648
- `tenant_id` -- Azure AD (Entra ID) tenant ID. Optional parameter, only for Azure IdP.
4749

50+
### Tokens cache
51+
To reduce the number of requests to the IdP, tokens are cached internally for no longer than `cache_lifetime` seconds.
52+
If the token expires sooner than `cache_lifetime`, the cache entry for this token will only be valid while the token itself is valid.
53+
If the token lifetime is longer than `cache_lifetime`, the cache entry for this token will be valid for `cache_lifetime` seconds.
54+
4855
## IdP as External Authenticator {#idp-external-authenticator}
4956

5057
Locally defined users can be authenticated with an access token. To allow this, `jwt` must be specified as the user's authentication method. Example:

src/Access/AccessTokenProcessor.cpp

Lines changed: 107 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ namespace DB
99

1010
namespace
1111
{
12-
/// The JSON reply from provider has only a few key-value pairs, so no need for SimdJSON/RapidJSON.
12+
/// The JSON reply from provider has only a few key-value pairs, so no need for any advanced parsing.
1313
/// Reduce complexity by using picojson.
1414
picojson::object parseJSON(const String & json_string) {
1515
picojson::value jsonValue;
@@ -26,18 +26,20 @@ namespace
2626
return jsonValue.get<picojson::object>();
2727
}
2828

29-
std::string getValueByKey(const picojson::object & jsonObject, const std::string & key) {
29+
template<typename ValueType = std::string>
30+
ValueType getValueByKey(const picojson::object & jsonObject, const std::string & key) {
3031
auto it = jsonObject.find(key); // Find the key in the object
31-
if (it == jsonObject.end()) {
32+
if (it == jsonObject.end())
33+
{
3234
throw std::runtime_error("Key not found: " + key);
3335
}
3436

35-
const picojson::value &value = it->second;
36-
if (!value.is<std::string>()) {
37-
throw std::runtime_error("Value for key '" + key + "' is not a string");
37+
const picojson::value & value = it->second;
38+
if (!value.is<ValueType>()) {
39+
throw std::runtime_error("Value for key '" + key + "' has incorrect type.");
3840
}
3941

40-
return value.get<std::string>();
42+
return value.get<ValueType>();
4143
}
4244

4345
picojson::object getObjectFromURI(const Poco::URI & uri, const String & token = "")
@@ -96,9 +98,12 @@ std::unique_ptr<IAccessTokenProcessor> IAccessTokenProcessor::parseTokenProcesso
9698
String email_regex_str = config.hasProperty(prefix + ".email_filter") ? config.getString(
9799
prefix + ".email_filter") : "";
98100

101+
UInt64 cache_lifetime = config.hasProperty(prefix + ".cache_lifetime") ? config.getUInt64(
102+
prefix + ".cache_lifetime") : 3600;
103+
99104
if (provider == "google")
100105
{
101-
return std::make_unique<GoogleAccessTokenProcessor>(name, email_regex_str);
106+
return std::make_unique<GoogleAccessTokenProcessor>(name, cache_lifetime, email_regex_str);
102107
}
103108
else if (provider == "azure")
104109
{
@@ -110,11 +115,9 @@ std::unique_ptr<IAccessTokenProcessor> IAccessTokenProcessor::parseTokenProcesso
110115
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER,
111116
"Could not parse access token processor {}: tenant_id must be specified", name);
112117

113-
String client_id_str = config.getString(prefix + ".client_id");
114118
String tenant_id_str = config.getString(prefix + ".tenant_id");
115-
String client_secret_str = config.hasProperty(prefix + ".client_secret") ? config.getString(prefix + ".client_secret") : "";
116119

117-
return std::make_unique<AzureAccessTokenProcessor>(name, email_regex_str, client_id_str, tenant_id_str, client_secret_str);
120+
return std::make_unique<AzureAccessTokenProcessor>(name, cache_lifetime, email_regex_str, tenant_id_str);
118121
}
119122
else
120123
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER,
@@ -132,10 +135,11 @@ bool GoogleAccessTokenProcessor::resolveAndValidate(const TokenCredentials & cre
132135

133136
auto user_info = getUserInfo(token);
134137
String user_name = user_info["sub"];
138+
bool has_email = user_info.contains("email");
135139

136140
if (email_regex.ok())
137141
{
138-
if (!user_info.contains("email"))
142+
if (!has_email)
139143
{
140144
LOG_TRACE(getLogger("AccessTokenProcessor"), "{}: Failed to validate {} by e-mail", name, user_name);
141145
return false;
@@ -149,10 +153,54 @@ bool GoogleAccessTokenProcessor::resolveAndValidate(const TokenCredentials & cre
149153
}
150154

151155
}
156+
152157
/// Credentials are passed as const everywhere up the flow, so we have to comply,
153158
/// in this case const_cast looks acceptable.
154159
const_cast<TokenCredentials &>(credentials).setUserName(user_name);
155-
const_cast<TokenCredentials &>(credentials).setGroups({});
160+
161+
auto token_info = getObjectFromURI(Poco::URI(token_info_uri), token);
162+
if (token_info.contains("exp"))
163+
const_cast<TokenCredentials &>(credentials).setExpiresAt(std::chrono::system_clock::from_time_t((getValueByKey<time_t>(token_info, "exp"))));
164+
165+
/// Groups info can only be retrieved if user email is known.
166+
/// If no email found in user info, we skip this step and there are no external groups for the user.
167+
if (has_email)
168+
{
169+
std::set<String> external_groups_names;
170+
/// The membership search query takes the member key as a quoted literal: member_key_id=='<email>'.
/// The opening quote was missing, which left the query expression with an unbalanced trailing quote.
const Poco::URI get_groups_uri = Poco::URI("https://cloudidentity.googleapis.com/v1/groups/-/memberships:searchDirectGroups?query=member_key_id=='" + user_info["email"] + "'");
171+
172+
try
173+
{
174+
auto groups_response = getObjectFromURI(get_groups_uri, token);
175+
176+
if (!groups_response.contains("memberships")) {
177+
LOG_TRACE(getLogger("AccessTokenProcessor"),
178+
"{}: Failed to get Google groups: invalid content in response from server", name);
179+
return true;
180+
}
181+
182+
picojson::array groups_array = groups_response["memberships"].get<picojson::array>();
183+
184+
/// TODO: check for invalid JSON, LOG something meaningful
185+
for (const auto & group: groups_array)
186+
{
187+
auto group_data = group.get<picojson::object>();
188+
String group_name = getValueByKey(group_data["groupKey"].get<picojson::object>(), "id");
189+
external_groups_names.insert(group_name);
190+
LOG_TRACE(getLogger("AccessTokenProcessor"),
191+
"{}: User {}: new external group {}", name, user_name, group_name);
192+
}
193+
194+
const_cast<TokenCredentials &>(credentials).setGroups(external_groups_names);
195+
}
196+
catch (const Exception & e)
197+
{
198+
/// Could not get groups info. Log it and skip it.
199+
LOG_TRACE(getLogger("AccessTokenProcessor"),
200+
"{}: Failed to get Google groups, no external roles will be mapped. reason: {}", name, e.what());
201+
return true;
202+
}
203+
}
156204

157205
return true;
158206
}
@@ -177,8 +225,9 @@ std::unordered_map<String, String> GoogleAccessTokenProcessor::getUserInfo(const
177225

178226
bool AzureAccessTokenProcessor::resolveAndValidate(const TokenCredentials & credentials)
179227
{
180-
/// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS. We will not trust any data in this token.
181-
/// e.g. see here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad
228+
/// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS.
229+
/// We will not trust user data in this token except for 'exp' value to determine caching duration.
230+
/// Explanation here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad
182231
/// Let Azure validate it: only valid tokens will be accepted.
183232
/// Use GET https://graph.microsoft.com/oidc/userinfo to verify token and get sub at the same time
184233

@@ -202,8 +251,49 @@ bool AzureAccessTokenProcessor::resolveAndValidate(const TokenCredentials & cred
202251
return false;
203252
}
204253

205-
/// TODO: do not store it in credentials.
206-
const_cast<TokenCredentials &>(credentials).setGroups({});
254+
try
255+
{
256+
const_cast<TokenCredentials &>(credentials).setExpiresAt(jwt::decode(token).get_expires_at());
257+
}
258+
catch (...) {
259+
LOG_TRACE(getLogger("AccessTokenProcessor"),
260+
"{}: No expiration data found in a valid token, will use default cache lifetime", name);
261+
}
262+
263+
std::set<String> external_groups_names;
264+
const Poco::URI get_groups_uri = Poco::URI("https://graph.microsoft.com/v1.0/me/memberOf");
265+
266+
try
267+
{
268+
auto groups_response = getObjectFromURI(get_groups_uri, token);
269+
270+
if (!groups_response.contains("value")) {
271+
LOG_TRACE(getLogger("AccessTokenProcessor"),
272+
"{}: Failed to get Azure groups: invalid content in response from server", name);
273+
return true;
274+
}
275+
276+
picojson::array groups_array = groups_response["value"].get<picojson::array>();
277+
278+
/// TODO: check for invalid JSON
279+
for (const auto & group: groups_array)
280+
{
281+
auto group_data = group.get<picojson::object>();
282+
String group_name = getValueByKey(group_data, "id");
283+
external_groups_names.insert(group_name);
284+
LOG_TRACE(getLogger("AccessTokenProcessor"),
285+
"{}: User {}: new external group {}", name, credentials.getUserName(), group_name);
286+
}
287+
}
288+
catch (const Exception & e)
289+
{
290+
/// Could not get groups info. Log it and skip it.
291+
LOG_TRACE(getLogger("AccessTokenProcessor"),
292+
"{}: Failed to get Azure groups, no external roles will be mapped. reason: {}", name, e.what());
293+
return true;
294+
}
295+
296+
const_cast<TokenCredentials &>(credentials).setGroups(external_groups_names);
207297

208298
return true;
209299
}

src/Access/AccessTokenProcessor.h

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,12 @@ class GoogleAccessTokenProcessor;
2626
class IAccessTokenProcessor
2727
{
2828
public:
29-
IAccessTokenProcessor(const String & name_, const String & email_regex_str) : name(name_), email_regex(email_regex_str)
29+
IAccessTokenProcessor(const String & name_,
30+
const UInt64 cache_invalidation_interval_,
31+
const String & email_regex_str)
32+
: name(name_),
33+
cache_invalidation_interval(cache_invalidation_interval_),
34+
email_regex(email_regex_str)
3035
{
3136
if (!email_regex_str.empty())
3237
{
@@ -36,19 +41,12 @@ class IAccessTokenProcessor
3641
}
3742
}
3843

39-
String getName()
40-
{
41-
return name;
42-
}
43-
4444
virtual ~IAccessTokenProcessor() = default;
4545

46-
virtual bool resolveAndValidate(const TokenCredentials & credentials) = 0;
46+
String getName() { return name; }
47+
UInt64 getCacheInvalidationInterval() { return cache_invalidation_interval; }
4748

48-
virtual std::set<String> getGroups([[maybe_unused]] const TokenCredentials & credentials)
49-
{
50-
return {};
51-
}
49+
virtual bool resolveAndValidate(const TokenCredentials & credentials) = 0;
5250

5351
static std::unique_ptr<DB::IAccessTokenProcessor> parseTokenProcessor(
5452
const Poco::Util::AbstractConfiguration & config,
@@ -57,14 +55,18 @@ class IAccessTokenProcessor
5755

5856
protected:
5957
const String name;
58+
const UInt64 cache_invalidation_interval;
6059
re2::RE2 email_regex;
6160
};
6261

6362

6463
class GoogleAccessTokenProcessor : public IAccessTokenProcessor
6564
{
6665
public:
67-
GoogleAccessTokenProcessor(const String & name_, const String & email_regex_str) : IAccessTokenProcessor(name_, email_regex_str) {}
66+
GoogleAccessTokenProcessor(const String & name_,
67+
const UInt64 cache_invalidation_interval_,
68+
const String & email_regex_str)
69+
: IAccessTokenProcessor(name_, cache_invalidation_interval_, email_regex_str) {}
6870

6971
bool resolveAndValidate(const TokenCredentials & credentials) override;
7072

@@ -80,24 +82,16 @@ class AzureAccessTokenProcessor : public IAccessTokenProcessor
8082
{
8183
public:
8284
AzureAccessTokenProcessor(const String & name_,
85+
const UInt64 cache_invalidation_interval_,
8386
const String & email_regex_str,
84-
const String & client_id_,
85-
const String & tenant_id_,
86-
const String & client_secret_)
87-
: IAccessTokenProcessor(name_, email_regex_str),
88-
client_id(client_id_),
89-
tenant_id(tenant_id_),
90-
client_secret(client_secret_),
91-
jwks_uri_str("https://login.microsoftonline.com/" + tenant_id + "/discovery/v2.0/keys") {}
87+
const String & tenant_id_)
88+
: IAccessTokenProcessor(name_, cache_invalidation_interval_, email_regex_str),
89+
jwks_uri_str("https://login.microsoftonline.com/" + tenant_id_ + "/discovery/v2.0/keys") {}
9290

9391
bool resolveAndValidate(const TokenCredentials & credentials) override;
9492
private:
9593
static const Poco::URI user_info_uri;
9694

97-
const String client_id;
98-
const String tenant_id;
99-
const String client_secret;
100-
10195
const String jwks_uri_str;
10296

10397
String validateTokenAndGetUsername(const String & token) const;

src/Access/Credentials.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,6 @@ const String & BasicCredentials::getPassword() const
101101
}
102102

103103
/// Unless the token is validated, we will not use any data from it, including username.
104-
TokenCredentials::TokenCredentials(const String & token_) : Credentials(""), token(token_) {}
104+
TokenCredentials::TokenCredentials(const String & token_) : Credentials(""), token(token_), expires_at(std::chrono::system_clock::now() + std::chrono::hours(1)) {}
105105

106106
}

src/Access/Credentials.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,18 @@ class TokenCredentials : public Credentials
154154
{
155155
groups = groups_;
156156
}
157+
std::optional<std::chrono::system_clock::time_point> getExpiresAt() const
158+
{
159+
return expires_at;
160+
}
161+
void setExpiresAt(std::chrono::system_clock::time_point expires_at_)
162+
{
163+
expires_at = expires_at_;
164+
}
157165
private:
158166
String token;
159167
std::set<String> groups;
168+
std::optional<std::chrono::system_clock::time_point> expires_at;
160169
};
161170

162171
}

src/Access/ExternalAuthenticators.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,6 @@ HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const S
621621
return it->second;
622622
}
623623

624-
/// TODO: remove redundancy
625624
bool ExternalAuthenticators::resolveJWTCredentials(const TokenCredentials & credentials, bool throw_not_configured = true) const
626625
{
627626
std::lock_guard lock{mutex};
@@ -679,10 +678,48 @@ bool ExternalAuthenticators::checkAccessTokenCredentials(const TokenCredentials
679678
if (access_token_processors.empty())
680679
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Access token authentication is not configured");
681680

681+
/// lookup token in local cache if not expired.
682+
auto cached_entry_iter = access_token_cache.find(credentials.getToken());
683+
if (cached_entry_iter != access_token_cache.end())
684+
{
685+
if (cached_entry_iter->second.expires_at <= std::chrono::system_clock::now())
686+
{
687+
LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", cached_entry_iter->second.user_name);
688+
access_token_cache.erase(cached_entry_iter);
689+
}
690+
else
691+
{
692+
const auto & user_data = cached_entry_iter->second;
693+
const_cast<TokenCredentials &>(credentials).setUserName(user_data.user_name);
694+
const_cast<TokenCredentials &>(credentials).setGroups(user_data.external_roles);
695+
LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} found, using it to authenticate", cached_entry_iter->second.user_name);
696+
return true;
697+
}
698+
}
699+
682700
for (const auto & it : access_token_processors)
683701
{
684702
if (it.second->resolveAndValidate(credentials))
685703
{
704+
AccessTokenCacheEntry cache_entry;
705+
cache_entry.user_name = credentials.getUserName();
706+
cache_entry.external_roles = credentials.getGroups();
707+
708+
auto default_expiration_ts = std::chrono::system_clock::now()
709+
+ std::chrono::minutes(it.second->getCacheInvalidationInterval());
710+
711+
if (credentials.getExpiresAt().has_value())
712+
{
713+
if (credentials.getExpiresAt().value() < default_expiration_ts)
714+
cache_entry.expires_at = credentials.getExpiresAt().value();
715+
}
716+
else
717+
{
718+
cache_entry.expires_at = default_expiration_ts;
719+
}
720+
LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} added", cached_entry_iter->second.user_name);
721+
722+
access_token_cache[credentials.getToken()] = cache_entry;
686723
LOG_DEBUG(getLogger("AccessTokenAuthentication"), "Authenticated user {} with access token by {}", credentials.getUserName(), it.first);
687724
return true;
688725
}

0 commit comments

Comments
 (0)