Skip to content

Commit 46dc78f

Browse files
committed
add caching, cleanup bs
fix credentials cast + some better code
1 parent f293613 commit 46dc78f

9 files changed

+205
-46
lines changed

docs/en/operations/external-authenticators/tokens.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ To define an access token processor, add `access_token_processors` section to `c
2424
<gogoogle>
2525
<provider>Google</provider>
2626
<email_filter>^[A-Za-z0-9._%+-]+@example\.com$</email_filter>
27+
<cache_lifetime>600</cache_lifetime>
2728
</gogoogle>
2829
<azuure>
2930
<provider>azure</provider>
@@ -41,10 +42,16 @@ Different providers have different sets of parameters.
4142
**Parameters**
4243

4344
- `provider` -- name of identity provider. Mandatory, case-insensitive. Supported options: "Google", "Azure".
45+
- `cache_lifetime` -- maximum lifetime of cached token (in seconds). Optional, default: 3600.
4446
- `email_filter` -- Regex for validation of user emails. Optional parameter, only for Google IdP.
4547
- `client_id` -- Azure AD (Entra ID) client ID. Optional parameter, only for Azure IdP.
4648
- `tenant_id` -- Azure AD (Entra ID) tenant ID. Optional parameter, only for Azure IdP.
4749

50+
### Tokens cache
51+
To reduce the number of requests to the IdP, tokens are cached internally for no longer than `cache_lifetime` seconds.
52+
If a token expires sooner than `cache_lifetime`, the cache entry for this token will only be valid while the token itself is valid.
53+
If the token lifetime is longer than `cache_lifetime`, the cache entry for this token will be valid for `cache_lifetime` seconds.
54+
4855
## IdP as External Authenticator {#idp-external-authenticator}
4956

5057
Locally defined users can be authenticated with an access token. To allow this, `jwt` must be specified as user's authentication method. Example:

src/Access/AccessTokenProcessor.cpp

Lines changed: 119 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ namespace DB
99

1010
namespace
1111
{
12-
/// The JSON reply from provider has only a few key-value pairs, so no need for SimdJSON/RapidJSON.
12+
/// The JSON reply from provider has only a few key-value pairs, so no need for any advanced parsing.
1313
/// Reduce complexity by using picojson.
1414
picojson::object parseJSON(const String & json_string) {
1515
picojson::value jsonValue;
@@ -26,18 +26,20 @@ namespace
2626
return jsonValue.get<picojson::object>();
2727
}
2828

29-
std::string getValueByKey(const picojson::object & jsonObject, const std::string & key) {
29+
template<typename ValueType = std::string>
30+
ValueType getValueByKey(const picojson::object & jsonObject, const std::string & key) {
3031
auto it = jsonObject.find(key); // Find the key in the object
31-
if (it == jsonObject.end()) {
32+
if (it == jsonObject.end())
33+
{
3234
throw std::runtime_error("Key not found: " + key);
3335
}
3436

35-
const picojson::value &value = it->second;
36-
if (!value.is<std::string>()) {
37-
throw std::runtime_error("Value for key '" + key + "' is not a string");
37+
const picojson::value & value = it->second;
38+
if (!value.is<ValueType>()) {
39+
throw std::runtime_error("Value for key '" + key + "' has incorrect type.");
3840
}
3941

40-
return value.get<std::string>();
42+
return value.get<ValueType>();
4143
}
4244

4345
picojson::object getObjectFromURI(const Poco::URI & uri, const String & token = "")
@@ -96,9 +98,12 @@ std::unique_ptr<IAccessTokenProcessor> IAccessTokenProcessor::parseTokenProcesso
9698
String email_regex_str = config.hasProperty(prefix + ".email_filter") ? config.getString(
9799
prefix + ".email_filter") : "";
98100

101+
UInt64 cache_lifetime = config.hasProperty(prefix + ".cache_lifetime") ? config.getUInt64(
102+
prefix + ".cache_lifetime") : 3600;
103+
99104
if (provider == "google")
100105
{
101-
return std::make_unique<GoogleAccessTokenProcessor>(name, email_regex_str);
106+
return std::make_unique<GoogleAccessTokenProcessor>(name, cache_lifetime, email_regex_str);
102107
}
103108
else if (provider == "azure")
104109
{
@@ -110,11 +115,9 @@ std::unique_ptr<IAccessTokenProcessor> IAccessTokenProcessor::parseTokenProcesso
110115
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER,
111116
"Could not parse access token processor {}: tenant_id must be specified", name);
112117

113-
String client_id_str = config.getString(prefix + ".client_id");
114118
String tenant_id_str = config.getString(prefix + ".tenant_id");
115-
String client_secret_str = config.hasProperty(prefix + ".client_secret") ? config.getString(prefix + ".client_secret") : "";
116119

117-
return std::make_unique<AzureAccessTokenProcessor>(name, email_regex_str, client_id_str, tenant_id_str, client_secret_str);
120+
return std::make_unique<AzureAccessTokenProcessor>(name, cache_lifetime, email_regex_str, tenant_id_str);
118121
}
119122
else
120123
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER,
@@ -132,10 +135,11 @@ bool GoogleAccessTokenProcessor::resolveAndValidate(const TokenCredentials & cre
132135

133136
auto user_info = getUserInfo(token);
134137
String user_name = user_info["sub"];
138+
bool has_email = user_info.contains("email");
135139

136140
if (email_regex.ok())
137141
{
138-
if (!user_info.contains("email"))
142+
if (!has_email)
139143
{
140144
LOG_TRACE(getLogger("AccessTokenProcessor"), "{}: Failed to validate {} by e-mail", name, user_name);
141145
return false;
@@ -149,10 +153,59 @@ bool GoogleAccessTokenProcessor::resolveAndValidate(const TokenCredentials & cre
149153
}
150154

151155
}
156+
152157
/// Credentials are passed as const everywhere up the flow, so we have to comply,
153158
/// in this case const_cast looks acceptable.
154159
const_cast<TokenCredentials &>(credentials).setUserName(user_name);
155-
const_cast<TokenCredentials &>(credentials).setGroups({});
160+
161+
auto token_info = getObjectFromURI(Poco::URI(token_info_uri), token);
162+
if (token_info.contains("exp"))
163+
const_cast<TokenCredentials &>(credentials).setExpiresAt(std::chrono::system_clock::from_time_t((getValueByKey<time_t>(token_info, "exp"))));
164+
165+
/// Groups info can only be retrieved if user email is known.
166+
/// If no email found in user info, we skip this step and there are no external groups for the user.
167+
if (has_email)
168+
{
169+
std::set<String> external_groups_names;
170+
const Poco::URI get_groups_uri = Poco::URI("https://cloudidentity.googleapis.com/v1/groups/-/memberships:searchDirectGroups?query=member_key_id==" + user_info["email"] + "'");
171+
172+
try
173+
{
174+
auto groups_response = getObjectFromURI(get_groups_uri, token);
175+
176+
if (!groups_response.contains("memberships") || !groups_response["memberships"].is<picojson::array>())
177+
{
178+
LOG_TRACE(getLogger("AccessTokenProcessor"),
179+
"{}: Failed to get Google groups: invalid content in response from server", name);
180+
return true;
181+
}
182+
183+
for (const auto & group: groups_response["memberships"].get<picojson::array>())
184+
{
185+
if (!group.is<picojson::object>())
186+
{
187+
LOG_TRACE(getLogger("AccessTokenProcessor"),
188+
"{}: Failed to get Google groups: invalid content in response from server", name);
189+
continue;
190+
}
191+
192+
auto group_data = group.get<picojson::object>();
193+
String group_name = getValueByKey(group_data["groupKey"].get<picojson::object>(), "id");
194+
external_groups_names.insert(group_name);
195+
LOG_TRACE(getLogger("AccessTokenProcessor"),
196+
"{}: User {}: new external group {}", name, user_name, group_name);
197+
}
198+
199+
const_cast<TokenCredentials &>(credentials).setGroups(external_groups_names);
200+
}
201+
catch (const Exception & e)
202+
{
203+
/// Could not get groups info. Log it and skip it.
204+
LOG_TRACE(getLogger("AccessTokenProcessor"),
205+
"{}: Failed to get Google groups, no external roles will be mapped. reason: {}", name, e.what());
206+
return true;
207+
}
208+
}
156209

157210
return true;
158211
}
@@ -177,8 +230,9 @@ std::unordered_map<String, String> GoogleAccessTokenProcessor::getUserInfo(const
177230

178231
bool AzureAccessTokenProcessor::resolveAndValidate(const TokenCredentials & credentials)
179232
{
180-
/// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS. We will not trust any data in this token.
181-
/// e.g. see here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad
233+
/// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS.
234+
/// We will not trust user data in this token except for 'exp' value to determine caching duration.
235+
/// Explanation here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad
182236
/// Let Azure validate it: only valid tokens will be accepted.
183237
/// Use GET https://graph.microsoft.com/oidc/userinfo to verify token and get sub at the same time
184238

@@ -202,8 +256,56 @@ bool AzureAccessTokenProcessor::resolveAndValidate(const TokenCredentials & cred
202256
return false;
203257
}
204258

205-
/// TODO: do not store it in credentials.
206-
const_cast<TokenCredentials &>(credentials).setGroups({});
259+
try
260+
{
261+
const_cast<TokenCredentials &>(credentials).setExpiresAt(jwt::decode(token).get_expires_at());
262+
}
263+
catch (...) {
264+
LOG_TRACE(getLogger("AccessTokenProcessor"),
265+
"{}: No expiration data found in a valid token, will use default cache lifetime", name);
266+
}
267+
268+
std::set<String> external_groups_names;
269+
const Poco::URI get_groups_uri = Poco::URI("https://graph.microsoft.com/v1.0/me/memberOf");
270+
271+
try
272+
{
273+
auto groups_response = getObjectFromURI(get_groups_uri, token);
274+
275+
if (!groups_response.contains("value") || !groups_response["value"].is<picojson::array>())
276+
{
277+
LOG_TRACE(getLogger("AccessTokenProcessor"),
278+
"{}: Failed to get Azure groups: invalid content in response from server", name);
279+
return true;
280+
}
281+
282+
picojson::array groups_array = groups_response["value"].get<picojson::array>();
283+
284+
for (const auto & group: groups_array)
285+
{
286+
/// Got some invalid response. Ignore this, log this.
287+
if (!group.is<picojson::object >())
288+
{
289+
LOG_TRACE(getLogger("AccessTokenProcessor"),
290+
"{}: Failed to get Azure groups: invalid content in response from server", name);
291+
continue;
292+
}
293+
294+
auto group_data = group.get<picojson::object>();
295+
String group_name = getValueByKey(group_data, "id");
296+
external_groups_names.insert(group_name);
297+
LOG_TRACE(getLogger("AccessTokenProcessor"), "{}: User {}: new external group {}", name, credentials.getUserName(), group_name);
298+
}
299+
}
300+
catch (const Exception & e)
301+
{
302+
/// Could not get groups info. Log it and skip it.
303+
LOG_TRACE(getLogger("AccessTokenProcessor"),
304+
"{}: Failed to get Azure groups, no external roles will be mapped. reason: {}", name, e.what());
305+
return true;
306+
}
307+
308+
const_cast<TokenCredentials &>(credentials).setGroups(external_groups_names);
207309

208310
return true;
209311
}

src/Access/AccessTokenProcessor.h

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,12 @@ class GoogleAccessTokenProcessor;
2626
class IAccessTokenProcessor
2727
{
2828
public:
29-
IAccessTokenProcessor(const String & name_, const String & email_regex_str) : name(name_), email_regex(email_regex_str)
29+
IAccessTokenProcessor(const String & name_,
30+
const UInt64 cache_invalidation_interval_,
31+
const String & email_regex_str)
32+
: name(name_),
33+
cache_invalidation_interval(cache_invalidation_interval_),
34+
email_regex(email_regex_str)
3035
{
3136
if (!email_regex_str.empty())
3237
{
@@ -36,19 +41,12 @@ class IAccessTokenProcessor
3641
}
3742
}
3843

39-
String getName()
40-
{
41-
return name;
42-
}
43-
4444
virtual ~IAccessTokenProcessor() = default;
4545

46-
virtual bool resolveAndValidate(const TokenCredentials & credentials) = 0;
46+
String getName() { return name; }
47+
UInt64 getCacheInvalidationInterval() { return cache_invalidation_interval; }
4748

48-
virtual std::set<String> getGroups([[maybe_unused]] const TokenCredentials & credentials)
49-
{
50-
return {};
51-
}
49+
virtual bool resolveAndValidate(const TokenCredentials & credentials) = 0;
5250

5351
static std::unique_ptr<DB::IAccessTokenProcessor> parseTokenProcessor(
5452
const Poco::Util::AbstractConfiguration & config,
@@ -57,14 +55,18 @@ class IAccessTokenProcessor
5755

5856
protected:
5957
const String name;
58+
const UInt64 cache_invalidation_interval;
6059
re2::RE2 email_regex;
6160
};
6261

6362

6463
class GoogleAccessTokenProcessor : public IAccessTokenProcessor
6564
{
6665
public:
67-
GoogleAccessTokenProcessor(const String & name_, const String & email_regex_str) : IAccessTokenProcessor(name_, email_regex_str) {}
66+
GoogleAccessTokenProcessor(const String & name_,
67+
const UInt64 cache_invalidation_interval_,
68+
const String & email_regex_str)
69+
: IAccessTokenProcessor(name_, cache_invalidation_interval_, email_regex_str) {}
6870

6971
bool resolveAndValidate(const TokenCredentials & credentials) override;
7072

@@ -80,24 +82,16 @@ class AzureAccessTokenProcessor : public IAccessTokenProcessor
8082
{
8183
public:
8284
AzureAccessTokenProcessor(const String & name_,
85+
const UInt64 cache_invalidation_interval_,
8386
const String & email_regex_str,
84-
const String & client_id_,
85-
const String & tenant_id_,
86-
const String & client_secret_)
87-
: IAccessTokenProcessor(name_, email_regex_str),
88-
client_id(client_id_),
89-
tenant_id(tenant_id_),
90-
client_secret(client_secret_),
91-
jwks_uri_str("https://login.microsoftonline.com/" + tenant_id + "/discovery/v2.0/keys") {}
87+
const String & tenant_id_)
88+
: IAccessTokenProcessor(name_, cache_invalidation_interval_, email_regex_str),
89+
jwks_uri_str("https://login.microsoftonline.com/" + tenant_id_ + "/discovery/v2.0/keys") {}
9290

9391
bool resolveAndValidate(const TokenCredentials & credentials) override;
9492
private:
9593
static const Poco::URI user_info_uri;
9694

97-
const String client_id;
98-
const String tenant_id;
99-
const String client_secret;
100-
10195
const String jwks_uri_str;
10296

10397
String validateTokenAndGetUsername(const String & token) const;

src/Access/Credentials.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,6 @@ const String & BasicCredentials::getPassword() const
101101
}
102102

103103
/// Unless the token is validated, we will not use any data from it, including username.
104-
TokenCredentials::TokenCredentials(const String & token_) : Credentials(""), token(token_) {}
104+
TokenCredentials::TokenCredentials(const String & token_) : Credentials(""), token(token_), expires_at(std::chrono::system_clock::now() + std::chrono::hours(1)) {}
105105

106106
}

src/Access/Credentials.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,18 @@ class TokenCredentials : public Credentials
180180
{
181181
groups = groups_;
182182
}
183+
std::optional<std::chrono::system_clock::time_point> getExpiresAt() const
184+
{
185+
return expires_at;
186+
}
187+
void setExpiresAt(std::chrono::system_clock::time_point expires_at_)
188+
{
189+
expires_at = expires_at_;
190+
}
183191
private:
184192
String token;
185193
std::set<String> groups;
194+
std::optional<std::chrono::system_clock::time_point> expires_at;
186195
};
187196

188197
}

0 commit comments

Comments
 (0)