-
Notifications
You must be signed in to change notification settings - Fork 244
Persistence implementations for list pagination #1555
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3375ff5
255e44b
015e637
434ffb1
8ffc29d
66d20aa
a5d62ed
91e61eb
cfdf11e
903b947
ccc612e
f566fd1
fea135d
99e2a37
2908207
ae4027a
6d2047e
62914f9
c6611be
5def8fa
aa90da2
95f8410
80aee49
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
import java.util.function.Predicate; | ||
import java.util.stream.Collectors; | ||
import org.apache.polaris.core.PolarisCallContext; | ||
import org.apache.polaris.core.PolarisDiagnostics; | ||
import org.apache.polaris.core.entity.EntityNameLookupRecord; | ||
import org.apache.polaris.core.entity.PolarisBaseEntity; | ||
import org.apache.polaris.core.entity.PolarisChangeTrackingVersions; | ||
|
@@ -49,8 +50,10 @@ | |
import org.apache.polaris.core.persistence.PolicyMappingAlreadyExistsException; | ||
import org.apache.polaris.core.persistence.PrincipalSecretsGenerator; | ||
import org.apache.polaris.core.persistence.RetryOnConcurrencyException; | ||
import org.apache.polaris.core.persistence.pagination.EntityIdPageToken; | ||
import org.apache.polaris.core.persistence.pagination.HasPageSize; | ||
import org.apache.polaris.core.persistence.pagination.Page; | ||
import org.apache.polaris.core.persistence.pagination.PageRequest; | ||
import org.apache.polaris.core.persistence.pagination.PageToken; | ||
import org.apache.polaris.core.policy.PolarisPolicyMappingRecord; | ||
import org.apache.polaris.core.policy.PolicyType; | ||
|
@@ -72,16 +75,19 @@ public class JdbcBasePersistenceImpl implements BasePersistence, IntegrationPers | |
private final PrincipalSecretsGenerator secretsGenerator; | ||
private final PolarisStorageIntegrationProvider storageIntegrationProvider; | ||
private final String realmId; | ||
private final PolarisDiagnostics polarisDiagnostics; | ||
|
||
public JdbcBasePersistenceImpl( | ||
DatasourceOperations databaseOperations, | ||
PrincipalSecretsGenerator secretsGenerator, | ||
PolarisStorageIntegrationProvider storageIntegrationProvider, | ||
String realmId) { | ||
String realmId, | ||
PolarisDiagnostics polarisDiagnostics) { | ||
this.datasourceOperations = databaseOperations; | ||
this.secretsGenerator = secretsGenerator; | ||
this.storageIntegrationProvider = storageIntegrationProvider; | ||
this.realmId = realmId; | ||
this.polarisDiagnostics = polarisDiagnostics; | ||
} | ||
|
||
@Override | ||
|
@@ -359,15 +365,15 @@ public Page<EntityNameLookupRecord> listEntities( | |
long catalogId, | ||
long parentId, | ||
@Nonnull PolarisEntityType entityType, | ||
@Nonnull PageToken pageToken) { | ||
@Nonnull PageRequest pageRequest) { | ||
return listEntities( | ||
callCtx, | ||
catalogId, | ||
parentId, | ||
entityType, | ||
entity -> true, | ||
EntityNameLookupRecord::new, | ||
pageToken); | ||
pageRequest); | ||
} | ||
|
||
@Nonnull | ||
|
@@ -378,15 +384,15 @@ public Page<EntityNameLookupRecord> listEntities( | |
long parentId, | ||
@Nonnull PolarisEntityType entityType, | ||
@Nonnull Predicate<PolarisBaseEntity> entityFilter, | ||
@Nonnull PageToken pageToken) { | ||
@Nonnull PageRequest pageRequest) { | ||
return listEntities( | ||
callCtx, | ||
catalogId, | ||
parentId, | ||
entityType, | ||
entityFilter, | ||
EntityNameLookupRecord::new, | ||
pageToken); | ||
pageRequest); | ||
} | ||
|
||
@Nonnull | ||
|
@@ -398,7 +404,7 @@ public <T> Page<T> listEntities( | |
PolarisEntityType entityType, | ||
@Nonnull Predicate<PolarisBaseEntity> entityFilter, | ||
@Nonnull Function<PolarisBaseEntity, T> transformer, | ||
@Nonnull PageToken pageToken) { | ||
@Nonnull PageRequest pageRequest) { | ||
Map<String, Object> params = | ||
Map.of( | ||
"catalog_id", | ||
|
@@ -413,6 +419,11 @@ public <T> Page<T> listEntities( | |
// Limit can't be pushed down, due to client side filtering | ||
// absence of transaction. | ||
String query = QueryGenerator.generateSelectQuery(new ModelEntity(), params); | ||
PageToken pageToken = buildPageToken(pageRequest); | ||
if (pageToken instanceof EntityIdPageToken entityIdPageToken) { | ||
query += String.format(" AND id > %d ORDER BY id ASC", entityIdPageToken.getId()); | ||
eric-maynard marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
try { | ||
List<PolarisBaseEntity> results = new ArrayList<>(); | ||
datasourceOperations.executeSelectOverStream( | ||
|
@@ -425,11 +436,8 @@ public <T> Page<T> listEntities( | |
} | ||
data.forEach(results::add); | ||
}); | ||
List<T> resultsOrEmpty = | ||
results == null | ||
? Collections.emptyList() | ||
: results.stream().filter(entityFilter).map(transformer).collect(Collectors.toList()); | ||
return Page.fromItems(resultsOrEmpty); | ||
List<T> resultsOrEmpty = results.stream().map(transformer).collect(Collectors.toList()); | ||
return pageToken.buildNextPage(resultsOrEmpty); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like Here, the call would look like: Note: the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Side note: if we want to avoid empty last pages (when the list ends exactly on the last element from the query) we may need to request one more entry from the database, but not return it to the caller. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In this particular implementation, no. In other implementations, such as the previous Empty last pages are fine. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That aside, what information would need to flow from the previous page token to the next directly? I suppose all of that is inside There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In the case of In the case of
In the sense that |
||
} catch (SQLException e) { | ||
throw new RuntimeException( | ||
String.format("Failed to retrieve polaris entities due to %s", e.getMessage()), e); | ||
|
@@ -915,4 +923,8 @@ PolarisStorageIntegration<T> loadPolarisStorageIntegration( | |
private interface QueryAction { | ||
Integer apply(String query) throws SQLException; | ||
} | ||
|
||
private PageToken buildPageToken(PageRequest pageRequest) { | ||
return EntityIdPageToken.fromPageRequest(pageRequest); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How would this work with
org.apache.polaris.extension.persistence.relational.jdbc.IdGenerator
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you're right that it won't; this logic is copied from EclipseLink where IDs are always increasing but does not work with the current way that the JDBC metastore creates IDs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd propose to change the Page/PageToken contract in a way to push the parameter "as is" down to the persistence layer and let the persistence implementation deal with it.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I spoke with @singhpk234 who noted this is probably the same discussion as here on the old PR. With that context, I think we might be OK here.
IMO it's alright that the list ordering you'd get across metastores won't be the same. Other than that difference, seems like everything should work with JDBC's
IdGenerator
. Although the IDs aren't generated sequentially, pagination only uses the entity ID as an essentially arbitrary consistent ordering.The key implication here is that if an entity gets created in the middle of a listing operation (e.g. between list calls 2 and 3) it may or may not show up in the next page. An alternative would be to try to filter it out so that the behavior is more obvious & consistent, but I think the simple approach that ultimately gives the user a chance to see these new entities is good.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Losing new entities that are stored after pagination start is fine from my POV. The JDBC persistence does not implement catalog-level versioning, so this is unavoidable, I guess.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed that we will naturally lose some entities, the question is whether we are OK with entities stored after pagination start being lost nondeterministically rather than always. Right now, whether the new entity is lost or not depends on what entity ID it gets. If it gets a high entity ID you might see it in a later page and if it gets a low ID you might not.
My thought on this question is "yes", because it's better to show the entity if we can and it simplifies the code.
But if we feel like this is too unintuitive, we can add a secondary filter on the entity's creation time to try and get rid of these entities (on a best-effort basis, since clocks are not perfect).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think current pagination behaviour wrt concurrent changes is fine.
Making it deterministic would be a great addition to Polaris, but that, I think, has a much broader scope. For example, if an entry is deleted after pagination starts, but a client re-submits a page request using an old token, the new response would still be inconsistent with the old response.
From my POV a complete and deterministic pagination solution implies catalog-level versioning.