diff --git a/api/src/main/java/org/apache/iceberg/UpdateLocation.java b/api/src/main/java/org/apache/iceberg/UpdateLocation.java index b069d32f1086..dfc3b8f0c8ca 100644 --- a/api/src/main/java/org/apache/iceberg/UpdateLocation.java +++ b/api/src/main/java/org/apache/iceberg/UpdateLocation.java @@ -18,10 +18,10 @@ */ package org.apache.iceberg; -/** API for setting a table's or view's base location. */ +/** API for setting a table's, view's or index's base location. */ public interface UpdateLocation extends PendingUpdate { /** - * Set the table's or view's location. + * Set the table's, view's or index's location. * * @param location a String location * @return this for method chaining diff --git a/api/src/main/java/org/apache/iceberg/catalog/IndexCatalog.java b/api/src/main/java/org/apache/iceberg/catalog/IndexCatalog.java new file mode 100644 index 000000000000..b7afafe25534 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/catalog/IndexCatalog.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.catalog; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchIndexException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.index.Index; +import org.apache.iceberg.index.IndexBuilder; +import org.apache.iceberg.index.IndexDefinition; +import org.apache.iceberg.index.IndexType; + +/** + * A Catalog API for index create, drop, and load operations. + * + *

Indexes are specialized data structures that improve the speed of data retrieval operations on + * a database table. An index instance is uniquely identified by its {@link IndexIdentifier}, which + * is constructed by combining the {@link TableIdentifier} with the index name. + */ +public interface IndexCatalog { + + /** + * Return the name for this catalog. + * + * @return this catalog's name + */ + String name(); + + /** + * Return a list of index instances for the specified table, filtered to include only those whose + * type matches one of the provided types. + * + *

This enables query optimizers to discover the indexes available for a given table. The + * returned list is already filtered to include only index types supported by the engine. + * + * @param tableIdentifier the identifier of the table to list indexes for + * @param types the index types to filter by; if empty, returns all indexes + * @return a list of index summaries matching the criteria + * @throws NoSuchTableException if the table does not exist + */ + List listIndexes(TableIdentifier tableIdentifier, IndexType... types); + + /** + * Load an index. + * + * @param identifier an index identifier + * @return instance of {@link Index} implementation referred by the identifier + * @throws NoSuchIndexException if the index does not exist + */ + Index loadIndex(IndexIdentifier identifier); + + /** + * Check whether an index exists. + * + * @param identifier an index identifier + * @return true if the index exists, false otherwise + */ + default boolean indexExists(IndexIdentifier identifier) { + try { + loadIndex(identifier); + return true; + } catch (NoSuchIndexException e) { + return false; + } + } + + /** + * Instantiate a builder to create or update an index. + * + * @param identifier a view identifier + * @return a view builder + */ + IndexBuilder buildIndex(IndexIdentifier identifier); + + /** + * Drop an index. + * + * @param identifier an index identifier + * @return true if the index was dropped, false if the index did not exist + */ + boolean dropIndex(IndexIdentifier identifier); + + /** + * Invalidate cached index metadata from current catalog. + * + *

If the index is already loaded or cached, drop cached data. If the index does not exist or + * is not cached, do nothing. + * + * @param identifier an index identifier + */ + default void invalidateIndex(IndexIdentifier identifier) {} + + /** + * Register an index with the catalog if it does not exist. + * + * @param identifier an index identifier + * @param metadataFileLocation the location of a metadata file + * @return an Index instance + * @throws AlreadyExistsException if an index with the same identifier already exists in the + * catalog. + */ + default Index registerIndex(IndexIdentifier identifier, String metadataFileLocation) { + throw new UnsupportedOperationException("Registering index is not supported"); + } + + /** + * Initialize an index catalog given a custom name and a map of catalog properties. + * + *

A custom index catalog implementation must have a no-arg constructor. A compute engine like + * Spark or Flink will first initialize the catalog without any arguments, and then call this + * method to complete catalog initialization with properties passed into the engine. + * + * @param name a custom name for the catalog + * @param properties catalog properties + */ + default void initialize(String name, Map properties) {} +} diff --git a/api/src/main/java/org/apache/iceberg/catalog/IndexIdentifier.java b/api/src/main/java/org/apache/iceberg/catalog/IndexIdentifier.java new file mode 100644 index 000000000000..22e9c0351a40 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/catalog/IndexIdentifier.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.catalog; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.Objects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +/** + * Identifies an index instance within a catalog. + * + *

An index instance is uniquely identified by combining the {@link TableIdentifier} with the + * index name. This ensures that index names are scoped to their respective tables. + * + *

Example: For a table "persons" in the "company" database with an index named + * "nationality_index", the resulting IndexIdentifier would be: "company.persons.nationality_index" + */ +public class IndexIdentifier implements Serializable { + + private final TableIdentifier tableIdentifier; + private final String name; + + private IndexIdentifier(TableIdentifier tableIdentifier, String name) { + Preconditions.checkArgument(tableIdentifier != null, "Table identifier cannot be null"); + Preconditions.checkArgument( + name != null && !name.isEmpty(), "Index name cannot be null or empty"); + this.tableIdentifier = tableIdentifier; + this.name = name; + } + + /** + * Creates an IndexIdentifier from a table identifier and index name. + * + * @param tableIdentifier the table identifier + * @param name the index name + * @return an IndexIdentifier + */ + public static IndexIdentifier of(TableIdentifier tableIdentifier, String name) { + return new IndexIdentifier(tableIdentifier, name); + } + + /** + * Creates an IndexIdentifier from a namespace, table name, and index name. + * + * @param namespace the namespace + * @param tableName the table name + * @param indexName the index name + * @return an IndexIdentifier + */ + public static IndexIdentifier of(Namespace namespace, String tableName, String indexName) { + return new IndexIdentifier(TableIdentifier.of(namespace, tableName), indexName); + } + + /** + * Creates an IndexIdentifier by parsing a string representation. + * + *

The string should be in the format "namespace.table.indexName" where namespace can contain + * multiple levels separated by dots. + * + * @param identifier the string representation of the index identifier + * @return an IndexIdentifier + * @throws IllegalArgumentException if the identifier string is invalid + */ + public static IndexIdentifier parse(String identifier) { + Preconditions.checkArgument( + identifier != null && !identifier.isEmpty(), + "Cannot parse index identifier: null or empty"); + + return IndexIdentifier.of(identifier.split("\\.")); + } + + public static IndexIdentifier of(String... names) { + Preconditions.checkArgument(names != null, "Cannot create index identifier from null array"); + Preconditions.checkArgument( + names.length > 0, "Cannot create index identifier without a index name"); + Preconditions.checkArgument( + names.length > 1, "Cannot create index identifier without a table name"); + + return new IndexIdentifier( + TableIdentifier.of(Arrays.copyOf(names, names.length - 1)), names[names.length - 1]); + } + + /** + * Returns the table identifier for this index. + * + * @return the table identifier + */ + public TableIdentifier tableIdentifier() { + return tableIdentifier; + } + + /** + * Returns the namespace for this index (same as the table's namespace). + * + * @return the namespace + */ + public Namespace namespace() { + return tableIdentifier.namespace(); + } + + /** + * Returns the name of the table this index belongs to. + * + * @return the table name + */ + public String tableName() { + return tableIdentifier.name(); + } + + /** + * Returns the name of this index. 
+ * + * @return the index name + */ + public String name() { + return name; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + IndexIdentifier that = (IndexIdentifier) o; + return tableIdentifier.equals(that.tableIdentifier) && name.equals(that.name); + } + + @Override + public int hashCode() { + return Objects.hash(tableIdentifier, name); + } + + @Override + public String toString() { + return tableIdentifier.toString() + "." + name; + } +} diff --git a/api/src/main/java/org/apache/iceberg/catalog/IndexSessionCatalog.java b/api/src/main/java/org/apache/iceberg/catalog/IndexSessionCatalog.java new file mode 100644 index 000000000000..23b1a35cdf6f --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/catalog/IndexSessionCatalog.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.catalog; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchIndexException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.index.Index; +import org.apache.iceberg.index.IndexBuilder; +import org.apache.iceberg.index.IndexDefinition; +import org.apache.iceberg.index.IndexType; + +/** + * A session Catalog API for index create, drop, and load operations. + * + *

Indexes are specialized data structures that improve the speed of data retrieval operations on + * a database table. An index instance is uniquely identified by its {@link IndexIdentifier}, which + * is constructed by combining the {@link TableIdentifier} with the index name. + */ +public interface IndexSessionCatalog { + + /** + * Return the name for this catalog. + * + * @return this catalog's name + */ + String name(); + + /** + * Return a list of index instances for the specified table, filtered to include only those whose + * type matches one of the provided types. + * + *

This enables query optimizers to discover the indexes available for a given table. The + * returned list is already filtered to include only index types supported by the engine. + * + * @param context a session context + * @param tableIdentifier the identifier of the table to list indexes for + * @param types the index types to filter by; if empty, returns all indexes + * @return a list of index summaries matching the criteria + * @throws NoSuchTableException if the table does not exist + */ + List listIndexes( + SessionCatalog.SessionContext context, TableIdentifier tableIdentifier, IndexType... types); + + /** + * Load an index. + * + * @param context a session context + * @param identifier an index identifier + * @return instance of {@link Index} implementation referred by the identifier + * @throws NoSuchIndexException if the index does not exist + */ + Index loadIndex(SessionCatalog.SessionContext context, IndexIdentifier identifier); + + /** + * Check whether an index exists. + * + * @param context a session context + * @param identifier an index identifier + * @return true if the index exists, false otherwise + */ + default boolean indexExists(SessionCatalog.SessionContext context, IndexIdentifier identifier) { + try { + loadIndex(context, identifier); + return true; + } catch (NoSuchIndexException e) { + return false; + } + } + + /** + * Instantiate a builder to create or update an index. + * + * @param context a session context + * @param identifier an index identifier + * @return an index builder + */ + IndexBuilder buildIndex(SessionCatalog.SessionContext context, IndexIdentifier identifier); + + /** + * Drop an index. + * + * @param context a session context + * @param identifier an index identifier + * @return true if the index was dropped, false if the index did not exist + */ + boolean dropIndex(SessionCatalog.SessionContext context, IndexIdentifier identifier); + + /** + * Invalidate cached index metadata from current catalog. + * + *

If the index is already loaded or cached, drop cached data. If the index does not exist or + * is not cached, do nothing. + * + * @param context a session context + * @param identifier an index identifier + */ + default void invalidateIndex(SessionCatalog.SessionContext context, IndexIdentifier identifier) {} + + /** + * Register an index with the catalog if it does not exist. + * + * @param context a session context + * @param identifier an index identifier + * @param metadataFileLocation the location of a metadata file + * @return an Index instance + * @throws AlreadyExistsException if an index with the same identifier already exists in the + * catalog. + */ + default Index registerIndex( + SessionCatalog.SessionContext context, + IndexIdentifier identifier, + String metadataFileLocation) { + throw new UnsupportedOperationException("Registering index is not supported"); + } + + /** + * Initialize an index catalog given a custom name and a map of catalog properties. + * + *

A custom index catalog implementation must have a no-arg constructor. A compute engine like + * Spark or Flink will first initialize the catalog without any arguments, and then call this + * method to complete catalog initialization with properties passed into the engine. + * + * @param name a custom name for the catalog + * @param properties catalog properties + */ + void initialize(String name, Map properties); +} diff --git a/api/src/main/java/org/apache/iceberg/exceptions/NoSuchIndexException.java b/api/src/main/java/org/apache/iceberg/exceptions/NoSuchIndexException.java new file mode 100644 index 000000000000..161fe124b401 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/exceptions/NoSuchIndexException.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.exceptions; + +import com.google.errorprone.annotations.FormatMethod; + +/** Exception raised when attempting to load an index that does not exist. */ +public class NoSuchIndexException extends RuntimeException implements CleanableFailure { + @FormatMethod + public NoSuchIndexException(String message, Object... 
args) { + super(String.format(message, args)); + } + + @FormatMethod + public NoSuchIndexException(Throwable cause, String message, Object... args) { + super(String.format(message, args), cause); + } +} diff --git a/api/src/main/java/org/apache/iceberg/index/AddIndexSnapshot.java b/api/src/main/java/org/apache/iceberg/index/AddIndexSnapshot.java new file mode 100644 index 000000000000..061a32c5e895 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/AddIndexSnapshot.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import org.apache.iceberg.PendingUpdate; + +/** + * API for adding a new snapshot to an {@link Index}. + * + *

This interface combines {@link PendingUpdate} for committing changes with {@link + * SnapshotBuilder} for fluent snapshot construction. + * + *

The {@link #apply()} method returns the new {@link IndexSnapshot} for validation. + * + *

When committing, this new snapshot will be added to the {@link Index} metadata. + * + *

Commit conflicts will be resolved by applying the pending changes to the new {@link Index} + * metadata. + */ +public interface AddIndexSnapshot + extends PendingUpdate, SnapshotBuilder {} diff --git a/api/src/main/java/org/apache/iceberg/index/AddIndexVersion.java b/api/src/main/java/org/apache/iceberg/index/AddIndexVersion.java new file mode 100644 index 000000000000..c25c85a06e87 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/AddIndexVersion.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import org.apache.iceberg.PendingUpdate; + +/** + * API for adding a new {@link Index} version. + * + *

This interface combines {@link PendingUpdate} for committing changes with {@link + * VersionBuilder} for fluent version construction. + * + *

The {@link #apply()} method returns the new {@link IndexVersion} for validation. + * + *

When committing, the new version will be added to the {@link Index} metadata. + * + *

Commit conflicts will be resolved by applying the pending changes to the new {@link Index} + * metadata. + */ +public interface AddIndexVersion + extends PendingUpdate, VersionBuilder {} diff --git a/api/src/main/java/org/apache/iceberg/index/Index.java b/api/src/main/java/org/apache/iceberg/index/Index.java new file mode 100644 index 000000000000..9afef85560c3 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/Index.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.List; +import java.util.UUID; +import org.apache.iceberg.UpdateLocation; + +/** + * Interface for index definition. + * + *

An index is a specialized data structure that improves the speed of data retrieval operations + * on a database table. The index can be computed either synchronously and committed along the + * DDL/DML changes or asynchronously and updated by an index maintenance process. + */ +public interface Index { + + /** + * Return the name of this index. + * + * @return the index name + */ + String name(); + + /** + * Return the UUID that identifies this index. + * + *

Generated when the index is created. Implementations must throw an exception if an index's + * UUID does not match the expected UUID after refreshing metadata. + * + * @return the index UUID + */ + UUID uuid(); + + /** + * Return the UUID of the table that this index is associated with. + * + *

Set when the index is created and must not be changed afterward. + * + * @return the table UUID + */ + String tableUuid(); + + /** + * Return the format version for this index. + * + *

An integer version number for the index metadata format. + * + * @return the format version + */ + int formatVersion(); + + /** + * Return the type of this index. + * + *

One of the supported index-types. Must be supplied during the creation of an index and must + * not be changed. + * + * @return the index type + */ + IndexType type(); + + /** + * Return the column IDs contained by this index. + * + *

The ids of the columns contained by the index. Must be supplied during the creation of an + * index and must not be changed. + * + * @return a list of column IDs + */ + List indexColumnIds(); + + /** + * Return the column IDs that this index is optimized for. + * + *

The ids of the columns that the index is designed to optimize for retrieval. Must be + * supplied during the creation of an index and must not be changed. + * + * @return a list of column IDs + */ + List optimizedColumnIds(); + + /** + * Return the index's base location. + * + *

Used to create index file locations. + * + * @return the index location + */ + String location(); + + /** Return the ID of the current version of this index. */ + int currentVersionId(); + + /** Get the current version for this index. */ + IndexVersion currentVersion(); + + /** + * Get the versions of this index. + * + *

A list of known versions of the index, the number of versions retained is + * implementation-specific. A version with a current-version-id must be present in this list. + * + * @return an Iterable of versions of this index + */ + Iterable versions(); + + /** + * Get a version in this index by ID. + * + * @param versionId version ID + * @return a version, or null if the ID cannot be found + */ + IndexVersion version(int versionId); + + /** + * Get the version history of this index. + * + *

A list of version log entries with the timestamp and version-id for every change to + * current-version-id. The number of entries retained is implementation-specific. + * current-version-id may or may not be present in this list. + * + * @return a list of {@link IndexHistoryEntry} + */ + List history(); + + /** + * Get the snapshots of this index. + * + *

During index maintenance a new index snapshot is generated for the specific Table snapshot, + * and it is added to the snapshots list. + * + * @return a list of {@link IndexSnapshot} + */ + List snapshots(); + + /** + * Get a snapshot in this index by index snapshot ID. + * + * @param indexSnapshotId index snapshot ID + * @return a snapshot, or null if the ID cannot be found + */ + IndexSnapshot snapshot(long indexSnapshotId); + + /** + * Get a snapshot in this index by table snapshot ID. + * + * @param tableSnapshotId table snapshot ID + * @return a snapshot, or null if no index snapshot exists for the table snapshot + */ + IndexSnapshot snapshotForTableSnapshot(long tableSnapshotId); + + /** Create a new {@link AddIndexVersion} to replace the properties for the index. */ + default AddIndexVersion addVersion() { + throw new UnsupportedOperationException("Updating index version is not supported."); + } + + /** Create a new {@link UpdateLocation} to set the index's location. */ + default UpdateLocation updateLocation() { + throw new UnsupportedOperationException("Updating index location is not supported."); + } + + /** Create a new {@link AddIndexSnapshot} for the index. */ + default AddIndexSnapshot addIndexSnapshot() { + throw new UnsupportedOperationException("Adding index snapshot is not supported."); + } + + /** Create a new {@link RemoveIndexSnapshots} to remove snapshots from the index. */ + default RemoveIndexSnapshots removeIndexSnapshots() { + throw new UnsupportedOperationException("Removing index snapshots is not supported."); + } +} diff --git a/api/src/main/java/org/apache/iceberg/index/IndexBuilder.java b/api/src/main/java/org/apache/iceberg/index/IndexBuilder.java new file mode 100644 index 000000000000..661a533728cb --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/IndexBuilder.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.List; +import java.util.UUID; +import org.apache.iceberg.catalog.IndexCatalog; +import org.apache.iceberg.catalog.IndexIdentifier; + +/** + * A builder used to create or replace an {@link Index}. + * + *

Call {@link IndexCatalog#buildIndex(IndexIdentifier)} to create a new builder. + */ +public interface IndexBuilder + extends VersionBuilder, + SnapshotBuilder, + RemoveSnapshotsBuilder { + + /** + * Set the index type. + * + * @param type the type of the index (e.g., BTREE, TERM, IVF) + */ + IndexBuilder withType(IndexType type); + + /** + * Set the column IDs to be stored losslessly in the index. + * + * @param columnIds the IDs of columns contained by the index + */ + IndexBuilder withIndexColumnIds(List columnIds); + + /** + * Set the column IDs to be stored losslessly in the index. + * + * @param columnIds the IDs of columns contained by the index + */ + IndexBuilder withIndexColumnIds(int... columnIds); + + /** + * Set the column IDs that this index is optimized for retrieval operations. + * + * @param columnIds the IDs of columns the index is optimized for retrieval + */ + IndexBuilder withOptimizedColumnIds(List columnIds); + + /** + * Set the column IDs that this index is optimized for. + * + * @param columnIds the IDs of columns the index is optimized for retrieval + */ + IndexBuilder withOptimizedColumnIds(int... columnIds); + + /** + * Set the UUID of the table that this index is associated with. + * + *

Must be set during index creation and cannot be changed afterward. + * + * @param tableUuid the table UUID + */ + IndexBuilder withTableUuid(UUID tableUuid); + + /** + * Sets a location for the index. + * + * @param location the base location to set for the index; used to create index file locations + */ + default IndexBuilder withLocation(String location) { + throw new UnsupportedOperationException("Setting an index's location is not supported"); + } + + /** + * Create the index. + * + * @return the index created + */ + Index create(); + + /** + * Replace the index. + * + * @return the {@link Index} replaced + */ + Index replace(); + + /** + * Create or replace the index. + * + * @return the {@link Index} created or replaced + */ + Index createOrReplace(); +} diff --git a/api/src/main/java/org/apache/iceberg/index/IndexDefinition.java b/api/src/main/java/org/apache/iceberg/index/IndexDefinition.java new file mode 100644 index 000000000000..42b338326328 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/IndexDefinition.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.IndexCatalog; +import org.apache.iceberg.catalog.IndexIdentifier; +import org.apache.iceberg.catalog.TableIdentifier; + +/** + * A compact representation of {@link Index} metadata for discovery and evaluation purposes. + * + *

This interface exposes only the essential attributes required by query optimizers to determine + * whether an index is applicable to a given query. Available indexes for a {@link Table} can be + * retrieved via {@link IndexCatalog#listIndexes(TableIdentifier, IndexType...)}. + * + *

To access complete index metadata, load the full {@link Index} instance using {@link + * IndexCatalog#loadIndex(IndexIdentifier)}. + */ +public interface IndexDefinition { + + /** Returns the unique identifier for this index instance. */ + IndexIdentifier id(); + + /** Returns the type of this index instance. */ + IndexType type(); + + /** Returns the IDs of columns which are stored losslessly in the index. */ + int[] indexColumnIds(); + + /** Returns the IDs of columns that the index is optimized for retrieval. */ + int[] optimizedColumnIds(); + + /** Returns the table snapshot IDs that have corresponding index snapshots. */ + long[] availableTableSnapshots(); +} diff --git a/api/src/main/java/org/apache/iceberg/index/IndexHistoryEntry.java b/api/src/main/java/org/apache/iceberg/index/IndexHistoryEntry.java new file mode 100644 index 000000000000..610de812857e --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/IndexHistoryEntry.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +/** + * Represents a history entry for an {@link Index}. + * + *

An entry contains a change to the index state. At the given timestamp, the current version was + * set to the given version ID. + * + *

The version log tracks changes to the index's current version. This is the index's history and + * allows reconstructing what version of the index would have been used at some point in time. + * + *

Note that this is not the version's creation time, which is stored in each version's metadata. + * An {@link IndexVersion} can appear multiple times in the version log, indicating that the index + * definition was rolled back. + */ +public interface IndexHistoryEntry { + + /** + * Return the timestamp in milliseconds of the change. + * + *

Timestamp when the index's current-version-id was updated (ms from epoch). + */ + long timestampMillis(); + + /** + * Return ID of the new current version. + * + *

ID that current-version-id was set to. + */ + int versionId(); +} diff --git a/api/src/main/java/org/apache/iceberg/index/IndexSnapshot.java b/api/src/main/java/org/apache/iceberg/index/IndexSnapshot.java new file mode 100644 index 000000000000..3051dec6b1a5 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/IndexSnapshot.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.Map; +import org.apache.iceberg.Table; + +/** + * {@link Index} snapshot linking an index snapshot to a specific {@link Table} snapshot. + * + *

Index data is versioned using snapshots, in a manner similar to table data. Each index + * snapshot is derived from a specific table snapshot, ensuring consistency between the index and + * the table state. + * + *

When an engine queries a particular table snapshot, it must determine which index snapshots + * are available for that snapshot. If a corresponding index snapshot is not available, the engine + * may choose to use a different index snapshot, provided that the semantics of the given index type + * allow it. When a specific index snapshot is selected, the snapshot parameters and user‑provided + * parameters stored with the referenced index version are used when evaluating the index. + * + *

This relationship is tracked in the index metadata file via an index–snapshot mapping. The + * mapping is updated whenever an index maintenance process creates a new index snapshot and + * associates it with a base table snapshot using {@link AddIndexSnapshot}, or when index snapshots + * are expired as part of index maintenance and removed through {@link RemoveIndexSnapshots}. + */ +public interface IndexSnapshot { + + /** Return the table snapshot ID which is the base of the index snapshot. */ + long tableSnapshotId(); + + /** Return the index snapshot ID. */ + long indexSnapshotId(); + + /** Return the index version ID when the snapshot was created. */ + int versionId(); + + /** + * Return the properties for this snapshot. + * + *

A map of index snapshot properties, represented as string-to-string pairs, supplied by the + * Index Maintenance process. + * + * @return an unmodifiable map of string properties, or empty if none set + */ + Map properties(); +} diff --git a/api/src/main/java/org/apache/iceberg/index/IndexType.java b/api/src/main/java/org/apache/iceberg/index/IndexType.java new file mode 100644 index 000000000000..0c3b7817de36 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/IndexType.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.Locale; + +/** + * Enum representing the supported index types. + * + *

An index type defines the algorithm and the underlying data structure that governs the + * behavior of the index. + */ +public enum IndexType { + /** + * Bloom Filter index for probabilistic file testing. + * + *

Efficient for file skipping on low cardinality columns with equality or in predicates. + */ + BLOOM("bloom"), + + /** + * B-Tree index for ordered data access. + * + *

Efficient for range queries and point lookups on orderable columns. + */ + BTREE("btree"), + + /** + * Term index for full-text search capabilities. + * + *

Efficient for text matching, tokenized search, and keyword lookups on string columns. + */ + TERM("term"), + + /** + * IVF (Inverted File) index for vector similarity search. + * + *

Efficient for approximate nearest neighbor queries on high-dimensional vector columns. + */ + IVF("ivf"); + + private final String name; + + IndexType(String name) { + this.name = name; + } + + public String typeName() { + return name; + } + + public static IndexType fromString(String typeName) { + for (IndexType type : IndexType.values()) { + if (type.name.equalsIgnoreCase(typeName)) { + return type; + } + } + + throw new IllegalArgumentException( + String.format("Unknown index type: %s", typeName.toLowerCase(Locale.ROOT))); + } + + @Override + public String toString() { + return name; + } +} diff --git a/api/src/main/java/org/apache/iceberg/index/IndexVersion.java b/api/src/main/java/org/apache/iceberg/index/IndexVersion.java new file mode 100644 index 000000000000..1151902e0f2a --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/IndexVersion.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.Map; + +/** + * A version of the {@link Index} created at a specific point in time, defined by user‑provided + * properties. + * + *

Versions are added to the index using the {@link AddIndexVersion} operation. + */ +public interface IndexVersion { + + /** + * Returns this version's unique identifier. + * + *

Version ids are monotonically increasing. + */ + int versionId(); + + /** + * Returns this version's creation timestamp in milliseconds since epoch. + * + *

This timestamp is the same as those produced by {@link System#currentTimeMillis()}. + */ + long timestampMillis(); + + /** + * Returns the user-supplied properties for this version. + * + *

A map of index properties, represented as string-to-string pairs, supplied by the user. + * + * @return an unmodifiable map of properties, or empty if none set + */ + Map<String, String> properties(); +} diff --git a/api/src/main/java/org/apache/iceberg/index/RemoveIndexSnapshots.java b/api/src/main/java/org/apache/iceberg/index/RemoveIndexSnapshots.java new file mode 100644 index 000000000000..94ddf4e5a7f1 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/RemoveIndexSnapshots.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.List; +import java.util.Set; +import org.apache.iceberg.PendingUpdate; + +/** + * A builder interface for removing {@link IndexSnapshot} instances from an {@link Index}. + * + *

This API accumulates snapshot deletions and the {@link #apply()} method returns the collected + * list for validation. + * + *

When committing, the collected snapshots will be removed from the index metadata. + * + *

Commit conflicts will be resolved by applying the pending changes to the new index metadata. + */ +public interface RemoveIndexSnapshots extends PendingUpdate<List<IndexSnapshot>> { + + /** + * Adds a snapshot to remove by its index snapshot ID. + * + * @param indexSnapshotId the index snapshot ID to remove + */ + RemoveIndexSnapshots removeSnapshotById(long indexSnapshotId); + + /** + * Adds multiple snapshots to remove by their index snapshot IDs. + * + * @param indexSnapshotIds the index snapshot IDs to remove + */ + RemoveIndexSnapshots removeSnapshotsByIds(Set<Long> indexSnapshotIds); + + /** Varargs overload of {@link #removeSnapshotsByIds(Set)} */ + RemoveIndexSnapshots removeSnapshotsByIds(long... indexSnapshotIds); +} diff --git a/api/src/main/java/org/apache/iceberg/index/RemoveSnapshotsBuilder.java b/api/src/main/java/org/apache/iceberg/index/RemoveSnapshotsBuilder.java new file mode 100644 index 000000000000..d3f6872905da --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/RemoveSnapshotsBuilder.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.Set; + +/** + * A builder interface for removing {@link IndexSnapshot}s from an {@link Index}. 
+ * + *

This API accumulates snapshot deletions for fluent chaining. + * + * @param <T> the concrete builder type for method chaining + */ +public interface RemoveSnapshotsBuilder<T> { + + /** + * Adds a snapshot to remove by its index snapshot ID. + * + * @param indexSnapshotId the index snapshot ID to remove + */ + T removeSnapshotById(long indexSnapshotId); + + /** + * Adds multiple snapshots to remove by their index snapshot IDs. + * + * @param indexSnapshotIds the index snapshot IDs to remove + */ + T removeSnapshotsByIds(Set<Long> indexSnapshotIds); + + /** Varargs overload of {@link #removeSnapshotsByIds(Set)} */ + T removeSnapshotsByIds(long... indexSnapshotIds); +} diff --git a/api/src/main/java/org/apache/iceberg/index/SnapshotBuilder.java b/api/src/main/java/org/apache/iceberg/index/SnapshotBuilder.java new file mode 100644 index 000000000000..fde71fab42ce --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/SnapshotBuilder.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.Map; + +/** + * A builder interface for creating {@link IndexSnapshot} instances. + * + *

This API collects snapshot configuration for fluent chaining. + * + * @param <T> the concrete builder type for method chaining + */ +public interface SnapshotBuilder<T> { + /** + * Sets the table snapshot ID which is the base of the index snapshot. + * + * @param tableSnapshotId the table snapshot ID + */ + T withTableSnapshotId(long tableSnapshotId); + + /** + * Sets the index snapshot ID. + * + * @param indexSnapshotId the index snapshot ID + */ + T withIndexSnapshotId(long indexSnapshotId); + + /** + * Sets properties for the index snapshot. + * + * @param properties a map of string properties + */ + T withSnapshotProperties(Map<String, String> properties); + + /** + * Adds a key/value property to the index snapshot. + * + * @param key the property key + * @param value the property value + */ + T withSnapshotProperty(String key, String value); +} diff --git a/api/src/main/java/org/apache/iceberg/index/VersionBuilder.java b/api/src/main/java/org/apache/iceberg/index/VersionBuilder.java new file mode 100644 index 000000000000..f810acb0885c --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/index/VersionBuilder.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import java.util.Map; + +/** + * Builder interface for constructing {@link IndexVersion} instances with custom properties. + * + *

Properties from the {@link Index}'s current version are not inherited. Only properties added + * through this builder are applied to the newly built version. + * + *

This API collects version configuration for fluent chaining. + * + * @param <T> the concrete builder type for method chaining + */ +public interface VersionBuilder<T> { + /** + * Adds key/value properties to the index version. + * + * @param properties key/value properties + */ + T withProperties(Map<String, String> properties); + + /** + * Adds a key/value property to the index version. + * + * @param key the property key + * @param value the property value + */ + T withProperty(String key, String value); +} diff --git a/core/src/main/java/org/apache/iceberg/catalog/BaseIndexSessionCatalog.java b/core/src/main/java/org/apache/iceberg/catalog/BaseIndexSessionCatalog.java new file mode 100644 index 000000000000..c1a1b86dbc91 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/catalog/BaseIndexSessionCatalog.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.catalog; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.iceberg.index.Index; +import org.apache.iceberg.index.IndexBuilder; +import org.apache.iceberg.index.IndexDefinition; +import org.apache.iceberg.index.IndexType; + +public abstract class BaseIndexSessionCatalog extends BaseViewSessionCatalog + implements IndexSessionCatalog { + + private final Cache indexCatalogs = + Caffeine.newBuilder().expireAfterAccess(10, TimeUnit.MINUTES).build(); + + public IndexCatalog asIndexCatalog(SessionCatalog.SessionContext context) { + return indexCatalogs.get(context.sessionId(), id -> new AsIndexCatalog(context)); + } + + public class AsIndexCatalog implements IndexCatalog { + private final SessionCatalog.SessionContext context; + + private AsIndexCatalog(SessionCatalog.SessionContext context) { + this.context = context; + } + + @Override + public String name() { + return BaseIndexSessionCatalog.this.name(); + } + + @Override + public List listIndexes(TableIdentifier tableIdentifier, IndexType... 
types) { + return BaseIndexSessionCatalog.this.listIndexes(context, tableIdentifier, types); + } + + @Override + public Index loadIndex(IndexIdentifier identifier) { + return BaseIndexSessionCatalog.this.loadIndex(context, identifier); + } + + @Override + public boolean indexExists(IndexIdentifier identifier) { + return BaseIndexSessionCatalog.this.indexExists(context, identifier); + } + + @Override + public IndexBuilder buildIndex(IndexIdentifier identifier) { + return BaseIndexSessionCatalog.this.buildIndex(context, identifier); + } + + @Override + public boolean dropIndex(IndexIdentifier identifier) { + return BaseIndexSessionCatalog.this.dropIndex(context, identifier); + } + + @Override + public void invalidateIndex(IndexIdentifier identifier) { + BaseIndexSessionCatalog.this.invalidateIndex(context, identifier); + } + + @Override + public Index registerIndex(IndexIdentifier identifier, String metadataFileLocation) { + return BaseIndexSessionCatalog.this.registerIndex(context, identifier, metadataFileLocation); + } + + @Override + public void initialize(String name, Map properties) { + throw new UnsupportedOperationException( + this.getClass().getSimpleName() + " doesn't support initialization"); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/catalog/IndexIdentifierParser.java b/core/src/main/java/org/apache/iceberg/catalog/IndexIdentifierParser.java new file mode 100644 index 000000000000..b50e312f39bf --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/catalog/IndexIdentifierParser.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.catalog; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +/** + * Parses IndexIdentifiers from a JSON representation, which is the JSON representation utilized in + * the REST catalog. + * + *

For IndexIdentifier.of(Namespace.of("dogs"), "owners", "name_idx"), we'd have the following + * JSON representation: + * + *

+ * {
+ *   "namespace": ["dogs"],
+ *   "table": "owners",
+ *   "name": "name_idx"
+ * }
+ * 
+ */ +public class IndexIdentifierParser { + + private static final String NAMESPACE = "namespace"; + private static final String TABLE = "table"; + private static final String NAME = "name"; + + private IndexIdentifierParser() {} + + public static String toJson(IndexIdentifier identifier) { + return toJson(identifier, false); + } + + public static String toJson(IndexIdentifier identifier, boolean pretty) { + return JsonUtil.generate(gen -> toJson(identifier, gen), pretty); + } + + public static void toJson(IndexIdentifier identifier, JsonGenerator generator) + throws IOException { + generator.writeStartObject(); + generator.writeFieldName(NAMESPACE); + generator.writeArray(identifier.namespace().levels(), 0, identifier.namespace().length()); + generator.writeStringField(TABLE, identifier.tableName()); + generator.writeStringField(NAME, identifier.name()); + generator.writeEndObject(); + } + + public static IndexIdentifier fromJson(String json) { + Preconditions.checkArgument( + json != null, "Cannot parse index identifier from invalid JSON: null"); + Preconditions.checkArgument( + !json.isEmpty(), "Cannot parse index identifier from invalid JSON: ''"); + return JsonUtil.parse(json, IndexIdentifierParser::fromJson); + } + + public static IndexIdentifier fromJson(JsonNode node) { + Preconditions.checkArgument( + node != null && !node.isNull() && node.isObject(), + "Cannot parse missing or non-object index identifier: %s", + node); + List levels = JsonUtil.getStringListOrNull(NAMESPACE, node); + String tableName = JsonUtil.getString(TABLE, node); + String indexName = JsonUtil.getString(NAME, node); + Namespace namespace = + levels == null ? 
Namespace.empty() : Namespace.of(levels.toArray(new String[0])); + return IndexIdentifier.of(namespace, tableName, indexName); + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/BaseIndex.java b/core/src/main/java/org/apache/iceberg/index/BaseIndex.java new file mode 100644 index 000000000000..46ef60081fd4 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/BaseIndex.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.io.Serializable; +import java.util.List; +import java.util.UUID; +import org.apache.iceberg.UpdateLocation; + +/** + * Base implementation of the {@link Index} interface. + * + *

This class provides a concrete implementation backed by {@link IndexOperations} which manages + * the index metadata. + */ +public class BaseIndex implements Index, Serializable { + + private final IndexOperations ops; + private final String name; + + public BaseIndex(IndexOperations ops, String name) { + this.ops = ops; + this.name = name; + } + + @Override + public String name() { + return name; + } + + public IndexOperations operations() { + return ops; + } + + @Override + public UUID uuid() { + return UUID.fromString(operations().current().uuid()); + } + + @Override + public String tableUuid() { + return operations().current().tableUuid(); + } + + @Override + public int formatVersion() { + return operations().current().formatVersion(); + } + + @Override + public IndexType type() { + return operations().current().type(); + } + + @Override + public List indexColumnIds() { + return operations().current().indexColumnIds(); + } + + @Override + public List optimizedColumnIds() { + return operations().current().optimizedColumnIds(); + } + + @Override + public String location() { + return operations().current().location(); + } + + @Override + public int currentVersionId() { + return operations().current().currentVersionId(); + } + + @Override + public IndexVersion currentVersion() { + return operations().current().currentVersion(); + } + + @Override + public Iterable versions() { + return operations().current().versions(); + } + + @Override + public IndexVersion version(int versionId) { + return operations().current().version(versionId); + } + + @Override + public List history() { + return operations().current().history(); + } + + @Override + public List snapshots() { + return operations().current().snapshots(); + } + + @Override + public IndexSnapshot snapshot(long indexSnapshotId) { + return operations().current().snapshot(indexSnapshotId); + } + + @Override + public IndexSnapshot snapshotForTableSnapshot(long tableSnapshotId) { + return 
operations().current().snapshotForTableSnapshot(tableSnapshotId); + } + + @Override + public AddIndexVersion addVersion() { + return new IndexVersionAdd(ops); + } + + @Override + public UpdateLocation updateLocation() { + return new SetIndexLocation(ops); + } + + @Override + public AddIndexSnapshot addIndexSnapshot() { + return new IndexSnapshotAdd(ops); + } + + @Override + public RemoveIndexSnapshots removeIndexSnapshots() { + return new IndexSnapshotsRemove(ops); + } + + @Override + public String toString() { + return name(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/BaseIndexCatalog.java b/core/src/main/java/org/apache/iceberg/index/BaseIndexCatalog.java new file mode 100644 index 000000000000..fc4819ba185d --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/BaseIndexCatalog.java @@ -0,0 +1,549 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.IndexCatalog; +import org.apache.iceberg.catalog.IndexIdentifier; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NoSuchIndexException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.util.PropertyUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Base implementation of {@link IndexCatalog} that provides a base implementation for index + * catalogs. + */ +public abstract class BaseIndexCatalog implements IndexCatalog { + private static final Logger LOG = LoggerFactory.getLogger(BaseIndexCatalog.class); + + public static final String INDEX_DEFAULT_PREFIX = "index-default."; + public static final String INDEX_OVERRIDE_PREFIX = "index-override."; + + private String catalogName; + private Map catalogProperties; + + /** + * Create new index operations for the given identifier. + * + * @param identifier an index identifier + * @return index operations for the identifier + */ + protected abstract IndexOperations newIndexOps(IndexIdentifier identifier); + + /** + * Return the catalog that manages tables. + * + * @return the table catalog + */ + protected abstract Catalog tableCatalog(); + + /** + * Return the default location for an index. 
+ * + * @param identifier the index identifier + * @return the default location for the index + */ + protected abstract String defaultIndexLocation(IndexIdentifier identifier); + + /** + * Check if a table exists. + * + * @param identifier the table identifier + * @return true if the table exists + */ + protected boolean tableExists(TableIdentifier identifier) { + return tableCatalog().tableExists(identifier); + } + + /** + * Check if an index identifier is valid. + * + * @param identifier the index identifier + * @return true if the identifier is valid + */ + protected boolean isValidIdentifier(IndexIdentifier identifier) { + return identifier != null + && identifier.tableIdentifier() != null + && identifier.name() != null + && !identifier.name().isEmpty(); + } + + /** + * Return the catalog properties. + * + * @return the catalog properties + */ + protected Map properties() { + return catalogProperties; + } + + @Override + public void initialize(String name, Map properties) { + this.catalogName = name; + this.catalogProperties = properties; + } + + @Override + public String name() { + return catalogName; + } + + @Override + public Index loadIndex(IndexIdentifier identifier) { + if (isValidIdentifier(identifier)) { + IndexOperations ops = newIndexOps(identifier); + if (ops.current() == null) { + throw new NoSuchIndexException("Index does not exist: %s", identifier); + } else { + return new BaseIndex(ops, fullIndexName(identifier)); + } + } + + throw new NoSuchIndexException("Invalid index identifier: %s", identifier); + } + + @Override + public List listIndexes(TableIdentifier tableIdentifier, IndexType... types) { + if (!tableExists(tableIdentifier)) { + throw new NoSuchTableException("Table does not exist: %s", tableIdentifier); + } + + return doListIndexes(tableIdentifier, types); + } + + /** + * List all indexes for a table. Subclasses must implement this method. 
+ * + * @param tableIdentifier the table identifier + * @return a list of index summaries + */ + protected abstract List doListIndexes(TableIdentifier tableIdentifier); + + /** + * List all indexes for a table. Subclasses could implement this method for efficient filtering. + * + * @param tableIdentifier the table identifier + * @param types the index types to filter by + * @return a list of index summaries + */ + protected List doListIndexes( + TableIdentifier tableIdentifier, IndexType[] types) { + List allIndexes = doListIndexes(tableIdentifier); + + if (types == null || types.length == 0) { + return allIndexes; + } + + List typeFilter = Arrays.asList(types); + return allIndexes.stream() + .filter(summary -> typeFilter.contains(summary.type())) + .collect(Collectors.toList()); + } + + @Override + public IndexBuilder buildIndex(IndexIdentifier identifier) { + return new BaseIndexBuilder(identifier); + } + + @Override + public boolean dropIndex(IndexIdentifier identifier) { + if (!isValidIdentifier(identifier)) { + return false; + } + + IndexOperations ops = newIndexOps(identifier); + if (ops.current() == null) { + return false; + } + + return doDropIndex(identifier); + } + + /** + * Drop an index. Subclasses must implement this method. 
+ * + * @param identifier the index identifier + * @return true if the index was dropped + */ + protected abstract boolean doDropIndex(IndexIdentifier identifier); + + @Override + public Index registerIndex(IndexIdentifier identifier, String metadataFileLocation) { + Preconditions.checkArgument( + identifier != null && isValidIdentifier(identifier), "Invalid identifier: %s", identifier); + Preconditions.checkArgument( + metadataFileLocation != null && !metadataFileLocation.isEmpty(), + "Cannot register an empty metadata file location as an index"); + + if (indexExists(identifier)) { + throw new AlreadyExistsException("Index already exists: %s", identifier); + } + + if (!tableExists(identifier.tableIdentifier())) { + throw new NoSuchTableException("Table does not exist: %s", identifier.tableIdentifier()); + } + + IndexOperations ops = newIndexOps(identifier); + IndexMetadata metadata = + IndexMetadataParser.read(((BaseIndexOperations) ops).io(), metadataFileLocation); + ops.commit(null, metadata); + + return new BaseIndex(ops, fullIndexName(identifier)); + } + + /** + * Return the full name for an index. + * + * @param identifier the index identifier + * @return the full index name + */ + protected String fullIndexName(IndexIdentifier identifier) { + return String.format("%s.%s", name(), identifier); + } + + /** Base implementation of {@link IndexBuilder}. 
*/ + protected class BaseIndexBuilder implements IndexBuilder { + private final IndexIdentifier identifier; + private UUID tableUuid = null; + private Map properties = null; + private Map snapshotProperties = null; + private Set snapshotIdsToRemove = null; + private IndexType type = null; + private List indexColumnIds = null; + private List optimizedColumnIds = null; + private String location = null; + private long tableSnapshotId = -1L; + private long indexSnapshotId = -1L; + + protected BaseIndexBuilder(IndexIdentifier identifier) { + Preconditions.checkArgument( + isValidIdentifier(identifier), "Invalid index identifier: %s", identifier); + this.identifier = identifier; + } + + /** + * Get default index properties set at Catalog level through catalog properties. + * + * @return default index properties specified in catalog properties + */ + private Map indexDefaultProperties() { + Map indexDefaultProperties = + PropertyUtil.propertiesWithPrefix(properties(), INDEX_DEFAULT_PREFIX); + LOG.info( + "Index properties set at catalog level through catalog properties: {}", + indexDefaultProperties); + return indexDefaultProperties; + } + + /** + * Get index properties that are enforced at Catalog level through catalog properties. 
+ * + * @return overriding index properties enforced through catalog properties + */ + private Map indexOverrideProperties() { + Map indexOverrideProperties = + PropertyUtil.propertiesWithPrefix(properties(), INDEX_OVERRIDE_PREFIX); + LOG.info( + "Index properties enforced at catalog level through catalog properties: {}", + indexOverrideProperties); + return indexOverrideProperties; + } + + @Override + public IndexBuilder withTableUuid(UUID uuid) { + this.tableUuid = uuid; + return this; + } + + @Override + public IndexBuilder withType(IndexType indexType) { + this.type = indexType; + return this; + } + + @Override + public IndexBuilder withIndexColumnIds(List columnIds) { + this.indexColumnIds = Lists.newArrayList(columnIds); + return this; + } + + @Override + public IndexBuilder withIndexColumnIds(int... columnIds) { + this.indexColumnIds = Lists.newArrayList(); + for (int columnId : columnIds) { + this.indexColumnIds.add(columnId); + } + + return this; + } + + @Override + public IndexBuilder withOptimizedColumnIds(List columnIds) { + this.optimizedColumnIds = Lists.newArrayList(columnIds); + return this; + } + + @Override + public IndexBuilder withOptimizedColumnIds(int... 
columnIds) { + this.optimizedColumnIds = Lists.newArrayList(); + for (int columnId : columnIds) { + this.optimizedColumnIds.add(columnId); + } + + return this; + } + + @Override + public IndexBuilder withProperties(Map newProperties) { + newProperties.forEach(this::withProperty); + return this; + } + + @Override + public IndexBuilder withProperty(String key, String value) { + if (properties == null) { + this.properties = Maps.newHashMap(); + properties.putAll(indexDefaultProperties()); + } + + properties.put(key, value); + return this; + } + + @Override + public IndexBuilder withSnapshotProperties(Map newProperties) { + newProperties.forEach(this::withSnapshotProperty); + return this; + } + + @Override + public IndexBuilder withSnapshotProperty(String key, String value) { + if (snapshotProperties == null) { + this.snapshotProperties = Maps.newHashMap(); + } + + snapshotProperties.put(key, value); + return this; + } + + @Override + public IndexBuilder withLocation(String newLocation) { + this.location = newLocation; + return this; + } + + @Override + public IndexBuilder withTableSnapshotId(long snapshotId) { + this.tableSnapshotId = snapshotId; + return this; + } + + @Override + public IndexBuilder withIndexSnapshotId(long snapshotId) { + this.indexSnapshotId = snapshotId; + return this; + } + + @Override + public IndexBuilder removeSnapshotById(long snapshotId) { + if (snapshotIdsToRemove == null) { + snapshotIdsToRemove = Sets.newHashSet(); + } + + snapshotIdsToRemove.add(snapshotId); + return this; + } + + @Override + public IndexBuilder removeSnapshotsByIds(Set snapshotIds) { + snapshotIds.forEach(this::removeSnapshotById); + return this; + } + + @Override + public IndexBuilder removeSnapshotsByIds(long... 
snapshotIds) { + for (long id : snapshotIds) { + removeSnapshotById(id); + } + + return this; + } + + @Override + public Index create() { + return create(newIndexOps(identifier)); + } + + @Override + public Index replace() { + return replace(newIndexOps(identifier)); + } + + @Override + public Index createOrReplace() { + IndexOperations ops = newIndexOps(identifier); + if (null == ops.current()) { + return create(ops); + } else { + return replace(ops); + } + } + + private Index create(IndexOperations ops) { + if (null != ops.current()) { + throw new AlreadyExistsException("Index already exists: %s", identifier); + } + + if (!tableExists(identifier.tableIdentifier())) { + throw new NoSuchTableException("Table does not exist: %s", identifier.tableIdentifier()); + } + + Preconditions.checkState( + tableUuid != null, "Cannot create index without specifying a table UUID"); + Preconditions.checkState(null != type, "Cannot create index without specifying a type"); + Preconditions.checkState( + indexColumnIds != null && !indexColumnIds.isEmpty(), + "Cannot create index without specifying index column ids"); + Preconditions.checkState( + optimizedColumnIds != null && !optimizedColumnIds.isEmpty(), + "Cannot create index without specifying optimized column ids"); + + IndexVersion indexVersion = indexVersion(1); + IndexMetadata.Builder builder = + IndexMetadata.builder() + .setTableUuid(tableUuid.toString()) + .setType(type) + .setIndexColumnIds(indexColumnIds) + .setOptimizedColumnIds(optimizedColumnIds) + .setLocation(null != location ? 
location : defaultIndexLocation(identifier)) + .addVersion(indexVersion) + .setCurrentVersion(indexVersion.versionId()); + + IndexSnapshot indexSnapshot = indexSnapshot(1); + if (indexSnapshot != null) { + builder.addSnapshot(indexSnapshot); + } + + try { + ops.commit(null, builder.build()); + } catch (CommitFailedException ignored) { + throw new AlreadyExistsException("Index was created concurrently: %s", identifier); + } + + return new BaseIndex(ops, fullIndexName(identifier)); + } + + private Index replace(IndexOperations ops) { + if (null == ops.current()) { + throw new NoSuchIndexException("Index does not exist: %s", identifier); + } + + Preconditions.checkState(type == null, "Cannot update index type"); + Preconditions.checkState(indexColumnIds == null, "Cannot update index column ids"); + Preconditions.checkState(optimizedColumnIds == null, "Cannot update optimized column ids"); + + IndexMetadata metadata = ops.current(); + + IndexMetadata.Builder builder = IndexMetadata.buildFrom(metadata); + + int currentVersionId = metadata.currentVersionId(); + if (properties != null) { + int maxVersionId = + metadata.versions().stream() + .map(IndexVersion::versionId) + .max(Integer::compareTo) + .orElseGet(metadata::currentVersionId); + + IndexVersion indexVersion = indexVersion(maxVersionId + 1); + builder = builder.addVersion(indexVersion).setCurrentVersion(indexVersion.versionId()); + currentVersionId = indexVersion.versionId(); + } + + IndexSnapshot snapshot = indexSnapshot(currentVersionId); + + if (snapshot != null) { + builder = builder.addSnapshot(snapshot); + } + + if (snapshotIdsToRemove != null) { + builder.removeSnapshots(snapshotIdsToRemove); + } + + if (location != null) { + builder.setLocation(location); + } + + IndexMetadata replacement = builder.build(); + + try { + ops.commit(metadata, replacement); + } catch (CommitFailedException e) { + throw new CommitFailedException(e, "Cannot replace index %s: concurrent modification", identifier); + } + + return 
new BaseIndex(ops, fullIndexName(identifier)); + } + + private IndexVersion indexVersion(int versionId) { + Map mergedProperties = + Maps.newHashMap(properties != null ? properties : indexDefaultProperties()); + mergedProperties.putAll(indexOverrideProperties()); + return ImmutableIndexVersion.builder() + .versionId(versionId) + .timestampMillis(System.currentTimeMillis()) + .properties(mergedProperties) + .build(); + } + + private IndexSnapshot indexSnapshot(int versionId) { + if (snapshotProperties != null || tableSnapshotId != -1L || indexSnapshotId != -1L) { + Preconditions.checkArgument( + tableSnapshotId != -1L, + "Cannot create index snapshot without specifying tableSnapshotId"); + Preconditions.checkArgument( + indexSnapshotId != -1L, + "Cannot create index snapshot without specifying indexSnapshotId"); + + return ImmutableIndexSnapshot.builder() + .indexSnapshotId(indexSnapshotId) + .tableSnapshotId(tableSnapshotId) + .versionId(versionId) + .properties(snapshotProperties) + .build(); + } else { + return null; + } + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/BaseIndexDefinition.java b/core/src/main/java/org/apache/iceberg/index/BaseIndexDefinition.java new file mode 100644 index 000000000000..d87cc4b6338c --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/BaseIndexDefinition.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import org.apache.iceberg.catalog.IndexIdentifier; +import org.immutables.value.Value; + +/** + * A lightweight summary of an index instance. + * + *

This interface provides the essential information needed for query optimizers to discover and + * evaluate indexes available for a given table. It contains only the fields required for the + * optimizer to decide whether the index is applicable to a query or should be skipped. + */ +@Value.Immutable +@SuppressWarnings("ImmutablesStyle") +@Value.Style( + typeImmutable = "ImmutableIndexSummary", + visibilityString = "PUBLIC", + builderVisibilityString = "PUBLIC") +interface BaseIndexDefinition extends IndexDefinition { + + @Override + IndexIdentifier id(); + + @Override + IndexType type(); + + @Override + int[] indexColumnIds(); + + @Override + int[] optimizedColumnIds(); + + @Override + long[] availableTableSnapshots(); +} diff --git a/core/src/main/java/org/apache/iceberg/index/BaseIndexHistoryEntry.java b/core/src/main/java/org/apache/iceberg/index/BaseIndexHistoryEntry.java new file mode 100644 index 000000000000..1c97084c5e7e --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/BaseIndexHistoryEntry.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import org.immutables.value.Value; + +/** + * Index history entry. + * + *

An entry contains a change to the index state. At the given timestamp, the current version was + * set to the given version ID. + */ +@Value.Immutable +@SuppressWarnings("ImmutablesStyle") +@Value.Style( + typeImmutable = "ImmutableIndexHistoryEntry", + visibilityString = "PUBLIC", + builderVisibilityString = "PUBLIC") +interface BaseIndexHistoryEntry extends IndexHistoryEntry {} diff --git a/core/src/main/java/org/apache/iceberg/index/BaseIndexOperations.java b/core/src/main/java/org/apache/iceberg/index/BaseIndexOperations.java new file mode 100644 index 000000000000..46afa74d7790 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/BaseIndexOperations.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import java.util.Locale; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; +import java.util.function.Predicate; +import org.apache.iceberg.BaseMetastoreOperations; +import org.apache.iceberg.TableMetadataParser; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NoSuchIndexException; +import org.apache.iceberg.exceptions.NotFoundException; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.relocated.com.google.common.base.Objects; +import org.apache.iceberg.util.LocationUtil; +import org.apache.iceberg.util.Tasks; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class BaseIndexOperations extends BaseMetastoreOperations + implements IndexOperations { + private static final Logger LOG = LoggerFactory.getLogger(BaseIndexOperations.class); + + private static final String METADATA_FOLDER_NAME = "metadata"; + + private IndexMetadata currentMetadata = null; + private String currentMetadataLocation = null; + private boolean shouldRefresh = true; + private int version = -1; + + protected BaseIndexOperations() {} + + protected void requestRefresh() { + this.shouldRefresh = true; + } + + protected void disableRefresh() { + this.shouldRefresh = false; + } + + protected abstract void doRefresh(); + + protected abstract void doCommit(IndexMetadata base, IndexMetadata metadata); + + protected abstract String indexName(); + + protected abstract FileIO io(); + + protected String currentMetadataLocation() { + return currentMetadataLocation; + } + + protected int currentVersion() { + return version; + } + + @Override + public IndexMetadata current() { + if (shouldRefresh) { + return refresh(); + } + + return currentMetadata; + } + + @Override + public IndexMetadata refresh() { + boolean 
currentMetadataWasAvailable = currentMetadata != null; + try { + doRefresh(); + } catch (NoSuchIndexException e) { + if (currentMetadataWasAvailable) { + LOG.warn("Could not find the index during refresh, setting current metadata to null", e); + shouldRefresh = true; + } + + currentMetadata = null; + currentMetadataLocation = null; + version = -1; + throw e; + } + + return current(); + } + + @Override + @SuppressWarnings("ImmutablesReferenceEquality") + public void commit(IndexMetadata base, IndexMetadata metadata) { + // if the metadata is already out of date, reject it + if (base != current()) { + if (base != null) { + throw new CommitFailedException("Cannot commit: stale index metadata"); + } else { + // when current is non-null, the index exists. but when base is null, the commit is trying + // to create the index + throw new AlreadyExistsException("Index already exists: %s", indexName()); + } + } + + // if the metadata is not changed, return early + if (base == metadata) { + LOG.info("Nothing to commit."); + return; + } + + long start = System.currentTimeMillis(); + doCommit(base, metadata); + requestRefresh(); + + LOG.info( + "Successfully committed to index {} in {} ms", + indexName(), + System.currentTimeMillis() - start); + } + + private String writeNewMetadata(IndexMetadata metadata, int newVersion) { + String newMetadataFilePath = newMetadataFilePath(metadata, newVersion); + OutputFile newMetadataLocation = io().newOutputFile(newMetadataFilePath); + + // write the new metadata + // use overwrite to avoid negative caching in S3. this is safe because the metadata location is + // always unique because it includes a UUID. + IndexMetadataParser.overwrite(metadata, newMetadataLocation); + + return newMetadataLocation.location(); + } + + protected String writeNewMetadataIfRequired(IndexMetadata metadata) { + return null != metadata.metadataFileLocation() + ? 
metadata.metadataFileLocation() + : writeNewMetadata(metadata, version + 1); + } + + private String newMetadataFilePath(IndexMetadata metadata, int newVersion) { + String codecName = + metadata.currentVersion().properties() != null + ? metadata + .currentVersion() + .properties() + .getOrDefault( + IndexProperties.METADATA_COMPRESSION, + IndexProperties.METADATA_COMPRESSION_DEFAULT) + : IndexProperties.METADATA_COMPRESSION_DEFAULT; + String fileExtension = TableMetadataParser.getFileExtension(codecName); + return metadataFileLocation( + metadata, + String.format(Locale.ROOT, "%05d-%s%s", newVersion, UUID.randomUUID(), fileExtension)); + } + + private String metadataFileLocation(IndexMetadata metadata, String filename) { + String metadataLocation = + metadata.currentVersion().properties() != null + ? metadata.currentVersion().properties().get(IndexProperties.WRITE_METADATA_LOCATION) + : null; + + if (metadataLocation != null) { + return String.format("%s/%s", LocationUtil.stripTrailingSlash(metadataLocation), filename); + } else { + return String.format( + "%s/%s/%s", + LocationUtil.stripTrailingSlash(metadata.location()), METADATA_FOLDER_NAME, filename); + } + } + + protected void refreshFromMetadataLocation(String newLocation) { + refreshFromMetadataLocation(newLocation, null, 20); + } + + protected void refreshFromMetadataLocation( + String newLocation, Predicate shouldRetry, int numRetries) { + refreshFromMetadataLocation( + newLocation, + shouldRetry, + numRetries, + metadataLocation -> IndexMetadataParser.read(io(), metadataLocation)); + } + + protected void refreshFromMetadataLocation( + String newLocation, + Predicate shouldRetry, + int numRetries, + Function metadataLoader) { + if (!Objects.equal(currentMetadataLocation, newLocation)) { + LOG.info("Refreshing index metadata from new version: {}", newLocation); + + AtomicReference newMetadata = new AtomicReference<>(); + Tasks.foreach(newLocation) + .retry(numRetries) + .exponentialBackoff(100, 5000, 600000, 
4.0 /* 100, 400, 1600, ... */) + .throwFailureWhenFinished() + .stopRetryOn(NotFoundException.class) // overridden if shouldRetry is non-null + .shouldRetryTest(shouldRetry) + .run(metadataLocation -> newMetadata.set(metadataLoader.apply(metadataLocation))); + + this.currentMetadata = newMetadata.get(); + this.currentMetadataLocation = newLocation; + this.version = parseVersion(newLocation); + } + + this.shouldRefresh = false; + } + + /** + * Parse the version from index metadata file name. + * + * @param metadataLocation index metadata file location + * @return version of the index metadata file in success case and -1 if the version is not + * parsable (as a sign that the metadata is not part of this catalog) + */ + private static int parseVersion(String metadataLocation) { + int versionStart = metadataLocation.lastIndexOf('/') + 1; // if '/' isn't found, this will be 0 + int versionEnd = metadataLocation.indexOf('-', versionStart); + if (versionEnd < 0) { + // found filesystem index's metadata + return -1; + } + + try { + return Integer.parseInt(metadataLocation.substring(versionStart, versionEnd)); + } catch (NumberFormatException e) { + LOG.warn("Unable to parse version from metadata location: {}", metadataLocation, e); + return -1; + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/BaseIndexSnapshot.java b/core/src/main/java/org/apache/iceberg/index/BaseIndexSnapshot.java new file mode 100644 index 000000000000..1529512c8f5e --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/BaseIndexSnapshot.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.Map; +import javax.annotation.Nullable; +import org.immutables.value.Value; + +/** + * Index snapshot linking an index snapshot to a specific table snapshot. + * + *

Index data is versioned using snapshots, similar to table data. Each index snapshot is derived + * from a specific table snapshot, ensuring consistency. + */ +@Value.Immutable +@SuppressWarnings("ImmutablesStyle") +@Value.Style( + typeImmutable = "ImmutableIndexSnapshot", + visibilityString = "PUBLIC", + builderVisibilityString = "PUBLIC") +interface BaseIndexSnapshot extends IndexSnapshot { + + @Override + @Nullable + Map properties(); +} diff --git a/core/src/main/java/org/apache/iceberg/index/BaseIndexVersion.java b/core/src/main/java/org/apache/iceberg/index/BaseIndexVersion.java new file mode 100644 index 000000000000..8c2dbc483bda --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/BaseIndexVersion.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.util.Map; +import javax.annotation.Nullable; +import org.immutables.value.Value; + +/** + * A version of the index at a point in time. + * + *

A version consists of index metadata and user-supplied properties. + * + *

Versions are created by index operations, like Create and Alter. + */ +@Value.Immutable +@SuppressWarnings("ImmutablesStyle") +@Value.Style( + typeImmutable = "ImmutableIndexVersion", + visibilityString = "PUBLIC", + builderVisibilityString = "PUBLIC") +interface BaseIndexVersion extends IndexVersion { + + @Override + @Nullable + Map properties(); +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexHistoryEntryParser.java b/core/src/main/java/org/apache/iceberg/index/IndexHistoryEntryParser.java new file mode 100644 index 000000000000..db468ab66148 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexHistoryEntryParser.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +class IndexHistoryEntryParser { + + private static final String VERSION_ID = "version-id"; + private static final String TIMESTAMP_MS = "timestamp-ms"; + + private IndexHistoryEntryParser() {} + + static String toJson(IndexHistoryEntry entry) { + return JsonUtil.generate(gen -> toJson(entry, gen), false); + } + + static void toJson(IndexHistoryEntry entry, JsonGenerator generator) throws IOException { + Preconditions.checkArgument(entry != null, "Invalid index history entry: null"); + generator.writeStartObject(); + generator.writeNumberField(TIMESTAMP_MS, entry.timestampMillis()); + generator.writeNumberField(VERSION_ID, entry.versionId()); + generator.writeEndObject(); + } + + static IndexHistoryEntry fromJson(String json) { + return JsonUtil.parse(json, IndexHistoryEntryParser::fromJson); + } + + static IndexHistoryEntry fromJson(JsonNode node) { + Preconditions.checkArgument(node != null, "Cannot parse index history entry from null object"); + Preconditions.checkArgument( + node.isObject(), "Cannot parse index history entry from non-object: %s", node); + + return ImmutableIndexHistoryEntry.builder() + .versionId(JsonUtil.getInt(VERSION_ID, node)) + .timestampMillis(JsonUtil.getLong(TIMESTAMP_MS, node)) + .build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexMetadata.java b/core/src/main/java/org/apache/iceberg/index/IndexMetadata.java new file mode 100644 index 000000000000..0b79213fa93f --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexMetadata.java @@ -0,0 +1,663 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.io.Serializable; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nullable; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.util.PropertyUtil; +import org.immutables.value.Value; + +/** + * Metadata for an index. + * + *

Index metadata is stored in metadata files and contains the full state of an index at a point + * in time. The actual index data is stored separately, typically in the index location, and the + * metadata contains pointers to these data files. Engines can use the index type to understand + * where the data is stored and in what format. + */ +@SuppressWarnings("ImmutablesStyle") +@Value.Immutable(builder = false) +@Value.Style(allParameters = true, visibilityString = "PACKAGE") +public interface IndexMetadata extends Serializable { + + int SUPPORTED_INDEX_FORMAT_VERSION = 1; + int DEFAULT_INDEX_FORMAT_VERSION = 1; + + /** + * Return the UUID that identifies this index. + * + *

Generated when the index is created. Implementations must throw an exception if an index's + * UUID does not match the expected UUID after refreshing metadata. + * + * @return the index UUID as a string + */ + String uuid(); + + /** + * Return the UUID of the table that this index is associated with. + * + *

Set when the index is created and must not be changed afterward. + * + * @return the table UUID as a string + */ + String tableUuid(); + + /** + * Return the format version for this index. + * + *

An integer version number for the index metadata format; format-version is 1 for current + * version of spec. + * + * @return the format version + */ + int formatVersion(); + + /** + * Return the type of this index. + * + *

One of the supported index-types. For example: BTREE, TERM, IVF. Must be supplied during the + * creation of an index and must not be changed. + * + * @return the index type + */ + IndexType type(); + + /** + * Return the column IDs contained by this index. + * + *

The ids of the columns which are stored losslessly in the index instance. Must be supplied + * during the creation of an index and must not be changed. + * + * @return a list of column IDs + */ + List indexColumnIds(); + + /** + * Return the column IDs that this index is optimized for. + * + *

The ids of the columns that the index is designed to optimize for retrieval. Must be + * supplied during the creation of an index and must not be changed. + * + * @return a list of column IDs + */ + List optimizedColumnIds(); + + /** + * Return the index's base location. + * + *

Used to create index file locations. + * + * @return the index location + */ + String location(); + + /** + * Return the ID of the current version of this index. + * + * @return the current version ID + */ + int currentVersionId(); + + /** + * Return the list of known versions of this index. + * + *

The number of versions retained is implementation-specific. current-version-id must be + * present in this list. + * + * @return a list of index versions + */ + List versions(); + + /** + * Return the version history of this index. + * + *

A list of version log entries with the timestamp and version-id for every change to + * current-version-id. The number of entries retained is implementation-specific. + * current-version-id may or may not be present in this list. + * + * @return a list of index history entries + */ + List history(); + + /** + * Return the snapshots of this index. + * + *

During index maintenance a new index snapshot is generated for the specific Table snapshot, + * and it is added to the snapshots list. + * + * @return a list of index snapshots + */ + List snapshots(); + + /** + * Return the list of metadata changes for this index. + * + * @return a list of index updates + */ + List changes(); + + /** + * Return the metadata file location for this index metadata. + * + * @return the metadata file location, or null if not set + */ + @Nullable + String metadataFileLocation(); + + /** + * Return the current version of this index. + * + * @return the current index version + */ + default IndexVersion currentVersion() { + Preconditions.checkArgument( + versionsById().containsKey(currentVersionId()), + "Cannot find current version %s in index versions: %s", + currentVersionId(), + versionsById().keySet()); + + return versionsById().get(currentVersionId()); + } + + /** + * Return a version by ID. + * + * @param versionId the version ID + * @return the version, or null if not found + */ + default IndexVersion version(int versionId) { + return versionsById().get(versionId); + } + + /** + * Return a map of version ID to version. + * + * @return a map of versions by ID + */ + @Value.Derived + default Map versionsById() { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (IndexVersion version : versions()) { + builder.put(version.versionId(), version); + } + + return builder.build(); + } + + /** + * Return a map of index snapshot ID to snapshot. + * + * @return a map of snapshots by index snapshot ID + */ + @Value.Derived + default Map snapshotsById() { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (IndexSnapshot snapshot : snapshots()) { + builder.put(snapshot.indexSnapshotId(), snapshot); + } + + return builder.build(); + } + + /** + * Return a map of table snapshot ID to index snapshot. 
+ * + * @return a map of snapshots by table snapshot ID + */ + @Value.Derived + default Map snapshotsByTableSnapshotId() { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (IndexSnapshot snapshot : snapshots()) { + builder.put(snapshot.tableSnapshotId(), snapshot); + } + + return builder.build(); + } + + /** + * Return a snapshot by index snapshot ID. + * + * @param indexSnapshotId the index snapshot ID + * @return the snapshot, or null if not found + */ + default IndexSnapshot snapshot(long indexSnapshotId) { + return snapshotsById().get(indexSnapshotId); + } + + /** + * Return a snapshot by table snapshot ID. + * + * @param tableSnapshotId the table snapshot ID + * @return the snapshot, or null if no index snapshot exists for the table snapshot + */ + default IndexSnapshot snapshotForTableSnapshot(long tableSnapshotId) { + return snapshotsByTableSnapshotId().get(tableSnapshotId); + } + + @Value.Check + default void check() { + Preconditions.checkArgument( + formatVersion() > 0 && formatVersion() <= IndexMetadata.SUPPORTED_INDEX_FORMAT_VERSION, + "Unsupported format version: %s", + formatVersion()); + } + + static Builder builder() { + return new Builder(); + } + + static Builder buildFrom(IndexMetadata base) { + return new Builder(base); + } + + /** Builder for IndexMetadata. 
*/ + class Builder { + private final List versions; + private final List history; + private final List snapshots; + private final List changes; + private int formatVersion = DEFAULT_INDEX_FORMAT_VERSION; + private int currentVersionId = 0; + private String location; + private String uuid; + private String tableUuid; + private String metadataLocation; + private IndexType type; + private List indexColumnIds; + private List optimizedColumnIds; + + // internal change tracking + private final Map newVersionsByUserVersionId = Maps.newHashMap(); + private final List newVersions = Lists.newArrayList(); + private final Map newSnapshotsByTableSnapshotId = Maps.newHashMap(); + private IndexHistoryEntry historyEntry = null; + + // indexes + private final Map versionsById; + private final Map snapshotsByTableSnapshotId; + + private Builder() { + this.versions = Lists.newArrayList(); + this.versionsById = Maps.newHashMap(); + this.history = Lists.newArrayList(); + this.snapshots = Lists.newArrayList(); + this.snapshotsByTableSnapshotId = Maps.newHashMap(); + this.changes = Lists.newArrayList(); + this.indexColumnIds = ImmutableList.of(); + this.optimizedColumnIds = ImmutableList.of(); + this.uuid = UUID.randomUUID().toString(); + } + + private Builder(IndexMetadata base) { + this.versions = Lists.newArrayList(base.versions()); + this.versionsById = Maps.newHashMap(base.versionsById()); + this.history = Lists.newArrayList(base.history()); + this.snapshots = Lists.newArrayList(base.snapshots()); + this.snapshotsByTableSnapshotId = Maps.newHashMap(base.snapshotsByTableSnapshotId()); + this.changes = Lists.newArrayList(); + this.formatVersion = base.formatVersion(); + this.currentVersionId = base.currentVersionId(); + this.location = base.location(); + this.uuid = base.uuid(); + this.tableUuid = base.tableUuid(); + this.type = base.type(); + this.indexColumnIds = ImmutableList.copyOf(base.indexColumnIds()); + this.optimizedColumnIds = ImmutableList.copyOf(base.optimizedColumnIds()); 
+ this.metadataLocation = null; + } + + public Builder upgradeFormatVersion(int newFormatVersion) { + Preconditions.checkArgument( + newFormatVersion >= formatVersion, + "Cannot downgrade v%s index to v%s", + formatVersion, + newFormatVersion); + + if (formatVersion == newFormatVersion) { + return this; + } + + this.formatVersion = newFormatVersion; + changes.add(new IndexUpdate.UpgradeFormatVersion(newFormatVersion)); + return this; + } + + public Builder setLocation(String newLocation) { + Preconditions.checkArgument(null != newLocation, "Invalid location: null"); + if (!newLocation.equals(location)) { + this.location = newLocation; + changes.add(new IndexUpdate.SetLocation(newLocation)); + } + + return this; + } + + public Builder setType(IndexType newType) { + Preconditions.checkArgument(null != newType, "Invalid index type: null"); + this.type = newType; + return this; + } + + public Builder setTableUuid(String newTableUuid) { + Preconditions.checkArgument(null != newTableUuid, "Invalid table UUID: null"); + this.tableUuid = newTableUuid; + return this; + } + + public Builder setIndexColumnIds(List newIndexColumnIds) { + Preconditions.checkArgument(null != newIndexColumnIds, "Invalid index column ids: null"); + this.indexColumnIds = ImmutableList.copyOf(newIndexColumnIds); + return this; + } + + public Builder setOptimizedColumnIds(List newOptimizedColumnIds) { + Preconditions.checkArgument( + null != newOptimizedColumnIds, "Invalid optimized column ids: null"); + this.optimizedColumnIds = ImmutableList.copyOf(newOptimizedColumnIds); + return this; + } + + public Builder addVersion(IndexVersion version) { + Preconditions.checkArgument(version != null, "Invalid index version: null"); + IndexVersion existingVersion = findSameVersion(version); + if (existingVersion == null) { + Preconditions.checkArgument( + !newVersionsByUserVersionId.containsKey(version.versionId()), + "Invalid index version id. 
Version %s already added to the index with different properties: %s.", + version.versionId(), + newVersionsByUserVersionId.get(version.versionId())); + + int newVersionId = findNewVersionId(); + IndexVersion newVersion; + if (newVersionId != version.versionId()) { + // We need to generate a new version id + newVersion = + ImmutableIndexVersion.builder().from(version).versionId(newVersionId).build(); + } else { + newVersion = version; + } + + newVersionsByUserVersionId.put(version.versionId(), newVersion); + newVersions.add(newVersion); + } else if (existingVersion.versionId() != version.versionId()) { + // A version with identical properties already exists under a different ID; map the + // requested ID to the existing version to avoid duplication. + newVersionsByUserVersionId.put(version.versionId(), existingVersion); + } + + changes.add(new IndexUpdate.AddVersion(version)); + return this; + } + + public Builder setCurrentVersion(int versionId) { + IndexVersion newVersion = newVersionsByUserVersionId.get(versionId); + if (newVersion == null) { + newVersion = versionsById.get(versionId); + } + + Preconditions.checkArgument( + newVersion != null, "Cannot set current version to unknown version: %s", versionId); + + if (currentVersionId != newVersion.versionId()) { + this.currentVersionId = newVersion.versionId(); + this.historyEntry = + ImmutableIndexHistoryEntry.builder() + .versionId(newVersion.versionId()) + .timestampMillis( + versionsById.containsKey(newVersion.versionId()) + ? System.currentTimeMillis() + : newVersion.timestampMillis()) + .build(); + } + + changes.add(new IndexUpdate.SetCurrentVersion(versionId)); + return this; + } + + public Builder addSnapshot(IndexSnapshot snapshot) { + Preconditions.checkArgument( + !snapshotsByTableSnapshotId.containsKey(snapshot.tableSnapshotId()) + && !newSnapshotsByTableSnapshotId.containsKey(snapshot.tableSnapshotId()), + "Invalid table snapshot id. 
Snapshot for table snapshot %s already added to the index.", + snapshot.tableSnapshotId()); + Preconditions.checkArgument( + versionsById.containsKey(snapshot.versionId()) + || newVersionsByUserVersionId.containsKey(snapshot.versionId()), + "Invalid index version id. Cannot add snapshot with unknown version id: %s", + snapshot.versionId()); + + IndexSnapshot newSnapshot = snapshot; + IndexVersion newVersion = newVersionsByUserVersionId.get(snapshot.versionId()); + if (newVersion != null && newVersion.versionId() != snapshot.versionId()) { + newSnapshot = + ImmutableIndexSnapshot.builder() + .from(snapshot) + .versionId(newVersion.versionId()) + .build(); + } + + newSnapshotsByTableSnapshotId.put(newSnapshot.tableSnapshotId(), newSnapshot); + + changes.add(new IndexUpdate.AddSnapshot(snapshot)); + return this; + } + + public Builder removeSnapshots(Set indexSnapshotIdsToRemove) { + Preconditions.checkArgument( + indexSnapshotIdsToRemove != null && !indexSnapshotIdsToRemove.isEmpty(), + "Invalid snapshot id set to remove: %s", + snapshots); + + snapshots.removeIf(snapshot -> indexSnapshotIdsToRemove.contains(snapshot.indexSnapshotId())); + // Rebuild snapshotsByTableSnapshotId from the remaining snapshots + snapshotsByTableSnapshotId.clear(); + for (IndexSnapshot snapshot : snapshots) { + snapshotsByTableSnapshotId.put(snapshot.tableSnapshotId(), snapshot); + } + + changes.add(new IndexUpdate.RemoveSnapshots(indexSnapshotIdsToRemove)); + return this; + } + + public IndexMetadata build() { + Preconditions.checkArgument(null != location, "Invalid location: null"); + Preconditions.checkArgument(null != tableUuid, "Invalid table uuid: null"); + Preconditions.checkArgument( + !versions.isEmpty() || !newVersions.isEmpty(), "Invalid index: no versions were added"); + Preconditions.checkArgument(null != type, "Invalid index type: null"); + Preconditions.checkArgument(!indexColumnIds.isEmpty(), "Index column IDs cannot be empty"); + Preconditions.checkArgument( + 
!optimizedColumnIds.isEmpty(), "Optimized column IDs cannot be empty"); + + // when associated with a metadata file, metadata must have no changes so that the metadata + // matches exactly what is in the metadata file, which does not store changes. metadata + // location with changes is inconsistent. + Preconditions.checkArgument( + metadataLocation == null || changes.isEmpty(), + "Cannot create index metadata with a metadata location and changes"); + + versions.addAll(newVersions); + newVersions.forEach(version -> versionsById.put(version.versionId(), version)); + snapshots.addAll(newSnapshotsByTableSnapshotId.values()); + + if (null != historyEntry) { + history.add(historyEntry); + } + + Map indexProperties = versionsById.get(currentVersionId).properties(); + int historySize = + PropertyUtil.propertyAsInt( + indexProperties == null ? ImmutableMap.of() : indexProperties, + IndexProperties.VERSION_HISTORY_SIZE, + IndexProperties.VERSION_HISTORY_SIZE_DEFAULT); + + Preconditions.checkArgument( + historySize > 0, + "%s must be positive but was %s", + IndexProperties.VERSION_HISTORY_SIZE, + historySize); + + // expire old versions, but keep at least the versions added in this builder and the current + // version + int numVersions = + ImmutableSet.builder() + .addAll( + changes(IndexUpdate.AddVersion.class) + .map(v -> v.indexVersion().versionId()) + .collect(Collectors.toSet())) + .add(currentVersionId) + .build() + .size(); + int numVersionsToKeep = Math.max(numVersions, historySize); + + List retainedVersions; + List retainedHistory; + if (versions.size() > numVersionsToKeep) { + retainedVersions = + expireVersions(versionsById, numVersionsToKeep, versionsById.get(currentVersionId)); + Set retainedVersionIds = + retainedVersions.stream().map(IndexVersion::versionId).collect(Collectors.toSet()); + retainedHistory = updateHistory(history, retainedVersionIds); + } else { + retainedVersions = versions; + retainedHistory = history; + } + + return 
ImmutableIndexMetadata.of( + null == uuid ? UUID.randomUUID().toString() : uuid, + tableUuid, + formatVersion, + type, + indexColumnIds, + optimizedColumnIds, + location, + currentVersionId, + retainedVersions, + retainedHistory, + snapshots, + changes, + metadataLocation); + } + + /** + * Checks whether the given view versions would behave the same while ignoring the view version + * id, the creation timestamp, and the operation. + * + * @param one the view version to compare + * @param two the view version to compare + * @return true if the given view versions would behave the same + */ + private boolean sameIndexVersion(IndexVersion one, IndexVersion two) { + return Objects.equals(one.properties(), two.properties()) + || (one.properties() == null && two.properties().isEmpty()) + || (two.properties() == null && one.properties().isEmpty()); + } + + private IndexVersion findSameVersion(IndexVersion indexVersion) { + return versions.stream() + .filter(version -> sameIndexVersion(version, indexVersion)) + .findAny() + .orElseGet( + () -> + newVersionsByUserVersionId.values().stream() + .filter(version -> sameIndexVersion(version, indexVersion)) + .findAny() + .orElse(null)); + } + + private int findNewVersionId() { + int newVersionId = 1; + for (IndexVersion v : versions) { + if (v.versionId() >= newVersionId) { + newVersionId = v.versionId() + 1; + } + } + + for (IndexVersion v : newVersionsByUserVersionId.values()) { + if (v.versionId() >= newVersionId) { + newVersionId = v.versionId() + 1; + } + } + + return newVersionId; + } + + @VisibleForTesting + static List expireVersions( + Map versionsById, + int numVersionsToKeep, + IndexVersion currentVersion) { + // version ids are assigned sequentially. keep the latest versions by ID. 
+ List ids = Lists.newArrayList(versionsById.keySet()); + ids.sort(Comparator.reverseOrder()); + + List retainedVersions = Lists.newArrayList(); + // always retain the current version + retainedVersions.add(currentVersion); + + for (int idToKeep : ids.subList(0, Math.min(numVersionsToKeep, ids.size()))) { + if (retainedVersions.size() == numVersionsToKeep) { + break; + } + + IndexVersion version = versionsById.get(idToKeep); + if (currentVersion.versionId() != version.versionId()) { + retainedVersions.add(version); + } + } + + return retainedVersions; + } + + @VisibleForTesting + static List updateHistory( + List history, Set ids) { + List retainedHistory = Lists.newArrayList(); + for (IndexHistoryEntry entry : history) { + if (ids.contains(entry.versionId())) { + retainedHistory.add(entry); + } else { + // clear history past any unknown version + retainedHistory.clear(); + } + } + + return retainedHistory; + } + + private Stream changes(Class updateClass) { + return changes.stream().filter(updateClass::isInstance).map(updateClass::cast); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexMetadataParser.java b/core/src/main/java/org/apache/iceberg/index/IndexMetadataParser.java new file mode 100644 index 000000000000..36bf56ea50b8 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexMetadataParser.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; +import org.apache.iceberg.TableMetadataParser.Codec; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.util.JsonUtil; + +public class IndexMetadataParser { + + static final String INDEX_UUID = "index-uuid"; + static final String TABLE_UUID = "table-uuid"; + static final String FORMAT_VERSION = "format-version"; + static final String INDEX_TYPE = "index-type"; + static final String INDEX_COLUMN_IDS = "index-column-ids"; + static final String OPTIMIZED_COLUMN_IDS = "optimized-column-ids"; + static final String LOCATION = "location"; + static final String CURRENT_VERSION_ID = "current-version-id"; + static final String VERSIONS = "versions"; + static final String VERSION_LOG = "version-log"; + static final String SNAPSHOTS = "snapshots"; + + private IndexMetadataParser() {} + + public static 
String toJson(IndexMetadata metadata) { + return toJson(metadata, false); + } + + public static String toJson(IndexMetadata metadata, boolean pretty) { + return JsonUtil.generate(gen -> toJson(metadata, gen), pretty); + } + + public static void toJson(IndexMetadata metadata, JsonGenerator gen) throws IOException { + Preconditions.checkArgument(null != metadata, "Invalid index metadata: null"); + + gen.writeStartObject(); + + gen.writeStringField(INDEX_UUID, metadata.uuid()); + gen.writeStringField(TABLE_UUID, metadata.tableUuid()); + gen.writeNumberField(FORMAT_VERSION, metadata.formatVersion()); + gen.writeStringField(INDEX_TYPE, metadata.type().typeName()); + JsonUtil.writeIntegerArray(INDEX_COLUMN_IDS, metadata.indexColumnIds(), gen); + JsonUtil.writeIntegerArray(OPTIMIZED_COLUMN_IDS, metadata.optimizedColumnIds(), gen); + gen.writeStringField(LOCATION, metadata.location()); + + gen.writeNumberField(CURRENT_VERSION_ID, metadata.currentVersionId()); + + gen.writeArrayFieldStart(VERSIONS); + for (IndexVersion version : metadata.versions()) { + IndexVersionParser.toJson(version, gen); + } + gen.writeEndArray(); + + gen.writeArrayFieldStart(VERSION_LOG); + for (IndexHistoryEntry historyEntry : metadata.history()) { + IndexHistoryEntryParser.toJson(historyEntry, gen); + } + gen.writeEndArray(); + + gen.writeArrayFieldStart(SNAPSHOTS); + for (IndexSnapshot snapshot : metadata.snapshots()) { + IndexSnapshotParser.toJson(snapshot, gen); + } + gen.writeEndArray(); + + gen.writeEndObject(); + } + + public static IndexMetadata fromJson(String metadataLocation, String json) { + return JsonUtil.parse(json, node -> IndexMetadataParser.fromJson(metadataLocation, node)); + } + + public static IndexMetadata fromJson(String json) { + Preconditions.checkArgument(json != null, "Cannot parse index metadata from null string"); + return JsonUtil.parse(json, IndexMetadataParser::fromJson); + } + + public static IndexMetadata fromJson(JsonNode json) { + return fromJson(null, json); + } 
+ + public static IndexMetadata fromJson(String metadataLocation, JsonNode json) { + Preconditions.checkArgument(json != null, "Cannot parse index metadata from null object"); + Preconditions.checkArgument( + json.isObject(), "Cannot parse index metadata from non-object: %s", json); + + String uuid = JsonUtil.getString(INDEX_UUID, json); + String tableUuid = JsonUtil.getString(TABLE_UUID, json); + int formatVersion = JsonUtil.getInt(FORMAT_VERSION, json); + IndexType type = IndexType.fromString(JsonUtil.getString(INDEX_TYPE, json)); + List indexColumnIds = JsonUtil.getIntegerList(INDEX_COLUMN_IDS, json); + List optimizedColumnIds = JsonUtil.getIntegerList(OPTIMIZED_COLUMN_IDS, json); + String location = JsonUtil.getString(LOCATION, json); + + int currentVersionId = JsonUtil.getInt(CURRENT_VERSION_ID, json); + + JsonNode versionsNode = JsonUtil.get(VERSIONS, json); + Preconditions.checkArgument( + versionsNode.isArray(), "Cannot parse versions from non-array: %s", versionsNode); + List versions = Lists.newArrayListWithExpectedSize(versionsNode.size()); + for (JsonNode versionNode : versionsNode) { + versions.add(IndexVersionParser.fromJson(versionNode)); + } + + JsonNode versionLogNode = JsonUtil.get(VERSION_LOG, json); + Preconditions.checkArgument( + versionLogNode.isArray(), "Cannot parse version-log from non-array: %s", versionLogNode); + List historyEntries = + Lists.newArrayListWithExpectedSize(versionLogNode.size()); + for (JsonNode vLog : versionLogNode) { + historyEntries.add(IndexHistoryEntryParser.fromJson(vLog)); + } + + JsonNode snapshotsNode = JsonUtil.get(SNAPSHOTS, json); + Preconditions.checkArgument( + snapshotsNode.isArray(), "Cannot parse snapshots from non-array: %s", snapshotsNode); + List snapshots = Lists.newArrayListWithExpectedSize(snapshotsNode.size()); + for (JsonNode snapshotNode : snapshotsNode) { + snapshots.add(IndexSnapshotParser.fromJson(snapshotNode)); + } + + return ImmutableIndexMetadata.of( + uuid, + tableUuid, + formatVersion, 
+ type, + indexColumnIds, + optimizedColumnIds, + location, + currentVersionId, + versions, + historyEntries, + snapshots, + ImmutableList.of(), + metadataLocation); + } + + public static void overwrite(IndexMetadata metadata, OutputFile outputFile) { + internalWrite(metadata, outputFile, true); + } + + public static void write(IndexMetadata metadata, OutputFile outputFile) { + internalWrite(metadata, outputFile, false); + } + + public static IndexMetadata read(FileIO io, String path) { + return read(io.newInputFile(path)); + } + + public static IndexMetadata read(InputFile file) { + Codec codec = Codec.fromFileName(file.location()); + try (InputStream is = + codec == Codec.GZIP ? new GZIPInputStream(file.newStream()) : file.newStream()) { + return fromJson(file.location(), JsonUtil.mapper().readValue(is, JsonNode.class)); + } catch (IOException e) { + throw new UncheckedIOException( + String.format("Failed to read json file: %s", file.location()), e); + } + } + + private static void internalWrite( + IndexMetadata metadata, OutputFile outputFile, boolean overwrite) { + boolean isGzip = Codec.fromFileName(outputFile.location()) == Codec.GZIP; + OutputStream stream = overwrite ? outputFile.createOrOverwrite() : outputFile.create(); + try (OutputStreamWriter writer = + new OutputStreamWriter( + isGzip ? 
new GZIPOutputStream(stream) : stream, StandardCharsets.UTF_8)) { + JsonGenerator generator = JsonUtil.factory().createGenerator(writer); + toJson(metadata, generator); + generator.flush(); + } catch (IOException e) { + throw new UncheckedIOException( + String.format("Failed to write json to file: %s", outputFile.location()), e); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexOperations.java b/core/src/main/java/org/apache/iceberg/index/IndexOperations.java new file mode 100644 index 000000000000..17cfbc7a8e19 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexOperations.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +/** SPI interface to abstract index metadata access and updates. */ +public interface IndexOperations { + + /** + * Return the currently loaded index metadata, without checking for updates. + * + * @return index metadata + */ + IndexMetadata current(); + + /** + * Return the current index metadata after checking for updates. + * + * @return index metadata + */ + IndexMetadata refresh(); + + /** + * Replace the base index metadata with a new version. + * + *

/** SPI interface to abstract index metadata access and updates. */
public interface IndexOperations {

  /**
   * Return the currently loaded index metadata, without checking for updates.
   *
   * @return index metadata
   */
  IndexMetadata current();

  /**
   * Return the current index metadata after checking for updates.
   *
   * @return index metadata
   */
  IndexMetadata refresh();

  /**
   * Replace the base index metadata with a new version.
   *
   * <p>This method should implement and document atomicity guarantees.
   *
   * <p>Implementations must check that the base metadata is current to avoid overwriting updates.
   * Once the atomic commit operation succeeds, implementations must not perform any operations that
   * may fail because failure in this method cannot be distinguished from commit failure.
   *
   * <p>Implementations should throw a {@link
   * org.apache.iceberg.exceptions.CommitStateUnknownException} in cases where it cannot be
   * determined if the commit succeeded or failed. For example if a network partition causes the
   * confirmation of the commit to be lost, the implementation should throw a
   * CommitStateUnknownException. An unknown state indicates to downstream users of this API that it
   * is not safe to perform clean up and remove any files. In general, strict metadata cleanup will
   * only trigger cleanups when the commit fails with an exception implementing the marker interface
   * {@link org.apache.iceberg.exceptions.CleanableFailure}. All other exceptions will be treated as
   * if the commit has failed.
   *
   * @param base index metadata on which changes were based
   * @param metadata new index metadata with updates
   */
  void commit(IndexMetadata base, IndexMetadata metadata);
}
/** Index properties that can be set during CREATE/ALTER index or using updateProperties API. */
public class IndexProperties {
  // Number of index versions to retain in metadata before older versions are expired.
  public static final String VERSION_HISTORY_SIZE = "version.history.num-entries";
  public static final int VERSION_HISTORY_SIZE_DEFAULT = 10;

  // Codec for index metadata files; the default "gzip" writes gzip-compressed metadata.
  public static final String METADATA_COMPRESSION = "write.metadata.compression-codec";
  public static final String METADATA_COMPRESSION_DEFAULT = "gzip";

  // Optional override for the directory where index metadata files are written.
  public static final String WRITE_METADATA_LOCATION = "write.metadata.path";

  // Free-form, user-provided description of the index.
  public static final String COMMENT = "comment";

  // Utility holder for property constants; never instantiated.
  private IndexProperties() {}
}
+ */ +package org.apache.iceberg.index; + +import org.apache.iceberg.MetadataUpdate; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +/** Represents a requirement for a {@link MetadataUpdate} */ +public interface IndexRequirement { + default void validate(IndexMetadata base) { + throw new ValidationException( + "Cannot validate %s against an index", this.getClass().getSimpleName()); + } + + class AssertIndexDoesNotExist implements IndexRequirement { + public AssertIndexDoesNotExist() {} + + @Override + public void validate(IndexMetadata base) { + if (base != null) { + throw new CommitFailedException("Requirement failed: table already exists"); + } + } + } + + class AssertIndexUUID implements IndexRequirement { + private final String uuid; + + public AssertIndexUUID(String uuid) { + Preconditions.checkArgument(uuid != null, "Invalid required UUID: null"); + this.uuid = uuid; + } + + public String uuid() { + return uuid; + } + + @Override + public void validate(IndexMetadata base) { + if (!uuid.equalsIgnoreCase(base.uuid())) { + throw new CommitFailedException( + "Requirement failed: UUID does not match: expected %s != %s", base.uuid(), uuid); + } + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexRequirementParser.java b/core/src/main/java/org/apache/iceberg/index/IndexRequirementParser.java new file mode 100644 index 000000000000..f2b4caefeda3 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexRequirementParser.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.Locale; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.util.JsonUtil; + +public class IndexRequirementParser { + + private IndexRequirementParser() {} + + private static final String TYPE = "type"; + + // assertion types + static final String ASSERT_INDEX_UUID = "assert-index-uuid"; + static final String ASSERT_INDEX_DOES_NOT_EXIST = "assert-create"; + + // AssertIndexUUID + private static final String UUID = "uuid"; + + private static final Map, String> TYPES = + ImmutableMap., String>builder() + .put(IndexRequirement.AssertIndexUUID.class, ASSERT_INDEX_UUID) + .put(IndexRequirement.AssertIndexDoesNotExist.class, ASSERT_INDEX_DOES_NOT_EXIST) + .buildOrThrow(); + + public static String toJson(IndexRequirement updateRequirement) { + return toJson(updateRequirement, false); + } + + public static String toJson(IndexRequirement indexRequirement, boolean pretty) { + return JsonUtil.generate(gen -> toJson(indexRequirement, gen), pretty); + } + + public static void toJson(IndexRequirement indexRequirement, JsonGenerator generator) + throws IOException { + 
String requirementType = TYPES.get(indexRequirement.getClass()); + + generator.writeStartObject(); + generator.writeStringField(TYPE, requirementType); + + switch (requirementType) { + case ASSERT_INDEX_DOES_NOT_EXIST: + // No fields beyond the requirement itself + break; + case ASSERT_INDEX_UUID: + writeAssertIndexUUID((IndexRequirement.AssertIndexUUID) indexRequirement, generator); + break; + default: + throw new IllegalArgumentException( + String.format( + "Cannot convert update requirement to json. Unrecognized type: %s", + requirementType)); + } + + generator.writeEndObject(); + } + + /** + * Read MetadataUpdate from a JSON string. + * + * @param json a JSON string of a MetadataUpdate + * @return a MetadataUpdate object + */ + public static IndexRequirement fromJson(String json) { + return JsonUtil.parse(json, IndexRequirementParser::fromJson); + } + + public static IndexRequirement fromJson(JsonNode jsonNode) { + Preconditions.checkArgument( + jsonNode != null && jsonNode.isObject(), + "Cannot parse update requirement from non-object value: %s", + jsonNode); + Preconditions.checkArgument( + jsonNode.hasNonNull(TYPE), "Cannot parse update requirement. Missing field: type"); + String type = JsonUtil.getString(TYPE, jsonNode).toLowerCase(Locale.ROOT); + + switch (type) { + case ASSERT_INDEX_DOES_NOT_EXIST: + return readAssertIndexDoesNotExist(jsonNode); + case ASSERT_INDEX_UUID: + return readAssertIndexUUID(jsonNode); + default: + throw new UnsupportedOperationException( + String.format("Unrecognized update requirement. 
Cannot convert to json: %s", type)); + } + } + + private static void writeAssertIndexUUID( + IndexRequirement.AssertIndexUUID requirement, JsonGenerator gen) throws IOException { + gen.writeStringField(UUID, requirement.uuid()); + } + + @SuppressWarnings( + "unused") // Keep same signature in case this requirement class evolves and gets fields + private static IndexRequirement readAssertIndexDoesNotExist(JsonNode node) { + return new IndexRequirement.AssertIndexDoesNotExist(); + } + + private static IndexRequirement readAssertIndexUUID(JsonNode node) { + String uuid = JsonUtil.getString(UUID, node); + return new IndexRequirement.AssertIndexUUID(uuid); + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexRequirements.java b/core/src/main/java/org/apache/iceberg/index/IndexRequirements.java new file mode 100644 index 000000000000..1e7c02b3cc17 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexRequirements.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; + +public class IndexRequirements { + + private IndexRequirements() {} + + public static List forCreateIndex(List metadataUpdates) { + Preconditions.checkArgument(null != metadataUpdates, "Invalid metadata updates: null"); + Builder builder = new Builder(); + builder.require(new IndexRequirement.AssertIndexDoesNotExist()); + metadataUpdates.forEach(builder::update); + return builder.build(); + } + + public static List forReplaceIndex( + IndexMetadata base, List metadataUpdates) { + Preconditions.checkArgument(null != base, "Invalid index metadata: null"); + Preconditions.checkArgument(null != metadataUpdates, "Invalid metadata updates: null"); + Builder builder = new Builder(); + builder.require(new IndexRequirement.AssertIndexUUID(base.uuid())); + metadataUpdates.forEach(builder::update); + return builder.build(); + } + + private static class Builder { + private final ImmutableList.Builder requirements = ImmutableList.builder(); + + private Builder() {} + + private Builder require(IndexRequirement requirement) { + Preconditions.checkArgument(requirement != null, "Invalid requirement: null"); + requirements.add(requirement); + return this; + } + + private Builder update(IndexUpdate update) { + Preconditions.checkArgument(update != null, "Invalid update: null"); + + // No check at this point + return this; + } + + private List build() { + return requirements.build(); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexSnapshotAdd.java b/core/src/main/java/org/apache/iceberg/index/IndexSnapshotAdd.java new file mode 100644 index 000000000000..5ce5cefa697d --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexSnapshotAdd.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import static org.apache.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS; +import static org.apache.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS_DEFAULT; +import static org.apache.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS; +import static org.apache.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS_DEFAULT; +import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES; +import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES_DEFAULT; +import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS; +import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT; + +import java.util.Map; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.iceberg.util.Tasks; + +class IndexSnapshotAdd implements AddIndexSnapshot { + private final IndexOperations ops; + private final Map snapshotProperties = Maps.newHashMap(); + private IndexMetadata base; 
+ private Long tableSnapshotId = null; + private Long indexSnapshotId = null; + + IndexSnapshotAdd(IndexOperations ops) { + this.ops = ops; + this.base = ops.current(); + } + + @Override + public IndexSnapshot apply() { + return internalApply().snapshot(indexSnapshotId); + } + + @VisibleForTesting + IndexMetadata internalApply() { + Preconditions.checkState(null != tableSnapshotId, "Table snapshot ID must be set"); + Preconditions.checkState(null != indexSnapshotId, "Index snapshot ID must be set"); + + this.base = ops.refresh(); + + IndexSnapshot snapshot = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(tableSnapshotId) + .indexSnapshotId(indexSnapshotId) + .versionId(base.currentVersionId()) + .properties(snapshotProperties.isEmpty() ? null : snapshotProperties) + .build(); + + return IndexMetadata.buildFrom(base).addSnapshot(snapshot).build(); + } + + @Override + public void commit() { + Map properties = + base.currentVersion().properties() != null + ? base.currentVersion().properties() + : Maps.newHashMap(); + Tasks.foreach(ops) + .retry( + PropertyUtil.propertyAsInt(properties, COMMIT_NUM_RETRIES, COMMIT_NUM_RETRIES_DEFAULT)) + .exponentialBackoff( + PropertyUtil.propertyAsInt( + properties, COMMIT_MIN_RETRY_WAIT_MS, COMMIT_MIN_RETRY_WAIT_MS_DEFAULT), + PropertyUtil.propertyAsInt( + properties, COMMIT_MAX_RETRY_WAIT_MS, COMMIT_MAX_RETRY_WAIT_MS_DEFAULT), + PropertyUtil.propertyAsInt( + properties, COMMIT_TOTAL_RETRY_TIME_MS, COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT), + 2.0 /* exponential */) + .onlyRetryOn(CommitFailedException.class) + .run(taskOps -> taskOps.commit(base, internalApply())); + } + + @Override + public AddIndexSnapshot withTableSnapshotId(long newTableSnapshotId) { + this.tableSnapshotId = newTableSnapshotId; + return this; + } + + @Override + public AddIndexSnapshot withIndexSnapshotId(long newIndexSnapshotId) { + this.indexSnapshotId = newIndexSnapshotId; + return this; + } + + @Override + public AddIndexSnapshot withSnapshotProperties(Map 
properties) { + this.snapshotProperties.putAll(properties); + return this; + } + + @Override + public AddIndexSnapshot withSnapshotProperty(String key, String value) { + this.snapshotProperties.put(key, value); + return this; + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexSnapshotParser.java b/core/src/main/java/org/apache/iceberg/index/IndexSnapshotParser.java new file mode 100644 index 000000000000..4979ac910354 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexSnapshotParser.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.util.JsonUtil; + +class IndexSnapshotParser { + + private static final String TABLE_SNAPSHOT_ID = "table-snapshot-id"; + private static final String INDEX_SNAPSHOT_ID = "index-snapshot-id"; + private static final String VERSION_ID = "version-id"; + private static final String PROPERTIES = "properties"; + + private IndexSnapshotParser() {} + + static String toJson(IndexSnapshot snapshot) { + return JsonUtil.generate(gen -> toJson(snapshot, gen), false); + } + + static void toJson(IndexSnapshot snapshot, JsonGenerator generator) throws IOException { + Preconditions.checkArgument(snapshot != null, "Invalid index snapshot: null"); + generator.writeStartObject(); + generator.writeNumberField(TABLE_SNAPSHOT_ID, snapshot.tableSnapshotId()); + generator.writeNumberField(INDEX_SNAPSHOT_ID, snapshot.indexSnapshotId()); + generator.writeNumberField(VERSION_ID, snapshot.versionId()); + + if (snapshot.properties() != null && !snapshot.properties().isEmpty()) { + JsonUtil.writeStringMap(PROPERTIES, snapshot.properties(), generator); + } + + generator.writeEndObject(); + } + + static IndexSnapshot fromJson(String json) { + return JsonUtil.parse(json, IndexSnapshotParser::fromJson); + } + + static IndexSnapshot fromJson(JsonNode node) { + Preconditions.checkArgument(node != null, "Cannot parse index snapshot from null object"); + Preconditions.checkArgument( + node.isObject(), "Cannot parse index snapshot from non-object: %s", node); + + long tableSnapshotId = JsonUtil.getLong(TABLE_SNAPSHOT_ID, node); + long indexSnapshotId = JsonUtil.getLong(INDEX_SNAPSHOT_ID, node); + int versionId = JsonUtil.getInt(VERSION_ID, 
node); + Map properties = + node.has(PROPERTIES) ? JsonUtil.getStringMap(PROPERTIES, node) : ImmutableMap.of(); + + return ImmutableIndexSnapshot.builder() + .tableSnapshotId(tableSnapshotId) + .indexSnapshotId(indexSnapshotId) + .versionId(versionId) + .properties(properties) + .build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexSnapshotsRemove.java b/core/src/main/java/org/apache/iceberg/index/IndexSnapshotsRemove.java new file mode 100644 index 000000000000..d2d98ca90fd4 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexSnapshotsRemove.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import static org.apache.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS; +import static org.apache.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS_DEFAULT; +import static org.apache.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS; +import static org.apache.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS_DEFAULT; +import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES; +import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES_DEFAULT; +import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS; +import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.iceberg.util.Tasks; + +class IndexSnapshotsRemove implements RemoveIndexSnapshots { + private final IndexOperations ops; + private final Set snapshotIdsToRemove = Sets.newHashSet(); + private IndexMetadata base; + + IndexSnapshotsRemove(IndexOperations ops) { + this.ops = ops; + this.base = ops.current(); + } + + @Override + public List apply() { + List snapshotsToRemove = Lists.newArrayList(); + for (Long snapshotId : snapshotIdsToRemove) { + IndexSnapshot snapshot = base.snapshot(snapshotId); + if (snapshot != null) { + snapshotsToRemove.add(snapshot); + } + } + + return snapshotsToRemove; + } + + @VisibleForTesting + IndexMetadata internalApply() { + this.base = ops.refresh(); + + return IndexMetadata.buildFrom(base).removeSnapshots(snapshotIdsToRemove).build(); + } + + @Override + public void commit() { + Map 
properties = + base.currentVersion().properties() != null + ? base.currentVersion().properties() + : Maps.newHashMap(); + Tasks.foreach(ops) + .retry( + PropertyUtil.propertyAsInt(properties, COMMIT_NUM_RETRIES, COMMIT_NUM_RETRIES_DEFAULT)) + .exponentialBackoff( + PropertyUtil.propertyAsInt( + properties, COMMIT_MIN_RETRY_WAIT_MS, COMMIT_MIN_RETRY_WAIT_MS_DEFAULT), + PropertyUtil.propertyAsInt( + properties, COMMIT_MAX_RETRY_WAIT_MS, COMMIT_MAX_RETRY_WAIT_MS_DEFAULT), + PropertyUtil.propertyAsInt( + properties, COMMIT_TOTAL_RETRY_TIME_MS, COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT), + 2.0 /* exponential */) + .onlyRetryOn(CommitFailedException.class) + .run(taskOps -> taskOps.commit(base, internalApply())); + } + + @Override + public RemoveIndexSnapshots removeSnapshotById(long indexSnapshotId) { + snapshotIdsToRemove.add(indexSnapshotId); + return this; + } + + @Override + public RemoveIndexSnapshots removeSnapshotsByIds(Set indexSnapshotIds) { + snapshotIdsToRemove.addAll(indexSnapshotIds); + return this; + } + + @Override + public RemoveIndexSnapshots removeSnapshotsByIds(long... indexSnapshotIds) { + for (long id : indexSnapshotIds) { + snapshotIdsToRemove.add(id); + } + + return this; + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexUpdate.java b/core/src/main/java/org/apache/iceberg/index/IndexUpdate.java new file mode 100644 index 000000000000..b0eacf9d64f8 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexUpdate.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import java.io.Serializable; +import java.util.Set; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; + +/** Represents a change to index metadata. */ +public interface IndexUpdate extends Serializable { + void applyTo(IndexMetadata.Builder indexMetadataBuilder); + + /** Upgrades the format version of the index. */ + class UpgradeFormatVersion implements IndexUpdate { + private final int formatVersion; + + public UpgradeFormatVersion(int formatVersion) { + this.formatVersion = formatVersion; + } + + public int formatVersion() { + return formatVersion; + } + + @Override + public void applyTo(IndexMetadata.Builder indexMetadataBuilder) { + indexMetadataBuilder.upgradeFormatVersion(formatVersion); + } + } + + /** Adds a new index version to the index metadata. */ + class AddVersion implements IndexUpdate { + private final IndexVersion indexVersion; + + public AddVersion(IndexVersion indexVersion) { + this.indexVersion = indexVersion; + } + + public IndexVersion indexVersion() { + return indexVersion; + } + + @Override + public void applyTo(IndexMetadata.Builder indexMetadataBuilder) { + indexMetadataBuilder.addVersion(indexVersion); + } + } + + /** Sets the current index version ID in the index metadata. 
*/ + class SetCurrentVersion implements IndexUpdate { + private final int versionId; + + public SetCurrentVersion(int versionId) { + this.versionId = versionId; + } + + public int versionId() { + return versionId; + } + + @Override + public void applyTo(IndexMetadata.Builder indexMetadataBuilder) { + indexMetadataBuilder.setCurrentVersion(versionId); + } + } + + /** Adds a new index snapshot to the index metadata. */ + class AddSnapshot implements IndexUpdate { + private final IndexSnapshot indexSnapshot; + + public AddSnapshot(IndexSnapshot indexSnapshot) { + this.indexSnapshot = indexSnapshot; + } + + public IndexSnapshot indexSnapshot() { + return indexSnapshot; + } + + @Override + public void applyTo(IndexMetadata.Builder indexMetadataBuilder) { + indexMetadataBuilder.addSnapshot(indexSnapshot); + } + } + + /** Removes index snapshots from the index metadata. */ + class RemoveSnapshots implements IndexUpdate { + private final Set indexSnapshotIds; + + public RemoveSnapshots(long indexSnapshotId) { + this.indexSnapshotIds = ImmutableSet.of(indexSnapshotId); + } + + public RemoveSnapshots(Set indexSnapshotIds) { + this.indexSnapshotIds = ImmutableSet.copyOf(indexSnapshotIds); + } + + public Set indexSnapshotIds() { + return indexSnapshotIds; + } + + @Override + public void applyTo(IndexMetadata.Builder indexMetadataBuilder) { + indexMetadataBuilder.removeSnapshots(indexSnapshotIds); + } + } + + /** Sets the location of the index. 
*/ + class SetLocation implements IndexUpdate { + private final String location; + + public SetLocation(String location) { + this.location = location; + } + + public String location() { + return location; + } + + @Override + public void applyTo(IndexMetadata.Builder indexMetadataBuilder) { + indexMetadataBuilder.setLocation(location); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexUpdateParser.java b/core/src/main/java/org/apache/iceberg/index/IndexUpdateParser.java new file mode 100644 index 000000000000..863f2c3e0a11 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexUpdateParser.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.util.JsonUtil; + +/** Parser for {@link IndexUpdate} implementations. 
*/ +public class IndexUpdateParser { + + private IndexUpdateParser() {} + + private static final String ACTION = "action"; + + // action types - visible for testing + static final String UPGRADE_FORMAT_VERSION = "upgrade-format-version"; + static final String ADD_SNAPSHOT = "add-snapshot"; + static final String REMOVE_SNAPSHOTS = "remove-snapshots"; + static final String ADD_VERSION = "add-version"; + static final String SET_CURRENT_VERSION = "set-current-version"; + static final String SET_LOCATION = "set-location"; + + // UpgradeFormatVersion + private static final String FORMAT_VERSION = "format-version"; + + // AddIndexSnapshot + private static final String SNAPSHOT = "snapshot"; + + // RemoveIndexSnapshots + private static final String SNAPSHOT_IDS = "snapshot-ids"; + + // AddIndexVersion + private static final String VERSION = "version"; + + // SetCurrentVersion + private static final String VERSION_ID = "version-id"; + + // SetLocation + private static final String LOCATION = "location"; + + private static final Map, String> ACTIONS = + ImmutableMap., String>builder() + .put(IndexUpdate.UpgradeFormatVersion.class, UPGRADE_FORMAT_VERSION) + .put(IndexUpdate.AddSnapshot.class, ADD_SNAPSHOT) + .put(IndexUpdate.RemoveSnapshots.class, REMOVE_SNAPSHOTS) + .put(IndexUpdate.AddVersion.class, ADD_VERSION) + .put(IndexUpdate.SetCurrentVersion.class, SET_CURRENT_VERSION) + .put(IndexUpdate.SetLocation.class, SET_LOCATION) + .buildOrThrow(); + + public static String toJson(IndexUpdate indexUpdate) { + return toJson(indexUpdate, false); + } + + public static String toJson(IndexUpdate indexUpdate, boolean pretty) { + return JsonUtil.generate(gen -> toJson(indexUpdate, gen), pretty); + } + + public static void toJson(IndexUpdate indexUpdate, JsonGenerator generator) throws IOException { + String updateAction = ACTIONS.get(indexUpdate.getClass()); + + Preconditions.checkArgument( + updateAction != null, + "Cannot convert index update to json. 
Unrecognized index update type: %s", + indexUpdate.getClass().getName()); + + generator.writeStartObject(); + generator.writeStringField(ACTION, updateAction); + + switch (updateAction) { + case UPGRADE_FORMAT_VERSION: + writeUpgradeIndexFormatVersion((IndexUpdate.UpgradeFormatVersion) indexUpdate, generator); + break; + case ADD_SNAPSHOT: + writeAddIndexSnapshot((IndexUpdate.AddSnapshot) indexUpdate, generator); + break; + case REMOVE_SNAPSHOTS: + writeRemoveIndexSnapshots((IndexUpdate.RemoveSnapshots) indexUpdate, generator); + break; + case ADD_VERSION: + writeAddIndexVersion((IndexUpdate.AddVersion) indexUpdate, generator); + break; + case SET_CURRENT_VERSION: + writeSetCurrentVersionId((IndexUpdate.SetCurrentVersion) indexUpdate, generator); + break; + case SET_LOCATION: + writeSetLocation((IndexUpdate.SetLocation) indexUpdate, generator); + break; + default: + throw new IllegalArgumentException( + String.format( + "Cannot convert index update to json. Unrecognized action: %s", updateAction)); + } + + generator.writeEndObject(); + } + + /** + * Read an IndexUpdate from a JSON string. + * + * @param json a JSON string of an IndexUpdate + * @return an IndexUpdate object + */ + public static IndexUpdate fromJson(String json) { + Preconditions.checkArgument(json != null, "Cannot parse index update from null string"); + return JsonUtil.parse(json, IndexUpdateParser::fromJson); + } + + public static IndexUpdate fromJson(JsonNode jsonNode) { + Preconditions.checkArgument( + jsonNode != null && jsonNode.isObject(), + "Cannot parse index update from non-object value: %s", + jsonNode); + Preconditions.checkArgument( + jsonNode.hasNonNull(ACTION), "Cannot parse index update. 
Missing field: action"); + String action = JsonUtil.getString(ACTION, jsonNode).toLowerCase(Locale.ROOT); + + switch (action) { + case UPGRADE_FORMAT_VERSION: + return readUpgradeIndexFormatVersion(jsonNode); + case ADD_SNAPSHOT: + return readAddIndexSnapshot(jsonNode); + case REMOVE_SNAPSHOTS: + return readRemoveIndexSnapshots(jsonNode); + case ADD_VERSION: + return readAddIndexVersion(jsonNode); + case SET_CURRENT_VERSION: + return readSetCurrentVersionId(jsonNode); + case SET_LOCATION: + return readSetLocation(jsonNode); + default: + throw new UnsupportedOperationException( + String.format("Cannot convert index update action from json: %s", action)); + } + } + + private static void writeUpgradeIndexFormatVersion( + IndexUpdate.UpgradeFormatVersion update, JsonGenerator gen) throws IOException { + gen.writeNumberField(FORMAT_VERSION, update.formatVersion()); + } + + private static void writeAddIndexSnapshot(IndexUpdate.AddSnapshot update, JsonGenerator gen) + throws IOException { + gen.writeFieldName(SNAPSHOT); + IndexSnapshotParser.toJson(update.indexSnapshot(), gen); + } + + private static void writeRemoveIndexSnapshots( + IndexUpdate.RemoveSnapshots update, JsonGenerator gen) throws IOException { + JsonUtil.writeLongArray(SNAPSHOT_IDS, update.indexSnapshotIds(), gen); + } + + private static void writeAddIndexVersion(IndexUpdate.AddVersion update, JsonGenerator gen) + throws IOException { + gen.writeFieldName(VERSION); + IndexVersionParser.toJson(update.indexVersion(), gen); + } + + private static void writeSetCurrentVersionId( + IndexUpdate.SetCurrentVersion update, JsonGenerator gen) throws IOException { + gen.writeNumberField(VERSION_ID, update.versionId()); + } + + private static void writeSetLocation(IndexUpdate.SetLocation update, JsonGenerator gen) + throws IOException { + gen.writeStringField(LOCATION, update.location()); + } + + private static IndexUpdate readAddIndexSnapshot(JsonNode node) { + JsonNode snapshotNode = JsonUtil.get(SNAPSHOT, node); + 
IndexSnapshot snapshot = IndexSnapshotParser.fromJson(snapshotNode); + return new IndexUpdate.AddSnapshot(snapshot); + } + + private static IndexUpdate readRemoveIndexSnapshots(JsonNode node) { + Set snapshotIds = JsonUtil.getLongSetOrNull(SNAPSHOT_IDS, node); + Preconditions.checkArgument( + snapshotIds != null, + "Invalid set of index snapshot ids to remove: must be non-null", + snapshotIds); + return new IndexUpdate.RemoveSnapshots(snapshotIds); + } + + private static IndexUpdate readAddIndexVersion(JsonNode node) { + JsonNode versionNode = JsonUtil.get(VERSION, node); + IndexVersion version = IndexVersionParser.fromJson(versionNode); + return new IndexUpdate.AddVersion(version); + } + + private static IndexUpdate readSetCurrentVersionId(JsonNode node) { + int versionId = JsonUtil.getInt(VERSION_ID, node); + return new IndexUpdate.SetCurrentVersion(versionId); + } + + private static IndexUpdate readSetLocation(JsonNode node) { + String location = JsonUtil.getString(LOCATION, node); + return new IndexUpdate.SetLocation(location); + } + + private static IndexUpdate readUpgradeIndexFormatVersion(JsonNode node) { + int formatVersion = JsonUtil.getInt(FORMAT_VERSION, node); + return new IndexUpdate.UpgradeFormatVersion(formatVersion); + } +} diff --git a/core/src/main/java/org/apache/iceberg/index/IndexVersionAdd.java b/core/src/main/java/org/apache/iceberg/index/IndexVersionAdd.java new file mode 100644 index 000000000000..0e08c0f09512 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/index/IndexVersionAdd.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.index;

import static org.apache.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS;
import static org.apache.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS_DEFAULT;
import static org.apache.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS;
import static org.apache.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS_DEFAULT;
import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES;
import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES_DEFAULT;
import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS;
import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT;

import java.util.Map;
import org.apache.iceberg.exceptions.CommitFailedException;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.Tasks;

/**
 * Implementation of {@link AddIndexVersion} that appends a new {@link IndexVersion} to the index
 * metadata and commits it through the index's {@link IndexOperations}.
 *
 * <p>Commits are retried on {@link CommitFailedException} using the standard table commit retry
 * properties; the metadata is rebuilt from a fresh refresh on every attempt.
 */
class IndexVersionAdd implements AddIndexVersion {
  private final IndexOperations ops;
  // Properties accumulated via withProperty/withProperties for the new version.
  private final Map<String, String> properties = Maps.newHashMap();
  // Latest known metadata; re-read from ops on every internalApply() call.
  private IndexMetadata base;

  IndexVersionAdd(IndexOperations ops) {
    this.ops = ops;
    this.base = ops.current();
  }

  /** Returns the version that would become current if this update were committed. */
  @Override
  public IndexVersion apply() {
    return internalApply().currentVersion();
  }

  /**
   * Refreshes the base metadata and builds the updated metadata containing the new version.
   *
   * <p>Refreshing here means each retry attempt in {@link #commit()} is applied on top of the
   * latest committed state.
   */
  @VisibleForTesting
  IndexMetadata internalApply() {
    this.base = ops.refresh();

    // NOTE(review): the new version reuses base.currentVersionId(). Unless
    // IndexMetadata.buildFrom(...).addVersion(...) reassigns version ids, this collides with the
    // already-current version — confirm against the IndexMetadata builder's semantics.
    return IndexMetadata.buildFrom(base)
        .addVersion(
            ImmutableIndexVersion.builder()
                .timestampMillis(System.currentTimeMillis())
                .versionId(base.currentVersionId())
                .properties(properties)
                .build())
        .setCurrentVersion(base.currentVersionId())
        .build();
  }

  /** Commits the new version, retrying on {@link CommitFailedException}. */
  @Override
  public void commit() {
    // Retry settings come from the current version's properties when available; fall back to the
    // table-level defaults. Guard against a missing base/current version (e.g. a brand-new index)
    // instead of NPE-ing before the commit is even attempted.
    Map<String, String> currentProperties =
        (base != null && base.currentVersion() != null && base.currentVersion().properties() != null)
            ? base.currentVersion().properties()
            : Maps.newHashMap();
    Tasks.foreach(ops)
        .retry(
            PropertyUtil.propertyAsInt(
                currentProperties, COMMIT_NUM_RETRIES, COMMIT_NUM_RETRIES_DEFAULT))
        .exponentialBackoff(
            PropertyUtil.propertyAsInt(
                currentProperties, COMMIT_MIN_RETRY_WAIT_MS, COMMIT_MIN_RETRY_WAIT_MS_DEFAULT),
            PropertyUtil.propertyAsInt(
                currentProperties, COMMIT_MAX_RETRY_WAIT_MS, COMMIT_MAX_RETRY_WAIT_MS_DEFAULT),
            PropertyUtil.propertyAsInt(
                currentProperties, COMMIT_TOTAL_RETRY_TIME_MS, COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT),
            2.0 /* exponential */)
        .onlyRetryOn(CommitFailedException.class)
        .run(
            taskOps -> {
              // Build the updated metadata FIRST: internalApply() refreshes this.base, so the
              // base handed to commit() matches the metadata the update was applied on. The
              // original `taskOps.commit(base, internalApply())` evaluated the stale `base`
              // field before internalApply() ran (Java evaluates arguments left-to-right).
              IndexMetadata updated = internalApply();
              taskOps.commit(base, updated);
            });
  }

  @Override
  public AddIndexVersion withProperties(Map<String, String> newProperties) {
    // Route through withProperty so null keys/values are rejected uniformly.
    newProperties.forEach(this::withProperty);
    return this;
  }

  @Override
  public AddIndexVersion withProperty(String key, String value) {
    Preconditions.checkArgument(null != key, "Invalid key: null");
    Preconditions.checkArgument(null != value, "Invalid value: null");

    properties.put(key, value);
    return this;
  }
}
The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.index;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import java.io.IOException;
import java.util.Map;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.util.JsonUtil;

/**
 * JSON serialization for {@link IndexVersion}.
 *
 * <p>A version is written as an object with {@code version-id}, {@code timestamp-ms}, and an
 * optional {@code properties} map; the map is omitted when null or empty and read back as an
 * empty map when absent.
 */
class IndexVersionParser {

  private static final String VERSION_ID = "version-id";
  private static final String TIMESTAMP_MS = "timestamp-ms";
  private static final String PROPERTIES = "properties";

  private IndexVersionParser() {}

  /** Serializes a version to a compact JSON string. */
  static String toJson(IndexVersion version) {
    return JsonUtil.generate(gen -> toJson(version, gen), false);
  }

  /** Writes a version as a JSON object to the given generator. */
  static void toJson(IndexVersion version, JsonGenerator gen) throws IOException {
    Preconditions.checkArgument(version != null, "Invalid index version: null");

    gen.writeStartObject();
    gen.writeNumberField(VERSION_ID, version.versionId());
    gen.writeNumberField(TIMESTAMP_MS, version.timestampMillis());

    // Skip the properties field entirely when there is nothing to record.
    Map<String, String> props = version.properties();
    if (props != null && !props.isEmpty()) {
      JsonUtil.writeStringMap(PROPERTIES, props, gen);
    }

    gen.writeEndObject();
  }

  /** Parses a version from a JSON string. */
  static IndexVersion fromJson(String json) {
    return JsonUtil.parse(json, IndexVersionParser::fromJson);
  }

  /** Parses a version from a JSON object node. */
  static IndexVersion fromJson(JsonNode node) {
    Preconditions.checkArgument(node != null, "Cannot parse index version from null object");
    Preconditions.checkArgument(
        node.isObject(), "Cannot parse index version from non-object: %s", node);

    // A missing properties field round-trips to an empty map.
    Map<String, String> props;
    if (node.has(PROPERTIES)) {
      props = JsonUtil.getStringMap(PROPERTIES, node);
    } else {
      props = ImmutableMap.of();
    }

    return ImmutableIndexVersion.builder()
        .versionId(JsonUtil.getInt(VERSION_ID, node))
        .timestampMillis(JsonUtil.getLong(TIMESTAMP_MS, node))
        .properties(props)
        .build();
  }
}
+ */ +package org.apache.iceberg.index; + +import static org.apache.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS; +import static org.apache.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS_DEFAULT; +import static org.apache.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS; +import static org.apache.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS_DEFAULT; +import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES; +import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES_DEFAULT; +import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS; +import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT; + +import java.util.Map; +import org.apache.iceberg.UpdateLocation; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.iceberg.util.Tasks; + +class SetIndexLocation implements UpdateLocation { + private final IndexOperations ops; + private String newLocation = null; + + SetIndexLocation(IndexOperations ops) { + this.ops = ops; + } + + @Override + public String apply() { + Preconditions.checkState(null != newLocation, "Invalid index location: null"); + return newLocation; + } + + @Override + public void commit() { + IndexMetadata base = ops.refresh(); + Map properties = + base.currentVersion().properties() != null + ? 
base.currentVersion().properties() + : Maps.newHashMap(); + Tasks.foreach(ops) + .retry( + PropertyUtil.propertyAsInt(properties, COMMIT_NUM_RETRIES, COMMIT_NUM_RETRIES_DEFAULT)) + .exponentialBackoff( + PropertyUtil.propertyAsInt( + properties, COMMIT_MIN_RETRY_WAIT_MS, COMMIT_MIN_RETRY_WAIT_MS_DEFAULT), + PropertyUtil.propertyAsInt( + properties, COMMIT_MAX_RETRY_WAIT_MS, COMMIT_MAX_RETRY_WAIT_MS_DEFAULT), + PropertyUtil.propertyAsInt( + properties, COMMIT_TOTAL_RETRY_TIME_MS, COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT), + 2.0 /* exponential */) + .onlyRetryOn(CommitFailedException.class) + .run( + taskOps -> + taskOps.commit(base, IndexMetadata.buildFrom(base).setLocation(apply()).build())); + } + + @Override + public UpdateLocation setLocation(String location) { + this.newLocation = location; + return this; + } +} diff --git a/core/src/main/java/org/apache/iceberg/inmemory/InMemoryCatalog.java b/core/src/main/java/org/apache/iceberg/inmemory/InMemoryCatalog.java index 975b5a39dfe3..dff5ac77e614 100644 --- a/core/src/main/java/org/apache/iceberg/inmemory/InMemoryCatalog.java +++ b/core/src/main/java/org/apache/iceberg/inmemory/InMemoryCatalog.java @@ -33,6 +33,8 @@ import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.IndexCatalog; +import org.apache.iceberg.catalog.IndexIdentifier; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; @@ -42,6 +44,10 @@ import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.exceptions.NoSuchViewException; +import org.apache.iceberg.index.Index; +import org.apache.iceberg.index.IndexBuilder; +import org.apache.iceberg.index.IndexDefinition; +import org.apache.iceberg.index.IndexType; import org.apache.iceberg.io.CloseableGroup; import 
org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.base.Joiner; @@ -60,13 +66,14 @@ * effects. It uses {@link InMemoryFileIO}. */ public class InMemoryCatalog extends BaseMetastoreViewCatalog - implements SupportsNamespaces, Closeable { + implements SupportsNamespaces, IndexCatalog, Closeable { private static final Joiner SLASH = Joiner.on("/"); private static final Joiner DOT = Joiner.on("."); private final ConcurrentMap> namespaces; private final ConcurrentMap tables; private final ConcurrentMap views; + private final InMemoryIndexCatalog indexCatalog; private FileIO io; private String catalogName; private String warehouseLocation; @@ -77,6 +84,7 @@ public InMemoryCatalog() { this.namespaces = Maps.newConcurrentMap(); this.tables = Maps.newConcurrentMap(); this.views = Maps.newConcurrentMap(); + this.indexCatalog = new InMemoryIndexCatalog(this); } @Override @@ -94,7 +102,10 @@ public void initialize(String name, Map properties) { this.io = CatalogUtil.loadFileIO(InMemoryFileIO.class.getName(), properties, null); this.closeableGroup = new CloseableGroup(); closeableGroup.addCloseable(metricsReporter()); + closeableGroup.addCloseable(indexCatalog); closeableGroup.setSuppressCloseFailure(true); + + indexCatalog.initialize(name, properties); } @Override @@ -383,6 +394,43 @@ protected Map properties() { return catalogProperties == null ? ImmutableMap.of() : catalogProperties; } + // IndexCatalog delegate methods + + @Override + public List listIndexes(TableIdentifier tableIdentifier, IndexType... 
types) { + return indexCatalog.listIndexes(tableIdentifier, types); + } + + @Override + public Index loadIndex(IndexIdentifier identifier) { + return indexCatalog.loadIndex(identifier); + } + + @Override + public boolean indexExists(IndexIdentifier identifier) { + return indexCatalog.indexExists(identifier); + } + + @Override + public IndexBuilder buildIndex(IndexIdentifier identifier) { + return indexCatalog.buildIndex(identifier); + } + + @Override + public boolean dropIndex(IndexIdentifier identifier) { + return indexCatalog.dropIndex(identifier); + } + + @Override + public void invalidateIndex(IndexIdentifier identifier) { + indexCatalog.invalidateIndex(identifier); + } + + @Override + public Index registerIndex(IndexIdentifier identifier, String metadataFileLocation) { + return indexCatalog.registerIndex(identifier, metadataFileLocation); + } + private class InMemoryTableOperations extends BaseMetastoreTableOperations { private final FileIO fileIO; private final TableIdentifier tableIdentifier; diff --git a/core/src/main/java/org/apache/iceberg/inmemory/InMemoryIndexCatalog.java b/core/src/main/java/org/apache/iceberg/inmemory/InMemoryIndexCatalog.java new file mode 100644 index 000000000000..1f4404e8f1ec --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/inmemory/InMemoryIndexCatalog.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.inmemory;

import java.io.Closeable;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Collectors;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.IndexIdentifier;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.CommitFailedException;
import org.apache.iceberg.exceptions.NoSuchIndexException;
import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.index.BaseIndexCatalog;
import org.apache.iceberg.index.BaseIndexOperations;
import org.apache.iceberg.index.ImmutableIndexSummary;
import org.apache.iceberg.index.IndexDefinition;
import org.apache.iceberg.index.IndexMetadata;
import org.apache.iceberg.index.IndexOperations;
import org.apache.iceberg.index.IndexSnapshot;
import org.apache.iceberg.io.CloseableGroup;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Objects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.util.LocationUtil;

/**
 * Index catalog implementation that uses in-memory data-structures to store the indexes. This class
 * doesn't touch external resources and can be utilized to write unit tests without side effects. It
 * uses {@link InMemoryFileIO}.
 */
public class InMemoryIndexCatalog extends BaseIndexCatalog implements Closeable {
  private static final Joiner SLASH = Joiner.on("/");

  // Maps each index identifier to its latest committed metadata file location.
  private final ConcurrentMap<IndexIdentifier, String> indexes;
  // Catalog that owns the tables these indexes are attached to.
  private final Catalog tableCatalog;
  private FileIO io;
  private String catalogName;
  private String warehouseLocation;
  private CloseableGroup closeableGroup;
  private Map<String, String> catalogProperties;

  public InMemoryIndexCatalog(Catalog tableCatalog) {
    Preconditions.checkArgument(tableCatalog != null, "Table catalog cannot be null");
    this.indexes = Maps.newConcurrentMap();
    this.tableCatalog = tableCatalog;
  }

  @Override
  public String name() {
    return catalogName;
  }

  @Override
  public void initialize(String name, Map<String, String> properties) {
    super.initialize(name, properties);
    this.catalogName = name != null ? name : InMemoryIndexCatalog.class.getSimpleName();
    this.catalogProperties = ImmutableMap.copyOf(properties);

    // Use the shared helper instead of an ad-hoc regex so trailing-slash handling matches
    // JdbcIndexCatalog and the rest of the codebase.
    this.warehouseLocation =
        LocationUtil.stripTrailingSlash(
            properties.getOrDefault(CatalogProperties.WAREHOUSE_LOCATION, ""));
    this.io = new InMemoryFileIO();
    this.closeableGroup = new CloseableGroup();
    // Register the FileIO so close() releases it, mirroring JdbcIndexCatalog.
    closeableGroup.addCloseable(io);
    closeableGroup.setSuppressCloseFailure(true);
  }

  @Override
  protected Catalog tableCatalog() {
    return tableCatalog;
  }

  @Override
  protected IndexOperations newIndexOps(IndexIdentifier identifier) {
    return new InMemoryIndexOperations(io, identifier);
  }

  /** Default location: {@code <warehouse>/<namespace...>/<table>/indexes/<index-name>}. */
  @Override
  protected String defaultIndexLocation(IndexIdentifier identifier) {
    return SLASH.join(
        warehouseLocation,
        SLASH.join(identifier.tableIdentifier().namespace().levels()),
        identifier.tableIdentifier().name(),
        "indexes",
        identifier.name());
  }

  @Override
  protected List<IndexDefinition> doListIndexes(TableIdentifier tableIdentifier) {
    // Deterministic ordering by identifier string so listings are stable across calls.
    return indexes.keySet().stream()
        .filter(idx -> idx.tableIdentifier().equals(tableIdentifier))
        .sorted(Comparator.comparing(IndexIdentifier::toString))
        .map(this::loadIndexSummary)
        .collect(Collectors.toList());
  }

  /** Loads the current metadata for an index and condenses it into a summary definition. */
  private IndexDefinition loadIndexSummary(IndexIdentifier identifier) {
    IndexOperations ops = newIndexOps(identifier);
    IndexMetadata metadata = ops.current();
    if (metadata == null) {
      // The index may have been dropped between listing and loading.
      throw new NoSuchIndexException("Index does not exist: %s", identifier);
    }

    long[] availableSnapshots =
        metadata.snapshots().stream().mapToLong(IndexSnapshot::tableSnapshotId).toArray();

    return ImmutableIndexSummary.builder()
        .id(identifier)
        .type(metadata.type())
        .indexColumnIds(metadata.indexColumnIds().stream().mapToInt(Integer::intValue).toArray())
        .optimizedColumnIds(
            metadata.optimizedColumnIds().stream().mapToInt(Integer::intValue).toArray())
        .availableTableSnapshots(availableSnapshots)
        .build();
  }

  @Override
  protected boolean doDropIndex(IndexIdentifier identifier) {
    // Synchronized to serialize with the commit path, which locks the catalog while swapping
    // metadata locations.
    synchronized (this) {
      return null != indexes.remove(identifier);
    }
  }

  @Override
  protected Map<String, String> properties() {
    return catalogProperties == null ? ImmutableMap.of() : catalogProperties;
  }

  @Override
  public void close() throws IOException {
    closeableGroup.close();
    indexes.clear();
  }

  /** Index operations backed by the catalog's in-memory identifier-to-location map. */
  private class InMemoryIndexOperations extends BaseIndexOperations {
    private final FileIO fileIO;
    private final IndexIdentifier indexIdentifier;
    private final String fullIndexName;

    InMemoryIndexOperations(FileIO fileIO, IndexIdentifier indexIdentifier) {
      this.fileIO = fileIO;
      this.indexIdentifier = indexIdentifier;
      this.fullIndexName = fullIndexName(indexIdentifier);
    }

    @Override
    public void doRefresh() {
      String latestLocation = indexes.get(indexIdentifier);
      if (latestLocation == null) {
        // No committed metadata yet (or the index was dropped): stop refreshing.
        disableRefresh();
      } else {
        refreshFromMetadataLocation(latestLocation);
      }
    }

    @Override
    public void doCommit(IndexMetadata base, IndexMetadata metadata) {
      String newLocation = writeNewMetadataIfRequired(metadata);
      String oldLocation = base == null ? null : currentMetadataLocation();

      // Lock the catalog so the table-existence check and the location swap are atomic with
      // respect to drops and other commits.
      synchronized (InMemoryIndexCatalog.this) {
        if (null == base && !tableExists(indexIdentifier.tableIdentifier())) {
          throw new NoSuchTableException(
              "Cannot create index %s. Table does not exist: %s",
              indexIdentifier, indexIdentifier.tableIdentifier());
        }

        indexes.compute(
            indexIdentifier,
            (k, existingLocation) -> {
              // Any mismatch between the expected and stored location means a concurrent change.
              if (!Objects.equal(existingLocation, oldLocation)) {
                if (null == base) {
                  // We expected to create a new index, but one already exists.
                  throw new AlreadyExistsException("Index already exists: %s", indexName());
                }

                if (null == existingLocation) {
                  // We expected an existing index, but it was concurrently dropped.
                  throw new NoSuchIndexException("Index does not exist: %s", indexName());
                }

                throw new CommitFailedException(
                    "Cannot commit to index %s metadata location from %s to %s "
                        + "because it has been concurrently modified to %s",
                    indexIdentifier, oldLocation, newLocation, existingLocation);
              }

              return newLocation;
            });
      }
    }

    @Override
    protected FileIO io() {
      return fileIO;
    }

    @Override
    protected String indexName() {
      return fullIndexName;
    }
  }
}
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; @@ -72,7 +78,7 @@ import org.slf4j.LoggerFactory; public class JdbcCatalog extends BaseMetastoreViewCatalog - implements Configurable, SupportsNamespaces { + implements Configurable, SupportsNamespaces, IndexCatalog { public static final String PROPERTY_PREFIX = "jdbc."; private static final String NAMESPACE_EXISTS_PROPERTY = "exists"; @@ -92,6 +98,7 @@ public class JdbcCatalog extends BaseMetastoreViewCatalog private boolean initializeCatalogTables; private CloseableGroup closeableGroup; private JdbcUtil.SchemaVersion schemaVersion = JdbcUtil.SchemaVersion.V0; + private JdbcIndexCatalog indexCatalog; public JdbcCatalog() { this(null, null, true); @@ -149,8 +156,13 @@ public void initialize(String name, Map properties) { updateSchemaIfRequired(); + // Initialize the index catalog + this.indexCatalog = new JdbcIndexCatalog(this, connections, io); + this.indexCatalog.initialize(catalogName, properties); + this.closeableGroup = new CloseableGroup(); closeableGroup.addCloseable(metricsReporter()); + closeableGroup.addCloseable(indexCatalog); closeableGroup.addCloseable(connections); closeableGroup.addCloseable(io); closeableGroup.setSuppressCloseFailure(true); @@ -902,4 +914,41 @@ public Transaction replaceTransaction() { return super.replaceTransaction(); } } + + // IndexCatalog delegate methods + + @Override + public List listIndexes(TableIdentifier tableIdentifier, IndexType... 
types) { + return indexCatalog.listIndexes(tableIdentifier, types); + } + + @Override + public Index loadIndex(IndexIdentifier identifier) { + return indexCatalog.loadIndex(identifier); + } + + @Override + public boolean indexExists(IndexIdentifier identifier) { + return indexCatalog.indexExists(identifier); + } + + @Override + public IndexBuilder buildIndex(IndexIdentifier identifier) { + return indexCatalog.buildIndex(identifier); + } + + @Override + public boolean dropIndex(IndexIdentifier identifier) { + return indexCatalog.dropIndex(identifier); + } + + @Override + public void invalidateIndex(IndexIdentifier identifier) { + indexCatalog.invalidateIndex(identifier); + } + + @Override + public Index registerIndex(IndexIdentifier identifier, String metadataFileLocation) { + return indexCatalog.registerIndex(identifier, metadataFileLocation); + } } diff --git a/core/src/main/java/org/apache/iceberg/jdbc/JdbcIndexCatalog.java b/core/src/main/java/org/apache/iceberg/jdbc/JdbcIndexCatalog.java new file mode 100644 index 000000000000..b38b197b5933 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/jdbc/JdbcIndexCatalog.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.jdbc; + +import java.io.Closeable; +import java.io.IOException; +import java.sql.DatabaseMetaData; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.SQLNonTransientConnectionException; +import java.sql.SQLTimeoutException; +import java.sql.SQLTransientConnectionException; +import java.util.Comparator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.IndexIdentifier; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchIndexException; +import org.apache.iceberg.index.BaseIndexCatalog; +import org.apache.iceberg.index.ImmutableIndexSummary; +import org.apache.iceberg.index.IndexDefinition; +import org.apache.iceberg.index.IndexMetadata; +import org.apache.iceberg.index.IndexOperations; +import org.apache.iceberg.index.IndexSnapshot; +import org.apache.iceberg.io.CloseableGroup; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.relocated.com.google.common.base.Joiner; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.util.LocationUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * JDBC implementation of an Index Catalog that stores index metadata in a JDBC database. + * + *

This catalog uses a JDBC connection pool to store and retrieve index metadata. The indexes + * table schema includes catalog_name, table_namespace, table_name, index_name, and + * metadata_location. + */ +public class JdbcIndexCatalog extends BaseIndexCatalog implements Closeable { + + private static final Logger LOG = LoggerFactory.getLogger(JdbcIndexCatalog.class); + private static final Joiner SLASH = Joiner.on("/"); + + private final JdbcClientPool connections; + private final Catalog tableCatalog; + private FileIO io; + private String catalogName; + private String warehouseLocation; + private CloseableGroup closeableGroup; + private Map catalogProperties; + + /** + * Creates a new JdbcIndexCatalog. + * + * @param tableCatalog the catalog that manages tables + * @param connections the JDBC client pool for database connections + * @param fileIO the FileIO to use for reading and writing index metadata + */ + public JdbcIndexCatalog(Catalog tableCatalog, JdbcClientPool connections, FileIO fileIO) { + Preconditions.checkArgument(tableCatalog != null, "Table catalog cannot be null"); + Preconditions.checkArgument(connections != null, "JDBC client pool cannot be null"); + this.tableCatalog = tableCatalog; + this.connections = connections; + this.io = fileIO; + } + + @Override + public String name() { + return catalogName; + } + + @Override + public void initialize(String name, Map properties) { + super.initialize(name, properties); + this.catalogName = name != null ? 
name : JdbcIndexCatalog.class.getSimpleName(); + this.catalogProperties = ImmutableMap.copyOf(properties); + + String warehouse = + LocationUtil.stripTrailingSlash( + properties.getOrDefault(CatalogProperties.WAREHOUSE_LOCATION, "")); + this.warehouseLocation = warehouse; + + if (this.io == null) { + String ioImpl = + properties.getOrDefault( + CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.hadoop.HadoopFileIO"); + this.io = CatalogUtil.loadFileIO(ioImpl, properties, null); + } + + this.closeableGroup = new CloseableGroup(); + closeableGroup.addCloseable(io); + closeableGroup.setSuppressCloseFailure(true); + + initializeCatalogTables(); + } + + private void initializeCatalogTables() { + LOG.trace("Creating database tables (if missing) to store iceberg indexes"); + + try { + atomicCreateTable( + JdbcUtil.CATALOG_INDEX_TABLE_NAME, + JdbcUtil.CREATE_CATALOG_INDEX_SQL, + "to store iceberg catalog indexes"); + } catch (SQLTimeoutException e) { + throw new UncheckedSQLException(e, "Cannot initialize JDBC index catalog: Query timed out"); + } catch (SQLTransientConnectionException | SQLNonTransientConnectionException e) { + throw new UncheckedSQLException(e, "Cannot initialize JDBC index catalog: Connection failed"); + } catch (SQLException e) { + throw new UncheckedSQLException(e, "Cannot initialize JDBC index catalog"); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedInterruptedException(e, "Interrupted in call to initialize"); + } + } + + private void atomicCreateTable(String tableName, String sqlCommand, String reason) + throws SQLException, InterruptedException { + connections.run( + conn -> { + DatabaseMetaData dbMeta = conn.getMetaData(); + + Predicate tableTest = + tblName -> { + try { + ResultSet result = + dbMeta.getTables( + null /* catalog name */, + null /* schemaPattern */, + tblName /* tableNamePattern */, + null /* types */); + return result.next(); + } catch (SQLException e) { + return false; + } + }; + 
+ Predicate tableExists = + tblName -> + tableTest.test(tblName) || tableTest.test(tblName.toUpperCase(Locale.ROOT)); + + if (tableExists.test(tableName)) { + return true; + } + + LOG.debug("Creating table {} {}", tableName, reason); + try { + conn.prepareStatement(sqlCommand).execute(); + return true; + } catch (SQLException e) { + if (tableExists.test(tableName)) { + return true; + } + throw e; + } + }); + } + + @Override + protected Catalog tableCatalog() { + return tableCatalog; + } + + @Override + protected IndexOperations newIndexOps(IndexIdentifier identifier) { + return new JdbcIndexOperations(connections, io, catalogName, identifier, catalogProperties); + } + + @Override + protected String defaultIndexLocation(IndexIdentifier identifier) { + return SLASH.join( + warehouseLocation, + SLASH.join(identifier.tableIdentifier().namespace().levels()), + identifier.tableIdentifier().name(), + "indexes", + identifier.name()); + } + + @Override + protected List doListIndexes(TableIdentifier tableIdentifier) { + List indexIdentifiers = listIndexIdentifiers(tableIdentifier); + return indexIdentifiers.stream() + .sorted(Comparator.comparing(IndexIdentifier::toString)) + .map(this::loadIndexSummary) + .collect(Collectors.toList()); + } + + private List listIndexIdentifiers(TableIdentifier tableIdentifier) { + try { + return connections.run( + conn -> { + List result = Lists.newArrayList(); + try (PreparedStatement sql = conn.prepareStatement(JdbcUtil.LIST_INDEXES_SQL)) { + sql.setString(1, catalogName); + sql.setString(2, JdbcUtil.namespaceToString(tableIdentifier.namespace())); + sql.setString(3, tableIdentifier.name()); + ResultSet rs = sql.executeQuery(); + + while (rs.next()) { + String indexName = rs.getString(JdbcUtil.INDEX_NAME); + result.add(IndexIdentifier.of(tableIdentifier, indexName)); + } + + rs.close(); + } + return result; + }); + } catch (SQLException e) { + throw new UncheckedSQLException( + e, "Failed to list indexes for table %s in catalog %s", 
tableIdentifier, catalogName); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedInterruptedException(e, "Interrupted while listing indexes"); + } + } + + private IndexDefinition loadIndexSummary(IndexIdentifier identifier) { + IndexOperations ops = newIndexOps(identifier); + IndexMetadata metadata = ops.current(); + if (metadata == null) { + throw new NoSuchIndexException("Index does not exist: %s", identifier); + } + + long[] availableSnapshots = + metadata.snapshots().stream().mapToLong(IndexSnapshot::tableSnapshotId).toArray(); + + return ImmutableIndexSummary.builder() + .id(identifier) + .type(metadata.type()) + .indexColumnIds(metadata.indexColumnIds().stream().mapToInt(Integer::intValue).toArray()) + .optimizedColumnIds( + metadata.optimizedColumnIds().stream().mapToInt(Integer::intValue).toArray()) + .availableTableSnapshots(availableSnapshots) + .build(); + } + + @Override + protected boolean doDropIndex(IndexIdentifier identifier) { + try { + int deletedRecords = + connections.run( + conn -> { + try (PreparedStatement sql = conn.prepareStatement(JdbcUtil.DROP_INDEX_SQL)) { + sql.setString(1, catalogName); + sql.setString(2, JdbcUtil.namespaceToString(identifier.namespace())); + sql.setString(3, identifier.tableName()); + sql.setString(4, identifier.name()); + return sql.executeUpdate(); + } + }); + + if (deletedRecords == 0) { + LOG.info("Skipping drop, index does not exist: {}", identifier); + return false; + } + + LOG.info("Dropped index: {}", identifier); + return true; + } catch (SQLException e) { + throw new UncheckedSQLException( + e, "Failed to drop index %s from catalog %s", identifier, catalogName); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedInterruptedException(e, "Interrupted while dropping index"); + } + } + + @Override + protected Map properties() { + return catalogProperties == null ? 
ImmutableMap.of() : catalogProperties; + } + + @Override + public void close() throws IOException { + if (closeableGroup != null) { + closeableGroup.close(); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/jdbc/JdbcIndexOperations.java b/core/src/main/java/org/apache/iceberg/jdbc/JdbcIndexOperations.java new file mode 100644 index 000000000000..7ae176b7e6cd --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/jdbc/JdbcIndexOperations.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.jdbc; + +import java.sql.DataTruncation; +import java.sql.SQLException; +import java.sql.SQLIntegrityConstraintViolationException; +import java.sql.SQLNonTransientConnectionException; +import java.sql.SQLTimeoutException; +import java.sql.SQLTransientConnectionException; +import java.sql.SQLWarning; +import java.util.Map; +import java.util.Objects; +import org.apache.iceberg.catalog.IndexIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NoSuchIndexException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.index.BaseIndexOperations; +import org.apache.iceberg.index.IndexMetadata; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.PropertyUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** JDBC implementation of Iceberg IndexOperations. 
*/ +public class JdbcIndexOperations extends BaseIndexOperations { + + private static final Logger LOG = LoggerFactory.getLogger(JdbcIndexOperations.class); + private final String catalogName; + private final IndexIdentifier indexIdentifier; + private final FileIO fileIO; + private final JdbcClientPool connections; + private final Map catalogProperties; + + protected JdbcIndexOperations( + JdbcClientPool dbConnPool, + FileIO fileIO, + String catalogName, + IndexIdentifier indexIdentifier, + Map catalogProperties) { + this.catalogName = catalogName; + this.indexIdentifier = indexIdentifier; + this.fileIO = fileIO; + this.connections = dbConnPool; + this.catalogProperties = catalogProperties; + } + + @Override + protected void doRefresh() { + Map index; + + try { + index = JdbcUtil.loadIndex(connections, catalogName, indexIdentifier); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedInterruptedException(e, "Interrupted during refresh"); + } catch (SQLException e) { + // SQL exception happened when getting index from catalog + throw new UncheckedSQLException( + e, "Failed to get index %s from catalog %s", indexIdentifier, catalogName); + } + + if (index.isEmpty()) { + if (currentMetadataLocation() != null) { + throw new NoSuchIndexException("Index does not exist: %s", indexIdentifier); + } else { + this.disableRefresh(); + return; + } + } + + String newMetadataLocation = index.get(JdbcTableOperations.METADATA_LOCATION_PROP); + Preconditions.checkState( + newMetadataLocation != null, + "Invalid index %s: metadata location is null", + indexIdentifier); + refreshFromMetadataLocation(newMetadataLocation); + } + + @Override + protected void doCommit(IndexMetadata base, IndexMetadata metadata) { + String newMetadataLocation = writeNewMetadataIfRequired(metadata); + try { + Map index = JdbcUtil.loadIndex(connections, catalogName, indexIdentifier); + if (base != null) { + validateMetadataLocation(index, base); + String 
oldMetadataLocation = base.metadataFileLocation(); + // Start atomic update + LOG.debug("Committing existing index: {}", indexName()); + updateIndex(newMetadataLocation, oldMetadataLocation); + } else { + // index does not exist, create it + LOG.debug("Committing new index: {}", indexName()); + createIndex(newMetadataLocation); + } + + } catch (SQLIntegrityConstraintViolationException e) { + if (currentMetadataLocation() == null) { + throw new AlreadyExistsException(e, "Index already exists: %s", indexIdentifier); + } else { + throw new UncheckedSQLException(e, "Index already exists: %s", indexIdentifier); + } + + } catch (SQLTimeoutException e) { + throw new UncheckedSQLException(e, "Database Connection timeout"); + } catch (SQLTransientConnectionException | SQLNonTransientConnectionException e) { + throw new UncheckedSQLException(e, "Database Connection failed"); + } catch (DataTruncation e) { + throw new UncheckedSQLException(e, "Database data truncation error"); + } catch (SQLWarning e) { + throw new UncheckedSQLException(e, "Database warning"); + } catch (SQLException e) { + // SQLite doesn't set SQLState or throw SQLIntegrityConstraintViolationException + if (e.getMessage() != null && e.getMessage().contains("constraint failed")) { + throw new AlreadyExistsException("Index already exists: %s", indexIdentifier); + } + + throw new UncheckedSQLException(e, "Unknown failure"); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedInterruptedException(e, "Interrupted during commit"); + } + } + + @Override + protected String indexName() { + return indexIdentifier.toString(); + } + + @Override + protected FileIO io() { + return fileIO; + } + + private void validateMetadataLocation(Map index, IndexMetadata base) { + String catalogMetadataLocation = index.get(JdbcTableOperations.METADATA_LOCATION_PROP); + String baseMetadataLocation = base != null ? 
base.metadataFileLocation() : null; + + if (!Objects.equals(baseMetadataLocation, catalogMetadataLocation)) { + throw new CommitFailedException( + "Cannot commit %s: metadata location %s has changed from %s", + indexIdentifier, baseMetadataLocation, catalogMetadataLocation); + } + } + + private void updateIndex(String newMetadataLocation, String oldMetadataLocation) + throws SQLException, InterruptedException { + int updatedRecords = + JdbcUtil.updateIndex( + connections, catalogName, indexIdentifier, newMetadataLocation, oldMetadataLocation); + + if (updatedRecords == 1) { + LOG.debug("Successfully committed to existing index: {}", indexIdentifier); + } else { + throw new CommitFailedException( + "Failed to update index %s from catalog %s", indexIdentifier, catalogName); + } + } + + private void createIndex(String newMetadataLocation) throws SQLException, InterruptedException { + if (PropertyUtil.propertyAsBoolean(catalogProperties, JdbcUtil.STRICT_MODE_PROPERTY, false) + && !JdbcUtil.namespaceExists(catalogName, connections, indexIdentifier.namespace())) { + throw new NoSuchTableException( + "Cannot create index %s in catalog %s. Namespace %s does not exist", + indexIdentifier, catalogName, indexIdentifier.namespace()); + } + + if (!JdbcUtil.tableExists( + JdbcUtil.SchemaVersion.V1, catalogName, connections, indexIdentifier.tableIdentifier())) { + throw new NoSuchTableException( + "Cannot create index %s. 
Table does not exist: %s", + indexIdentifier, indexIdentifier.tableIdentifier()); + } + + if (JdbcUtil.indexExists(catalogName, connections, indexIdentifier)) { + throw new AlreadyExistsException("Index already exists: %s", indexIdentifier); + } + + int insertRecord = + JdbcUtil.doCommitCreateIndex( + connections, catalogName, indexIdentifier, newMetadataLocation); + + if (insertRecord == 1) { + LOG.debug("Successfully committed to new index: {}", indexIdentifier); + } else { + throw new CommitFailedException( + "Failed to create index %s in catalog %s", indexIdentifier, catalogName); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/jdbc/JdbcUtil.java b/core/src/main/java/org/apache/iceberg/jdbc/JdbcUtil.java index d59da3ad044c..452a47aaf88d 100644 --- a/core/src/main/java/org/apache/iceberg/jdbc/JdbcUtil.java +++ b/core/src/main/java/org/apache/iceberg/jdbc/JdbcUtil.java @@ -27,6 +27,7 @@ import java.util.Properties; import java.util.Set; import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.catalog.IndexIdentifier; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.relocated.com.google.common.base.Joiner; @@ -62,6 +63,112 @@ enum SchemaVersion { static final String TABLE_RECORD_TYPE = "TABLE"; static final String VIEW_RECORD_TYPE = "VIEW"; + // Catalog Index + static final String CATALOG_INDEX_TABLE_NAME = "iceberg_indexes"; + static final String INDEX_NAME = "index_name"; + + private static final String GET_INDEX_SQL = + "SELECT * FROM " + + CATALOG_INDEX_TABLE_NAME + + " WHERE " + + CATALOG_NAME + + " = ? AND " + + TABLE_NAMESPACE + + " = ? AND " + + TABLE_NAME + + " = ? AND " + + INDEX_NAME + + " = ?"; + + private static final String DO_COMMIT_INDEX_SQL = + "UPDATE " + + CATALOG_INDEX_TABLE_NAME + + " SET " + + JdbcTableOperations.METADATA_LOCATION_PROP + + " = ? , " + + JdbcTableOperations.PREVIOUS_METADATA_LOCATION_PROP + + " = ?" 
+ + " WHERE " + + CATALOG_NAME + + " = ? AND " + + TABLE_NAMESPACE + + " = ? AND " + + TABLE_NAME + + " = ? AND " + + INDEX_NAME + + " = ? AND " + + JdbcTableOperations.METADATA_LOCATION_PROP + + " = ?"; + + private static final String DO_COMMIT_CREATE_INDEX_SQL = + "INSERT INTO " + + CATALOG_INDEX_TABLE_NAME + + " (" + + CATALOG_NAME + + ", " + + TABLE_NAMESPACE + + ", " + + TABLE_NAME + + ", " + + INDEX_NAME + + ", " + + JdbcTableOperations.METADATA_LOCATION_PROP + + ", " + + JdbcTableOperations.PREVIOUS_METADATA_LOCATION_PROP + + ") " + + " VALUES (?,?,?,?,?,null)"; + + static final String CREATE_CATALOG_INDEX_SQL = + "CREATE TABLE " + + CATALOG_INDEX_TABLE_NAME + + "(" + + CATALOG_NAME + + " VARCHAR(255) NOT NULL," + + TABLE_NAMESPACE + + " VARCHAR(255) NOT NULL," + + TABLE_NAME + + " VARCHAR(255) NOT NULL," + + INDEX_NAME + + " VARCHAR(255) NOT NULL," + + JdbcTableOperations.METADATA_LOCATION_PROP + + " VARCHAR(1000)," + + JdbcTableOperations.PREVIOUS_METADATA_LOCATION_PROP + + " VARCHAR(1000)," + + "PRIMARY KEY (" + + CATALOG_NAME + + ", " + + TABLE_NAMESPACE + + ", " + + TABLE_NAME + + ", " + + INDEX_NAME + + ")" + + ")"; + + static final String LIST_INDEXES_SQL = + "SELECT * FROM " + + CATALOG_INDEX_TABLE_NAME + + " WHERE " + + CATALOG_NAME + + " = ? AND " + + TABLE_NAMESPACE + + " = ? AND " + + TABLE_NAME + + " = ?"; + + static final String DROP_INDEX_SQL = + "DELETE FROM " + + CATALOG_INDEX_TABLE_NAME + + " WHERE " + + CATALOG_NAME + + " = ? AND " + + TABLE_NAMESPACE + + " = ? AND " + + TABLE_NAME + + " = ? AND " + + INDEX_NAME + + " = ?"; + private static final String V1_DO_COMMIT_TABLE_SQL = "UPDATE " + CATALOG_TABLE_VIEW_NAME @@ -834,4 +941,94 @@ private static boolean exists(JdbcClientPool connections, String sql, String... 
throw new UncheckedInterruptedException(e, "Interrupted in SQL query"); } } + + static Map loadIndex( + JdbcClientPool connections, String catalogName, IndexIdentifier identifier) + throws SQLException, InterruptedException { + return connections.run( + conn -> { + Map index = Maps.newHashMap(); + + try (PreparedStatement sql = conn.prepareStatement(GET_INDEX_SQL)) { + sql.setString(1, catalogName); + sql.setString(2, namespaceToString(identifier.namespace())); + sql.setString(3, identifier.tableName()); + sql.setString(4, identifier.name()); + ResultSet rs = sql.executeQuery(); + + if (rs.next()) { + index.put(CATALOG_NAME, rs.getString(CATALOG_NAME)); + index.put(TABLE_NAMESPACE, rs.getString(TABLE_NAMESPACE)); + index.put(TABLE_NAME, rs.getString(TABLE_NAME)); + index.put(INDEX_NAME, rs.getString(INDEX_NAME)); + index.put( + BaseMetastoreTableOperations.METADATA_LOCATION_PROP, + rs.getString(BaseMetastoreTableOperations.METADATA_LOCATION_PROP)); + index.put( + BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP, + rs.getString(BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP)); + } + + rs.close(); + } + + return index; + }); + } + + static int updateIndex( + JdbcClientPool connections, + String catalogName, + IndexIdentifier indexIdentifier, + String newMetadataLocation, + String oldMetadataLocation) + throws SQLException, InterruptedException { + return connections.run( + conn -> { + try (PreparedStatement sql = conn.prepareStatement(DO_COMMIT_INDEX_SQL)) { + // UPDATE + sql.setString(1, newMetadataLocation); + sql.setString(2, oldMetadataLocation); + // WHERE + sql.setString(3, catalogName); + sql.setString(4, namespaceToString(indexIdentifier.namespace())); + sql.setString(5, indexIdentifier.tableName()); + sql.setString(6, indexIdentifier.name()); + sql.setString(7, oldMetadataLocation); + + return sql.executeUpdate(); + } + }); + } + + static int doCommitCreateIndex( + JdbcClientPool connections, + String catalogName, + IndexIdentifier 
indexIdentifier, + String newMetadataLocation) + throws SQLException, InterruptedException { + return connections.run( + conn -> { + try (PreparedStatement sql = conn.prepareStatement(DO_COMMIT_CREATE_INDEX_SQL)) { + sql.setString(1, catalogName); + sql.setString(2, namespaceToString(indexIdentifier.namespace())); + sql.setString(3, indexIdentifier.tableName()); + sql.setString(4, indexIdentifier.name()); + sql.setString(5, newMetadataLocation); + + return sql.executeUpdate(); + } + }); + } + + static boolean indexExists( + String catalogName, JdbcClientPool connections, IndexIdentifier indexIdentifier) { + return exists( + connections, + GET_INDEX_SQL, + catalogName, + namespaceToString(indexIdentifier.namespace()), + indexIdentifier.tableName(), + indexIdentifier.name()); + } } diff --git a/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java b/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java index d75909b4372f..a39cc3172811 100644 --- a/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java +++ b/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java @@ -64,15 +64,24 @@ import org.apache.iceberg.Transaction; import org.apache.iceberg.UpdateRequirement; import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.IndexCatalog; +import org.apache.iceberg.catalog.IndexIdentifier; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.catalog.ViewCatalog; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NoSuchIndexException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.exceptions.NoSuchViewException; +import org.apache.iceberg.index.BaseIndex; +import 
org.apache.iceberg.index.Index; +import org.apache.iceberg.index.IndexBuilder; +import org.apache.iceberg.index.IndexDefinition; +import org.apache.iceberg.index.IndexMetadata; +import org.apache.iceberg.index.IndexOperations; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -80,14 +89,17 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.rest.RESTCatalogProperties.SnapshotMode; +import org.apache.iceberg.rest.requests.CreateIndexRequest; import org.apache.iceberg.rest.requests.CreateNamespaceRequest; import org.apache.iceberg.rest.requests.CreateTableRequest; import org.apache.iceberg.rest.requests.CreateViewRequest; import org.apache.iceberg.rest.requests.FetchScanTasksRequest; import org.apache.iceberg.rest.requests.PlanTableScanRequest; +import org.apache.iceberg.rest.requests.RegisterIndexRequest; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RegisterViewRequest; import org.apache.iceberg.rest.requests.RenameTableRequest; +import org.apache.iceberg.rest.requests.UpdateIndexRequest; import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest; import org.apache.iceberg.rest.requests.UpdateTableRequest; import org.apache.iceberg.rest.responses.CreateNamespaceResponse; @@ -95,8 +107,10 @@ import org.apache.iceberg.rest.responses.FetchScanTasksResponse; import org.apache.iceberg.rest.responses.GetNamespaceResponse; import org.apache.iceberg.rest.responses.ImmutableLoadViewResponse; +import org.apache.iceberg.rest.responses.ListIndexesResponse; import org.apache.iceberg.rest.responses.ListNamespacesResponse; import org.apache.iceberg.rest.responses.ListTablesResponse; +import org.apache.iceberg.rest.responses.LoadIndexResponse; 
import org.apache.iceberg.rest.responses.LoadTableResponse; import org.apache.iceberg.rest.responses.LoadViewResponse; import org.apache.iceberg.rest.responses.PlanTableScanResponse; @@ -771,6 +785,222 @@ public static LoadViewResponse registerView( .build(); } + // Index catalog handlers + + private static BaseIndex asBaseIndex(org.apache.iceberg.index.Index index) { + Preconditions.checkState( + index instanceof BaseIndex, "Cannot wrap catalog that does not produce BaseIndex"); + return (BaseIndex) index; + } + + private static LoadIndexResponse indexResponse(org.apache.iceberg.index.Index index) { + IndexMetadata metadata = asBaseIndex(index).operations().current(); + return LoadIndexResponse.builder() + .withMetadataLocation(metadata.metadataFileLocation()) + .withMetadata(metadata) + .build(); + } + + /** + * List indexes for a table. + * + * @param catalog the index catalog + * @param tableIdentifier the table identifier + * @return the list indexes response + */ + public static ListIndexesResponse listIndexes( + IndexCatalog catalog, TableIdentifier tableIdentifier) { + List identifiers = + catalog.listIndexes(tableIdentifier).stream() + .map(IndexDefinition::id) + .collect(Collectors.toList()); + return ListIndexesResponse.builder().addAll(identifiers).build(); + } + + /** + * List indexes for a table with pagination. 
+ * + * @param catalog the index catalog + * @param tableIdentifier the table identifier + * @param pageToken the page token + * @param pageSize the page size + * @return the list indexes response + */ + public static ListIndexesResponse listIndexes( + IndexCatalog catalog, TableIdentifier tableIdentifier, String pageToken, String pageSize) { + List identifiers = + catalog.listIndexes(tableIdentifier).stream() + .map(IndexDefinition::id) + .collect(Collectors.toList()); + + Pair, String> page = + paginate(identifiers, pageToken, Integer.parseInt(pageSize)); + + return ListIndexesResponse.builder().addAll(page.first()).nextPageToken(page.second()).build(); + } + + /** + * Create an index on a table. + * + * @param catalog the index catalog + * @param tableIdentifier the table identifier + * @param request the create index request + * @return the load index response + */ + public static LoadIndexResponse createIndex( + IndexCatalog catalog, TableIdentifier tableIdentifier, CreateIndexRequest request) { + request.validate(); + + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, request.name()); + IndexBuilder builder = + catalog + .buildIndex(indexIdentifier) + .withTableUuid(UUID.fromString(request.tableUuid())) + .withType(request.type()) + .withIndexColumnIds(request.indexColumnIds()) + .withOptimizedColumnIds(request.optimizedColumnIds()) + .withLocation(request.location()) + .withProperties(request.properties()); + + if (request.tableSnapshotId() != null) { + builder.withTableSnapshotId(request.tableSnapshotId()); + } + + if (request.indexSnapshotId() != null) { + builder.withIndexSnapshotId(request.indexSnapshotId()); + } + + if (!request.snapshotProperties().isEmpty()) { + builder.withSnapshotProperties(request.snapshotProperties()); + } + + return indexResponse(builder.create()); + } + + /** + * Check if an index exists. 
+ * + * @param catalog the index catalog + * @param indexIdentifier the index identifier + * @throws NoSuchIndexException if the index does not exist + */ + public static void indexExists(IndexCatalog catalog, IndexIdentifier indexIdentifier) { + if (!catalog.indexExists(indexIdentifier)) { + throw new NoSuchIndexException("Index does not exist: %s", indexIdentifier); + } + } + + /** + * Load an index. + * + * @param catalog the index catalog + * @param indexIdentifier the index identifier + * @return the load index response + */ + public static LoadIndexResponse loadIndex(IndexCatalog catalog, IndexIdentifier indexIdentifier) { + org.apache.iceberg.index.Index index = catalog.loadIndex(indexIdentifier); + return indexResponse(index); + } + + /** + * Update an index. + * + * @param catalog the index catalog + * @param indexIdentifier the index identifier + * @param request the update index request + * @return the load index response + */ + public static LoadIndexResponse updateIndex( + IndexCatalog catalog, IndexIdentifier indexIdentifier, UpdateIndexRequest request) { + Index index = catalog.loadIndex(indexIdentifier); + IndexMetadata metadata = commit(asBaseIndex(index).operations(), request); + + return LoadIndexResponse.builder() + .withMetadataLocation(metadata.metadataFileLocation()) + .withMetadata(metadata) + .build(); + } + + /** + * Drop an index. + * + * @param catalog the index catalog + * @param indexIdentifier the index identifier + */ + public static void dropIndex(IndexCatalog catalog, IndexIdentifier indexIdentifier) { + boolean dropped = catalog.dropIndex(indexIdentifier); + if (!dropped) { + throw new NoSuchIndexException("Index does not exist: %s", indexIdentifier); + } + } + + /** + * Register an index from an existing metadata file. 
+ * + * @param catalog the index catalog + * @param tableIdentifier the table identifier + * @param request the register index request + * @return the load index response + */ + public static LoadIndexResponse registerIndex( + IndexCatalog catalog, TableIdentifier tableIdentifier, RegisterIndexRequest request) { + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, request.name()); + Index index = catalog.registerIndex(indexIdentifier, request.metadataLocation()); + return indexResponse(index); + } + + /** + * Commit changes to an index. + * + * @param ops the index operations + * @param request the update index request + * @return the updated index metadata + */ + static IndexMetadata commit(IndexOperations ops, UpdateIndexRequest request) { + AtomicBoolean isRetry = new AtomicBoolean(false); + try { + Tasks.foreach(ops) + .retry(COMMIT_NUM_RETRIES_DEFAULT) + .exponentialBackoff( + COMMIT_MIN_RETRY_WAIT_MS_DEFAULT, + COMMIT_MAX_RETRY_WAIT_MS_DEFAULT, + COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT, + 2.0 /* exponential */) + .run( + taskOps -> { + IndexMetadata base = isRetry.get() ? 
taskOps.refresh() : taskOps.current(); + isRetry.set(true); + + // validate requirements + try { + request.requirements().forEach(requirement -> requirement.validate(base)); + } catch (CommitFailedException e) { + // wrap and rethrow outside of tasks to avoid unnecessary retry + throw new ValidationFailureException(e); + } + + // apply changes + IndexMetadata.Builder metadataBuilder = IndexMetadata.buildFrom(base); + request.updates().forEach(update -> update.applyTo(metadataBuilder)); + + IndexMetadata updated = metadataBuilder.build(); + + if (updated.changes().isEmpty()) { + // do not commit if the metadata has not changed + return; + } + + // commit + taskOps.commit(base, updated); + }); + + } catch (ValidationFailureException e) { + throw e.wrapped(); + } + + return ops.current(); + } + static ViewMetadata commit(ViewOperations ops, UpdateTableRequest request) { AtomicBoolean isRetry = new AtomicBoolean(false); try { diff --git a/core/src/main/java/org/apache/iceberg/rest/Endpoint.java b/core/src/main/java/org/apache/iceberg/rest/Endpoint.java index c2369a0fa57d..7eb2d92512f0 100644 --- a/core/src/main/java/org/apache/iceberg/rest/Endpoint.java +++ b/core/src/main/java/org/apache/iceberg/rest/Endpoint.java @@ -89,6 +89,16 @@ public class Endpoint { public static final Endpoint V1_REGISTER_VIEW = Endpoint.create("POST", ResourcePaths.V1_VIEW_REGISTER); + // index endpoints + public static final Endpoint V1_LIST_INDEXES = Endpoint.create("GET", ResourcePaths.V1_INDEXES); + public static final Endpoint V1_CREATE_INDEX = Endpoint.create("POST", ResourcePaths.V1_INDEXES); + public static final Endpoint V1_LOAD_INDEX = Endpoint.create("GET", ResourcePaths.V1_INDEX); + public static final Endpoint V1_UPDATE_INDEX = Endpoint.create("POST", ResourcePaths.V1_INDEX); + public static final Endpoint V1_DELETE_INDEX = Endpoint.create("DELETE", ResourcePaths.V1_INDEX); + public static final Endpoint V1_INDEX_EXISTS = Endpoint.create("HEAD", ResourcePaths.V1_INDEX); + public 
static final Endpoint V1_REGISTER_INDEX = + Endpoint.create("POST", ResourcePaths.V1_INDEX_REGISTER); + private static final Splitter ENDPOINT_SPLITTER = Splitter.on(" "); private static final Joiner ENDPOINT_JOINER = Joiner.on(" "); private final String httpMethod; diff --git a/core/src/main/java/org/apache/iceberg/rest/ErrorHandlers.java b/core/src/main/java/org/apache/iceberg/rest/ErrorHandlers.java index 791eb732bb7c..8597e40c21fc 100644 --- a/core/src/main/java/org/apache/iceberg/rest/ErrorHandlers.java +++ b/core/src/main/java/org/apache/iceberg/rest/ErrorHandlers.java @@ -25,6 +25,7 @@ import org.apache.iceberg.exceptions.CommitStateUnknownException; import org.apache.iceberg.exceptions.ForbiddenException; import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.exceptions.NoSuchIndexException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchPlanIdException; import org.apache.iceberg.exceptions.NoSuchPlanTaskException; @@ -72,6 +73,14 @@ public static Consumer viewCommitHandler() { return ViewCommitErrorHandler.INSTANCE; } + public static Consumer indexErrorHandler() { + return IndexErrorHandler.INSTANCE; + } + + public static Consumer indexCommitHandler() { + return IndexCommitErrorHandler.INSTANCE; + } + public static Consumer tableCommitHandler() { return CommitErrorHandler.INSTANCE; } @@ -257,6 +266,58 @@ public void accept(ErrorResponse error) { } } + /** Index level error handler. 
*/ + private static class IndexErrorHandler extends DefaultErrorHandler { + private static final ErrorHandler INSTANCE = new IndexErrorHandler(); + + @Override + public void accept(ErrorResponse error) { + switch (error.code()) { + case 404: + if (NoSuchNamespaceException.class.getSimpleName().equals(error.type())) { + throw new NoSuchNamespaceException("%s", error.message()); + } else if (NoSuchTableException.class.getSimpleName().equals(error.type())) { + throw new NoSuchTableException("%s", error.message()); + } else { + throw new NoSuchIndexException("%s", error.message()); + } + case 409: + throw new AlreadyExistsException("%s", error.message()); + } + + super.accept(error); + } + } + + /** Index commit error handler. */ + private static class IndexCommitErrorHandler extends DefaultErrorHandler { + private static final ErrorHandler INSTANCE = new IndexCommitErrorHandler(); + + @Override + public void accept(ErrorResponse error) { + switch (error.code()) { + case 404: + if (NoSuchNamespaceException.class.getSimpleName().equals(error.type())) { + throw new NoSuchNamespaceException("%s", error.message()); + } else if (NoSuchTableException.class.getSimpleName().equals(error.type())) { + throw new NoSuchTableException("%s", error.message()); + } else { + throw new NoSuchIndexException("%s", error.message()); + } + case 409: + throw new CommitFailedException("Commit failed: %s", error.message()); + case 500: + case 502: + case 503: + case 504: + throw new CommitStateUnknownException( + new ServiceFailureException("Service failed: %s: %s", error.code(), error.message())); + } + + super.accept(error); + } + } + /** Request error handler specifically for create-read-update ops on namespaces. 
*/ private static class NamespaceErrorHandler extends DefaultErrorHandler { private static final ErrorHandler INSTANCE = new NamespaceErrorHandler(); diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTCatalog.java b/core/src/main/java/org/apache/iceberg/rest/RESTCatalog.java index 895336b1ad3f..72e435e5e17f 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTCatalog.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTCatalog.java @@ -30,6 +30,8 @@ import org.apache.iceberg.Table; import org.apache.iceberg.Transaction; import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.IndexCatalog; +import org.apache.iceberg.catalog.IndexIdentifier; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SessionCatalog; import org.apache.iceberg.catalog.SupportsNamespaces; @@ -39,18 +41,28 @@ import org.apache.iceberg.exceptions.NamespaceNotEmptyException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.hadoop.Configurable; +import org.apache.iceberg.index.Index; +import org.apache.iceberg.index.IndexBuilder; +import org.apache.iceberg.index.IndexDefinition; +import org.apache.iceberg.index.IndexType; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.view.View; import org.apache.iceberg.view.ViewBuilder; public class RESTCatalog - implements Catalog, ViewCatalog, SupportsNamespaces, Configurable, Closeable { + implements Catalog, + ViewCatalog, + SupportsNamespaces, + Configurable, + Closeable, + IndexCatalog { private final RESTSessionCatalog sessionCatalog; private final Catalog delegate; private final SupportsNamespaces nsDelegate; private final SessionCatalog.SessionContext context; private final ViewCatalog viewSessionCatalog; + private final IndexCatalog indexSessionCatalog; public RESTCatalog() { this( @@ -74,6 +86,7 @@ public 
RESTCatalog( this.nsDelegate = (SupportsNamespaces) delegate; this.context = context; this.viewSessionCatalog = sessionCatalog.asViewCatalog(context); + this.indexSessionCatalog = sessionCatalog.asIndexCatalog(context); } /** @@ -334,4 +347,41 @@ public void invalidateView(TableIdentifier identifier) { public View registerView(TableIdentifier identifier, String metadataFileLocation) { return viewSessionCatalog.registerView(identifier, metadataFileLocation); } + + // IndexCatalog delegate methods + + @Override + public List listIndexes(TableIdentifier tableIdentifier, IndexType... types) { + return indexSessionCatalog.listIndexes(tableIdentifier, types); + } + + @Override + public Index loadIndex(IndexIdentifier identifier) { + return indexSessionCatalog.loadIndex(identifier); + } + + @Override + public boolean indexExists(IndexIdentifier identifier) { + return indexSessionCatalog.indexExists(identifier); + } + + @Override + public IndexBuilder buildIndex(IndexIdentifier identifier) { + return indexSessionCatalog.buildIndex(identifier); + } + + @Override + public boolean dropIndex(IndexIdentifier identifier) { + return indexSessionCatalog.dropIndex(identifier); + } + + @Override + public void invalidateIndex(IndexIdentifier identifier) { + indexSessionCatalog.invalidateIndex(identifier); + } + + @Override + public Index registerIndex(IndexIdentifier identifier, String metadataFileLocation) { + return indexSessionCatalog.registerIndex(identifier, metadataFileLocation); + } } diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTCatalogProperties.java b/core/src/main/java/org/apache/iceberg/rest/RESTCatalogProperties.java index 7281862481dd..ccbe4cb51c88 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTCatalogProperties.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTCatalogProperties.java @@ -36,6 +36,11 @@ private RESTCatalogProperties() {} public static final String VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported"; public static 
final boolean VIEW_ENDPOINTS_SUPPORTED_DEFAULT = false; + // for backwards compatibility with older REST servers where it can be assumed that a particular + // server supports index endpoints but doesn't send the "endpoints" field in the ConfigResponse + public static final String INDEX_ENDPOINTS_SUPPORTED = "index-endpoints-supported"; + public static final boolean INDEX_ENDPOINTS_SUPPORTED_DEFAULT = false; + public static final String PAGE_SIZE = "rest-page-size"; public static final String NAMESPACE_SEPARATOR = "namespace-separator"; diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTIndexOperations.java b/core/src/main/java/org/apache/iceberg/rest/RESTIndexOperations.java new file mode 100644 index 000000000000..c0ca48e40ca0 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/RESTIndexOperations.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest; + +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Supplier; +import org.apache.iceberg.index.IndexMetadata; +import org.apache.iceberg.index.IndexOperations; +import org.apache.iceberg.index.IndexRequirement; +import org.apache.iceberg.index.IndexUpdate; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.rest.requests.UpdateIndexRequest; +import org.apache.iceberg.rest.responses.LoadIndexResponse; + +/** + * REST implementation of {@link IndexOperations}. + * + *

This class communicates with a REST catalog server to perform index metadata operations such + * as loading, refreshing, and committing updates. + */ +class RESTIndexOperations implements IndexOperations { + private final RESTClient client; + private final String path; + private final Supplier> readHeaders; + private final Supplier> mutationHeaders; + private final Set endpoints; + private IndexMetadata current; + + RESTIndexOperations( + RESTClient client, + String path, + Supplier> headers, + IndexMetadata current, + Set endpoints) { + this(client, path, headers, headers, current, endpoints); + } + + RESTIndexOperations( + RESTClient client, + String path, + Supplier> readHeaders, + Supplier> mutationHeaders, + IndexMetadata current, + Set endpoints) { + Preconditions.checkArgument(null != current, "Invalid index metadata: null"); + this.client = client; + this.path = path; + this.readHeaders = readHeaders; + this.mutationHeaders = mutationHeaders; + this.current = current; + this.endpoints = endpoints; + } + + @Override + public IndexMetadata current() { + return current; + } + + @Override + public IndexMetadata refresh() { + Endpoint.check(endpoints, Endpoint.V1_LOAD_INDEX); + return updateCurrentMetadata( + client.get(path, LoadIndexResponse.class, readHeaders, ErrorHandlers.indexErrorHandler())); + } + + @Override + public void commit(IndexMetadata base, IndexMetadata metadata) { + Endpoint.check(endpoints, Endpoint.V1_UPDATE_INDEX); + // this is only used for updating index metadata + Preconditions.checkState(base != null, "Invalid base metadata: null"); + + // Get updates from the metadata changes + List updates = metadata.changes(); + + // Skip the commit if there are no changes + if (updates.isEmpty()) { + return; + } + + // Build requirements based on the base metadata + List requirements = + ImmutableList.of(new IndexRequirement.AssertIndexUUID(base.uuid())); + + UpdateIndexRequest request = UpdateIndexRequest.create(null, requirements, updates); + + 
LoadIndexResponse response = + client.post( + path, + request, + LoadIndexResponse.class, + mutationHeaders, + ErrorHandlers.indexCommitHandler()); + + updateCurrentMetadata(response); + } + + private IndexMetadata updateCurrentMetadata(LoadIndexResponse response) { + if (!Objects.equals(current.metadataFileLocation(), response.metadataLocation())) { + this.current = response.metadata(); + } + + return current; + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java index a429018c33d5..e6356f5c244f 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java @@ -40,28 +40,37 @@ import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.UnboundPartitionSpec; import org.apache.iceberg.UnboundSortOrder; +import org.apache.iceberg.catalog.IndexIdentifier; +import org.apache.iceberg.catalog.IndexIdentifierParser; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.catalog.TableIdentifierParser; import org.apache.iceberg.rest.auth.OAuth2Util; import org.apache.iceberg.rest.requests.CommitTransactionRequest; import org.apache.iceberg.rest.requests.CommitTransactionRequestParser; +import org.apache.iceberg.rest.requests.CreateIndexRequest; +import org.apache.iceberg.rest.requests.CreateIndexRequestParser; import org.apache.iceberg.rest.requests.CreateViewRequest; import org.apache.iceberg.rest.requests.CreateViewRequestParser; import org.apache.iceberg.rest.requests.FetchScanTasksRequest; import org.apache.iceberg.rest.requests.FetchScanTasksRequestParser; import org.apache.iceberg.rest.requests.ImmutableCreateViewRequest; +import org.apache.iceberg.rest.requests.ImmutableRegisterIndexRequest; import org.apache.iceberg.rest.requests.ImmutableRegisterTableRequest; import 
org.apache.iceberg.rest.requests.ImmutableRegisterViewRequest; import org.apache.iceberg.rest.requests.ImmutableReportMetricsRequest; import org.apache.iceberg.rest.requests.PlanTableScanRequest; import org.apache.iceberg.rest.requests.PlanTableScanRequestParser; +import org.apache.iceberg.rest.requests.RegisterIndexRequest; +import org.apache.iceberg.rest.requests.RegisterIndexRequestParser; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RegisterTableRequestParser; import org.apache.iceberg.rest.requests.RegisterViewRequest; import org.apache.iceberg.rest.requests.RegisterViewRequestParser; import org.apache.iceberg.rest.requests.ReportMetricsRequest; import org.apache.iceberg.rest.requests.ReportMetricsRequestParser; +import org.apache.iceberg.rest.requests.UpdateIndexRequest; +import org.apache.iceberg.rest.requests.UpdateIndexRequestParser; import org.apache.iceberg.rest.requests.UpdateTableRequest; import org.apache.iceberg.rest.requests.UpdateTableRequestParser; import org.apache.iceberg.rest.responses.ConfigResponse; @@ -76,6 +85,8 @@ import org.apache.iceberg.rest.responses.ImmutableLoadViewResponse; import org.apache.iceberg.rest.responses.LoadCredentialsResponse; import org.apache.iceberg.rest.responses.LoadCredentialsResponseParser; +import org.apache.iceberg.rest.responses.LoadIndexResponse; +import org.apache.iceberg.rest.responses.LoadIndexResponseParser; import org.apache.iceberg.rest.responses.LoadTableResponse; import org.apache.iceberg.rest.responses.LoadTableResponseParser; import org.apache.iceberg.rest.responses.LoadViewResponse; @@ -96,6 +107,8 @@ public static void registerAll(ObjectMapper mapper) { .addDeserializer(ErrorResponse.class, new ErrorResponseDeserializer()) .addSerializer(TableIdentifier.class, new TableIdentifierSerializer()) .addDeserializer(TableIdentifier.class, new TableIdentifierDeserializer()) + .addSerializer(IndexIdentifier.class, new IndexIdentifierSerializer()) + 
.addDeserializer(IndexIdentifier.class, new IndexIdentifierDeserializer()) .addSerializer(Namespace.class, new NamespaceSerializer()) .addDeserializer(Namespace.class, new NamespaceDeserializer()) .addSerializer(Schema.class, new SchemaSerializer()) @@ -160,7 +173,18 @@ public static void registerAll(ObjectMapper mapper) { ImmutableLoadCredentialsResponse.class, new LoadCredentialsResponseSerializer<>()) .addDeserializer(LoadCredentialsResponse.class, new LoadCredentialsResponseDeserializer<>()) .addDeserializer( - ImmutableLoadCredentialsResponse.class, new LoadCredentialsResponseDeserializer<>()); + ImmutableLoadCredentialsResponse.class, new LoadCredentialsResponseDeserializer<>()) + .addSerializer(CreateIndexRequest.class, new CreateIndexRequestSerializer<>()) + .addDeserializer(CreateIndexRequest.class, new CreateIndexRequestDeserializer<>()) + .addSerializer(UpdateIndexRequest.class, new UpdateIndexRequestSerializer()) + .addDeserializer(UpdateIndexRequest.class, new UpdateIndexRequestDeserializer()) + .addSerializer(LoadIndexResponse.class, new LoadIndexResponseSerializer<>()) + .addDeserializer(LoadIndexResponse.class, new LoadIndexResponseDeserializer<>()) + .addSerializer(RegisterIndexRequest.class, new RegisterIndexRequestSerializer<>()) + .addDeserializer(RegisterIndexRequest.class, new RegisterIndexRequestDeserializer<>()) + .addSerializer(ImmutableRegisterIndexRequest.class, new RegisterIndexRequestSerializer<>()) + .addDeserializer( + ImmutableRegisterIndexRequest.class, new RegisterIndexRequestDeserializer<>()); mapper.registerModule(module); } @@ -273,6 +297,24 @@ public void serialize( } } + public static class IndexIdentifierDeserializer extends JsonDeserializer { + @Override + public IndexIdentifier deserialize(JsonParser p, DeserializationContext context) + throws IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return IndexIdentifierParser.fromJson(jsonNode); + } + } + + public static class IndexIdentifierSerializer extends 
JsonSerializer { + @Override + public void serialize( + IndexIdentifier identifier, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + IndexIdentifierParser.toJson(identifier, gen); + } + } + public static class SchemaDeserializer extends JsonDeserializer { @Override public Schema deserialize(JsonParser p, DeserializationContext context) throws IOException { @@ -650,4 +692,75 @@ boolean isCaseSensitive() { return caseSensitive; } } + + static class CreateIndexRequestSerializer + extends JsonSerializer { + @Override + public void serialize(T request, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + CreateIndexRequestParser.toJson(request, gen); + } + } + + static class CreateIndexRequestDeserializer + extends JsonDeserializer { + @Override + public T deserialize(JsonParser p, DeserializationContext context) throws IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return (T) CreateIndexRequestParser.fromJson(jsonNode); + } + } + + static class UpdateIndexRequestSerializer extends JsonSerializer { + @Override + public void serialize( + UpdateIndexRequest request, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + UpdateIndexRequestParser.toJson(request, gen); + } + } + + static class UpdateIndexRequestDeserializer extends JsonDeserializer { + @Override + public UpdateIndexRequest deserialize(JsonParser p, DeserializationContext context) + throws IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return UpdateIndexRequestParser.fromJson(jsonNode); + } + } + + static class LoadIndexResponseSerializer extends JsonSerializer { + @Override + public void serialize(T response, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + LoadIndexResponseParser.toJson(response, gen); + } + } + + static class LoadIndexResponseDeserializer + extends JsonDeserializer { + @Override + public T deserialize(JsonParser p, DeserializationContext context) throws 
IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return (T) LoadIndexResponseParser.fromJson(jsonNode); + } + } + + static class RegisterIndexRequestSerializer + extends JsonSerializer { + @Override + public void serialize(T request, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + RegisterIndexRequestParser.toJson(request, gen); + } + } + + static class RegisterIndexRequestDeserializer + extends JsonDeserializer { + @Override + public T deserialize(JsonParser p, DeserializationContext context) throws IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return (T) RegisterIndexRequestParser.fromJson(jsonNode); + } + } } diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java index cda71fccda3a..dfc272171a7e 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java @@ -25,6 +25,7 @@ import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.UUID; import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; @@ -46,16 +47,30 @@ import org.apache.iceberg.TableOperations; import org.apache.iceberg.Transaction; import org.apache.iceberg.Transactions; -import org.apache.iceberg.catalog.BaseViewSessionCatalog; +import org.apache.iceberg.catalog.BaseIndexSessionCatalog; import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.IndexIdentifier; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableCommit; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchIndexException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; import 
org.apache.iceberg.exceptions.NoSuchViewException; import org.apache.iceberg.hadoop.Configurable; +import org.apache.iceberg.index.BaseIndex; +import org.apache.iceberg.index.ImmutableIndexSnapshot; +import org.apache.iceberg.index.ImmutableIndexSummary; +import org.apache.iceberg.index.ImmutableIndexVersion; +import org.apache.iceberg.index.Index; +import org.apache.iceberg.index.IndexBuilder; +import org.apache.iceberg.index.IndexDefinition; +import org.apache.iceberg.index.IndexMetadata; +import org.apache.iceberg.index.IndexRequirements; +import org.apache.iceberg.index.IndexSnapshot; +import org.apache.iceberg.index.IndexType; +import org.apache.iceberg.index.IndexVersion; import org.apache.iceberg.io.CloseableGroup; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.FileIOTracker; @@ -77,22 +92,28 @@ import org.apache.iceberg.rest.auth.AuthSession; import org.apache.iceberg.rest.credentials.Credential; import org.apache.iceberg.rest.requests.CommitTransactionRequest; +import org.apache.iceberg.rest.requests.CreateIndexRequest; import org.apache.iceberg.rest.requests.CreateNamespaceRequest; import org.apache.iceberg.rest.requests.CreateTableRequest; import org.apache.iceberg.rest.requests.CreateViewRequest; import org.apache.iceberg.rest.requests.ImmutableCreateViewRequest; +import org.apache.iceberg.rest.requests.ImmutableRegisterIndexRequest; import org.apache.iceberg.rest.requests.ImmutableRegisterTableRequest; import org.apache.iceberg.rest.requests.ImmutableRegisterViewRequest; +import org.apache.iceberg.rest.requests.RegisterIndexRequest; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RegisterViewRequest; import org.apache.iceberg.rest.requests.RenameTableRequest; +import org.apache.iceberg.rest.requests.UpdateIndexRequest; import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest; import org.apache.iceberg.rest.requests.UpdateTableRequest; import 
org.apache.iceberg.rest.responses.ConfigResponse; import org.apache.iceberg.rest.responses.CreateNamespaceResponse; import org.apache.iceberg.rest.responses.GetNamespaceResponse; +import org.apache.iceberg.rest.responses.ListIndexesResponse; import org.apache.iceberg.rest.responses.ListNamespacesResponse; import org.apache.iceberg.rest.responses.ListTablesResponse; +import org.apache.iceberg.rest.responses.LoadIndexResponse; import org.apache.iceberg.rest.responses.LoadTableResponse; import org.apache.iceberg.rest.responses.LoadViewResponse; import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse; @@ -110,7 +131,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class RESTSessionCatalog extends BaseViewSessionCatalog +public class RESTSessionCatalog extends BaseIndexSessionCatalog implements Configurable, Closeable { private static final Logger LOG = LoggerFactory.getLogger(RESTSessionCatalog.class); private static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.io.ResolvingFileIO"; @@ -153,6 +174,18 @@ public class RESTSessionCatalog extends BaseViewSessionCatalog .add(Endpoint.V1_RENAME_VIEW) .build(); + // these index endpoints must not be updated in order to maintain backwards compatibility with + // legacy servers + private static final Set INDEX_ENDPOINTS = + ImmutableSet.builder() + .add(Endpoint.V1_LIST_INDEXES) + .add(Endpoint.V1_LOAD_INDEX) + .add(Endpoint.V1_CREATE_INDEX) + .add(Endpoint.V1_UPDATE_INDEX) + .add(Endpoint.V1_DELETE_INDEX) + .add(Endpoint.V1_INDEX_EXISTS) + .build(); + private final Function, RESTClient> clientBuilder; private final BiFunction, FileIO> ioBuilder; private FileIOTracker fileIOTracker = null; @@ -222,16 +255,24 @@ public void initialize(String name, Map unresolved) { } if (config.endpoints().isEmpty()) { - this.endpoints = - PropertyUtil.propertyAsBoolean( - mergedProps, - RESTCatalogProperties.VIEW_ENDPOINTS_SUPPORTED, - RESTCatalogProperties.VIEW_ENDPOINTS_SUPPORTED_DEFAULT) - ? 
ImmutableSet.builder() - .addAll(DEFAULT_ENDPOINTS) - .addAll(VIEW_ENDPOINTS) - .build() - : DEFAULT_ENDPOINTS; + ImmutableSet.Builder endpointsBuilder = ImmutableSet.builder(); + endpointsBuilder.addAll(DEFAULT_ENDPOINTS); + + if (PropertyUtil.propertyAsBoolean( + mergedProps, + RESTCatalogProperties.VIEW_ENDPOINTS_SUPPORTED, + RESTCatalogProperties.VIEW_ENDPOINTS_SUPPORTED_DEFAULT)) { + endpointsBuilder.addAll(VIEW_ENDPOINTS); + } + + if (PropertyUtil.propertyAsBoolean( + mergedProps, + RESTCatalogProperties.INDEX_ENDPOINTS_SUPPORTED, + RESTCatalogProperties.INDEX_ENDPOINTS_SUPPORTED_DEFAULT)) { + endpointsBuilder.addAll(INDEX_ENDPOINTS); + } + + this.endpoints = endpointsBuilder.build(); } else { this.endpoints = ImmutableSet.copyOf(config.endpoints()); } @@ -1752,4 +1793,552 @@ private View replace(LoadViewResponse response) { return new BaseView(ops, ViewUtil.fullViewName(name(), identifier)); } } + + @Override + public List listIndexes( + SessionContext context, TableIdentifier tableIdentifier, IndexType... 
types) { + if (!endpoints.contains(Endpoint.V1_LIST_INDEXES)) { + return ImmutableList.of(); + } + + checkIdentifierIsValid(tableIdentifier); + Map queryParams = Maps.newHashMap(); + ImmutableList.Builder indexes = ImmutableList.builder(); + String pageToken = ""; + if (pageSize != null) { + queryParams.put("pageSize", String.valueOf(pageSize)); + } + + do { + queryParams.put("pageToken", pageToken); + AuthSession contextualSession = authManager.contextualSession(context, catalogAuth); + ListIndexesResponse response = + client + .withAuthSession(contextualSession) + .get( + paths.indexes(tableIdentifier), + queryParams, + ListIndexesResponse.class, + Map.of(), + ErrorHandlers.tableErrorHandler()); + pageToken = response.nextPageToken(); + + // Convert IndexIdentifiers to IndexSummary by loading each index's metadata + for (IndexIdentifier indexId : response.identifiers()) { + try { + LoadIndexResponse indexResponse = + client + .withAuthSession(contextualSession) + .get( + paths.index(indexId), + LoadIndexResponse.class, + Map.of(), + ErrorHandlers.indexErrorHandler()); + + IndexMetadata metadata = indexResponse.metadata(); + IndexDefinition summary = + ImmutableIndexSummary.builder() + .id(indexId) + .type(metadata.type()) + .indexColumnIds( + metadata.indexColumnIds().stream().mapToInt(Integer::intValue).toArray()) + .optimizedColumnIds( + metadata.optimizedColumnIds().stream().mapToInt(Integer::intValue).toArray()) + .availableTableSnapshots( + metadata.snapshots().stream() + .mapToLong(IndexSnapshot::tableSnapshotId) + .toArray()) + .build(); + indexes.add(summary); + } catch (NoSuchIndexException e) { + // Index may have been deleted between list and load, skip it + LOG.debug("Index {} not found during list, skipping", indexId, e); + } + } + } while (pageToken != null); + + List allIndexes = indexes.build(); + + // Filter by types if specified + if (types == null || types.length == 0) { + return allIndexes; + } + + Set typeSet = ImmutableSet.copyOf(types); + 
return allIndexes.stream() + .filter(idx -> typeSet.contains(idx.type())) + .collect(Collectors.toList()); + } + + @Override + public Index loadIndex(SessionContext context, IndexIdentifier identifier) { + Endpoint.check( + endpoints, + Endpoint.V1_LOAD_INDEX, + () -> + new NoSuchIndexException( + "Unable to load index %s.%s: Server does not support endpoint %s", + name(), identifier, Endpoint.V1_LOAD_INDEX)); + + checkIndexIdentifierIsValid(identifier); + + AuthSession contextualSession = authManager.contextualSession(context, catalogAuth); + LoadIndexResponse response = + client + .withAuthSession(contextualSession) + .get( + paths.index(identifier), + LoadIndexResponse.class, + Map.of(), + ErrorHandlers.indexErrorHandler()); + + Map indexConf = response.config(); + AuthSession indexSession = + authManager.tableSession(identifier.tableIdentifier(), indexConf, contextualSession); + IndexMetadata metadata = response.metadata(); + + RESTIndexOperations ops = + newIndexOps( + client.withAuthSession(indexSession), + paths.index(identifier), + Map::of, + mutationHeaders, + metadata, + endpoints); + + return new BaseIndex(ops, fullIndexName(identifier)); + } + + @Override + public boolean indexExists(SessionContext context, IndexIdentifier identifier) { + try { + checkIndexIdentifierIsValid(identifier); + if (endpoints.contains(Endpoint.V1_INDEX_EXISTS)) { + AuthSession contextualSession = authManager.contextualSession(context, catalogAuth); + client + .withAuthSession(contextualSession) + .head(paths.index(identifier), Map.of(), ErrorHandlers.indexErrorHandler()); + return true; + } else { + // fallback: try to load the index + loadIndex(context, identifier); + return true; + } + } catch (NoSuchIndexException e) { + return false; + } + } + + @Override + public IndexBuilder buildIndex(SessionContext context, IndexIdentifier identifier) { + checkIndexIdentifierIsValid(identifier); + return new RESTIndexBuilder(context, identifier); + } + + @Override + public boolean 
dropIndex(SessionContext context, IndexIdentifier identifier) { + Endpoint.check(endpoints, Endpoint.V1_DELETE_INDEX); + checkIndexIdentifierIsValid(identifier); + + try { + AuthSession contextualSession = authManager.contextualSession(context, catalogAuth); + client + .withAuthSession(contextualSession) + .delete( + paths.index(identifier), null, mutationHeaders, ErrorHandlers.indexErrorHandler()); + return true; + } catch (NoSuchIndexException e) { + return false; + } + } + + @Override + public void invalidateIndex(SessionContext context, IndexIdentifier identifier) { + // Currently no caching for indexes, so nothing to invalidate + } + + @Override + public Index registerIndex( + SessionContext context, IndexIdentifier identifier, String metadataFileLocation) { + Endpoint.check(endpoints, Endpoint.V1_REGISTER_INDEX); + checkIndexIdentifierIsValid(identifier); + + Preconditions.checkArgument( + metadataFileLocation != null && !metadataFileLocation.isEmpty(), + "Cannot register an empty metadata file location as an index"); + + RegisterIndexRequest request = + ImmutableRegisterIndexRequest.builder() + .name(identifier.name()) + .metadataLocation(metadataFileLocation) + .build(); + + AuthSession contextualSession = authManager.contextualSession(context, catalogAuth); + LoadIndexResponse response = + client + .withAuthSession(contextualSession) + .post( + paths.registerIndex(identifier.tableIdentifier()), + request, + LoadIndexResponse.class, + mutationHeaders, + ErrorHandlers.indexErrorHandler()); + + RESTIndexOperations ops = + newIndexOps( + client.withAuthSession(contextualSession), + paths.index(identifier), + Map::of, + mutationHeaders, + response.metadata(), + endpoints); + + return new BaseIndex(ops, fullIndexName(identifier)); + } + + private void checkIndexIdentifierIsValid(IndexIdentifier identifier) { + if (identifier.tableIdentifier().namespace().isEmpty()) { + throw new NoSuchIndexException("Invalid index identifier: %s", identifier); + } + } + + 
private String fullIndexName(IndexIdentifier identifier) { + return String.format("%s.%s.%s", name(), identifier.tableIdentifier(), identifier.name()); + } + + /** + * Create a new {@link RESTIndexOperations} instance. + * + *

This method can be overridden in subclasses to provide custom index operations + * implementations. + * + * @param restClient the REST client to use for communicating with the catalog server + * @param path the REST path for the index + * @param readHeaders a supplier for additional HTTP headers to include in read requests + * (GET/HEAD) + * @param mutationHeaderSupplier a supplier for additional HTTP headers to include in mutation + * requests (POST/DELETE) + * @param current the current index metadata + * @param supportedEndpoints the set of supported REST endpoints + * @return a new RESTIndexOperations instance + */ + protected RESTIndexOperations newIndexOps( + RESTClient restClient, + String path, + Supplier> readHeaders, + Supplier> mutationHeaderSupplier, + IndexMetadata current, + Set supportedEndpoints) { + return new RESTIndexOperations( + restClient, path, readHeaders, mutationHeaderSupplier, current, supportedEndpoints); + } + + private class RESTIndexBuilder implements IndexBuilder { + private final SessionContext context; + private final IndexIdentifier identifier; + private Map properties = null; + private Map snapshotProperties = null; + private UUID tableUuid = null; + private Set snapshotIdsToRemove = null; + private IndexType type = null; + private List indexColumnIds = null; + private List optimizedColumnIds = null; + private String location = null; + private long tableSnapshotId = -1L; + private long indexSnapshotId = -1L; + + @SuppressWarnings("UnusedVariable") + private RESTIndexBuilder(SessionContext context, IndexIdentifier identifier) { + this.context = context; + this.identifier = identifier; + } + + @Override + public IndexBuilder withTableUuid(UUID uuid) { + this.tableUuid = uuid; + return this; + } + + @Override + public IndexBuilder withType(IndexType indexType) { + this.type = indexType; + return this; + } + + @Override + public IndexBuilder withIndexColumnIds(List columnIds) { + this.indexColumnIds = Lists.newArrayList(columnIds); + 
return this; + } + + @Override + public IndexBuilder withIndexColumnIds(int... columnIds) { + this.indexColumnIds = Lists.newArrayList(); + for (int id : columnIds) { + this.indexColumnIds.add(id); + } + return this; + } + + @Override + public IndexBuilder withOptimizedColumnIds(List columnIds) { + this.optimizedColumnIds = Lists.newArrayList(columnIds); + return this; + } + + @Override + public IndexBuilder withOptimizedColumnIds(int... columnIds) { + this.optimizedColumnIds = Lists.newArrayList(); + for (int id : columnIds) { + this.optimizedColumnIds.add(id); + } + return this; + } + + @Override + public IndexBuilder withLocation(String indexLocation) { + this.location = indexLocation; + return this; + } + + @Override + public IndexBuilder withProperty(String key, String value) { + if (properties == null) { + this.properties = Maps.newHashMap(); + } + properties.put(key, value); + return this; + } + + @Override + public IndexBuilder withProperties(Map props) { + props.forEach(this::withProperty); + return this; + } + + @Override + public IndexBuilder withTableSnapshotId(long snapshotId) { + this.tableSnapshotId = snapshotId; + return this; + } + + @Override + public IndexBuilder withIndexSnapshotId(long snapshotId) { + this.indexSnapshotId = snapshotId; + return this; + } + + @Override + public IndexBuilder withSnapshotProperty(String key, String value) { + if (snapshotProperties == null) { + this.snapshotProperties = Maps.newHashMap(); + } + snapshotProperties.put(key, value); + return this; + } + + @Override + public IndexBuilder withSnapshotProperties(Map props) { + props.forEach(this::withSnapshotProperty); + return this; + } + + @Override + public IndexBuilder removeSnapshotById(long snapshotIdToRemove) { + if (snapshotIdsToRemove == null) { + this.snapshotIdsToRemove = new java.util.HashSet<>(); + } + + snapshotIdsToRemove.add(snapshotIdToRemove); + return this; + } + + @Override + public IndexBuilder removeSnapshotsByIds(Set snapshotIds) { + 
snapshotIds.forEach(this::removeSnapshotById); + return this; + } + + @Override + public IndexBuilder removeSnapshotsByIds(long... snapshotIds) { + for (long id : snapshotIds) { + removeSnapshotById(id); + } + return this; + } + + @Override + public Index create() { + Endpoint.check(endpoints, Endpoint.V1_CREATE_INDEX); + Preconditions.checkState(type != null, "Cannot create index without specifying a type"); + Preconditions.checkState( + indexColumnIds != null && !indexColumnIds.isEmpty(), + "Cannot create index without specifying index column ids"); + if (tableSnapshotId != -1 || indexSnapshotId != -1 || snapshotProperties != null) { + Preconditions.checkArgument( + tableSnapshotId != -1L, + "Cannot create index snapshot without specifying tableSnapshotId"); + Preconditions.checkArgument( + indexSnapshotId != -1L, + "Cannot create index snapshot without specifying indexSnapshotId"); + } + + CreateIndexRequest request = + CreateIndexRequest.builder() + .withTableUuid(tableUuid.toString()) + .withName(identifier.name()) + .withType(type) + .withIndexColumnIds(indexColumnIds) + .withOptimizedColumnIds( + optimizedColumnIds != null ? optimizedColumnIds : ImmutableList.of()) + .withLocation(location) + .setProperties(properties != null ? properties : ImmutableMap.of()) + .withTableSnapshotId(tableSnapshotId) + .withIndexSnapshotId(indexSnapshotId) + .setSnapshotProperties( + snapshotProperties != null ? 
snapshotProperties : ImmutableMap.of()) + .build(); + + AuthSession contextualSession = authManager.contextualSession(context, catalogAuth); + LoadIndexResponse response = + client + .withAuthSession(contextualSession) + .post( + paths.indexes(identifier.tableIdentifier()), + request, + LoadIndexResponse.class, + mutationHeaders, + ErrorHandlers.indexErrorHandler()); + + Map indexConf = response.config(); + AuthSession indexSession = + authManager.tableSession(identifier.tableIdentifier(), indexConf, contextualSession); + RESTIndexOperations ops = + newIndexOps( + client.withAuthSession(indexSession), + paths.index(identifier), + Map::of, + mutationHeaders, + response.metadata(), + endpoints); + + return new BaseIndex(ops, fullIndexName(identifier)); + } + + @Override + public Index replace() { + if (!indexExists(context, identifier)) { + throw new NoSuchIndexException("Index does not exist: %s", identifier); + } + + return replace(loadIndex()); + } + + @Override + public Index createOrReplace() { + try { + return replace(loadIndex()); + } catch (NoSuchIndexException e) { + return create(); + } + } + + private Index replace(LoadIndexResponse original) { + Endpoint.check(endpoints, Endpoint.V1_UPDATE_INDEX); + Preconditions.checkState(type == null, "Cannot update index type"); + Preconditions.checkState(indexColumnIds == null, "Cannot update index column ids"); + Preconditions.checkState(optimizedColumnIds == null, "Cannot update optimized column ids"); + + IndexMetadata metadata = original.metadata(); + + IndexMetadata.Builder builder = IndexMetadata.buildFrom(metadata); + + int currentVersionId = metadata.currentVersionId(); + if (properties != null) { + int maxVersionId = + metadata.versions().stream() + .map(IndexVersion::versionId) + .max(Integer::compareTo) + .orElseGet(metadata::currentVersionId); + + IndexVersion newVersion = + ImmutableIndexVersion.builder() + .versionId(maxVersionId + 1) + .timestampMillis(System.currentTimeMillis()) + 
.properties(properties) + .build(); + builder = builder.addVersion(newVersion).setCurrentVersion(newVersion.versionId()); + currentVersionId = newVersion.versionId(); + } + + if (tableSnapshotId != -1 || indexSnapshotId != -1 || snapshotProperties != null) { + Preconditions.checkArgument( + tableSnapshotId != -1L, + "Cannot create index snapshot without specifying tableSnapshotId"); + Preconditions.checkArgument( + indexSnapshotId != -1L, + "Cannot create index snapshot without specifying indexSnapshotId"); + + builder.addSnapshot( + ImmutableIndexSnapshot.builder() + .indexSnapshotId(indexSnapshotId) + .tableSnapshotId(tableSnapshotId) + .versionId(currentVersionId) + .properties(snapshotProperties) + .build()); + } + + if (snapshotIdsToRemove != null) { + builder.removeSnapshots(snapshotIdsToRemove); + } + + if (location != null) { + builder.setLocation(location); + } + + IndexMetadata replacement = builder.build(); + + UpdateIndexRequest request = + UpdateIndexRequest.create( + null, + IndexRequirements.forReplaceIndex(metadata, replacement.changes()), + replacement.changes()); + + AuthSession contextualSession = authManager.contextualSession(context, catalogAuth); + LoadIndexResponse response = + client + .withAuthSession(contextualSession) + .post( + paths.index(identifier), + request, + LoadIndexResponse.class, + mutationHeaders, + ErrorHandlers.indexErrorHandler()); + + Map indexConf = response.config(); + AuthSession indexSession = + authManager.tableSession(identifier.tableIdentifier(), indexConf, contextualSession); + RESTIndexOperations ops = + newIndexOps( + client.withAuthSession(indexSession), + paths.index(identifier), + Map::of, + mutationHeaders, + response.metadata(), + endpoints); + + return new BaseIndex(ops, fullIndexName(identifier)); + } + + private LoadIndexResponse loadIndex() { + AuthSession contextualSession = authManager.contextualSession(context, catalogAuth); + return client + .withAuthSession(contextualSession) + .get( + 
paths.index(identifier), + LoadIndexResponse.class, + Map.of(), + ErrorHandlers.indexErrorHandler()); + } + } } diff --git a/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java b/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java index 0fc55c1a44d8..f08654a7244b 100644 --- a/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java +++ b/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java @@ -19,6 +19,7 @@ package org.apache.iceberg.rest; import java.util.Map; +import org.apache.iceberg.catalog.IndexIdentifier; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.relocated.com.google.common.base.Joiner; @@ -50,6 +51,12 @@ public class ResourcePaths { public static final String V1_VIEW = "/v1/{prefix}/namespaces/{namespace}/views/{view}"; public static final String V1_VIEW_RENAME = "/v1/{prefix}/views/rename"; public static final String V1_VIEW_REGISTER = "/v1/{prefix}/namespaces/{namespace}/register-view"; + public static final String V1_INDEXES = + "/v1/{prefix}/namespaces/{namespace}/tables/{table}/indexes"; + public static final String V1_INDEX = + "/v1/{prefix}/namespaces/{namespace}/tables/{table}/indexes/{index}"; + public static final String V1_INDEX_REGISTER = + "/v1/{prefix}/namespaces/{namespace}/tables/{table}/register-index"; public static ResourcePaths forCatalogProperties(Map properties) { return new ResourcePaths( @@ -190,6 +197,58 @@ public String fetchScanTasks(TableIdentifier ident) { "tasks"); } + /** + * Return the path for listing indexes for a table. + * + * @param tableIdentifier the table identifier + * @return the path for the indexes endpoint + */ + public String indexes(TableIdentifier tableIdentifier) { + return SLASH.join( + "v1", + prefix, + "namespaces", + pathEncode(tableIdentifier.namespace()), + "tables", + RESTUtil.encodeString(tableIdentifier.name()), + "indexes"); + } + + /** + * Return the path for a specific index. 
+ * + * @param indexIdentifier the index identifier + * @return the path for the index endpoint + */ + public String index(IndexIdentifier indexIdentifier) { + return SLASH.join( + "v1", + prefix, + "namespaces", + pathEncode(indexIdentifier.tableIdentifier().namespace()), + "tables", + RESTUtil.encodeString(indexIdentifier.tableIdentifier().name()), + "indexes", + RESTUtil.encodeString(indexIdentifier.name())); + } + + /** + * Return the path for registering an index for a table. + * + * @param tableIdentifier the table identifier + * @return the path for the register index endpoint + */ + public String registerIndex(TableIdentifier tableIdentifier) { + return SLASH.join( + "v1", + prefix, + "namespaces", + pathEncode(tableIdentifier.namespace()), + "tables", + RESTUtil.encodeString(tableIdentifier.name()), + "register-index"); + } + private String pathEncode(Namespace ns) { return RESTUtil.encodeNamespace(ns, namespaceSeparator); } diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/CreateIndexRequest.java b/core/src/main/java/org/apache/iceberg/rest/requests/CreateIndexRequest.java new file mode 100644 index 000000000000..69428aceb4e3 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/requests/CreateIndexRequest.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.requests; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.index.IndexType; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.rest.RESTRequest; + +/** A REST request to create a new index on a table. */ +public class CreateIndexRequest implements RESTRequest { + + private String tableUuid; + private String name; + private IndexType type; + private List indexColumnIds; + private List optimizedColumnIds; + private String location; + private Map properties; + private Long tableSnapshotId; + private Long indexSnapshotId; + private Map snapshotProperties; + + public CreateIndexRequest() { + // Required for Jackson deserialization + } + + private CreateIndexRequest( + String tableUuid, + String name, + IndexType type, + List indexColumnIds, + List optimizedColumnIds, + String location, + Map properties, + Long tableSnapshotId, + Long indexSnapshotId, + Map snapshotProperties) { + this.tableUuid = tableUuid; + this.name = name; + this.type = type; + this.indexColumnIds = indexColumnIds; + this.optimizedColumnIds = optimizedColumnIds; + this.location = location; + this.properties = properties; + this.tableSnapshotId = tableSnapshotId; + this.indexSnapshotId = indexSnapshotId; + this.snapshotProperties = snapshotProperties; + validate(); + } + + @Override + public void validate() { + Preconditions.checkArgument( + tableUuid != null && !tableUuid.isEmpty(), "Invalid table uuid: null or empty"); 
+ Preconditions.checkArgument( + name != null && !name.isEmpty(), "Invalid index name: null or empty"); + Preconditions.checkArgument(type != null, "Invalid index type: null"); + Preconditions.checkArgument( + indexColumnIds != null && !indexColumnIds.isEmpty(), + "Invalid index column IDs: null or empty"); + } + + public String tableUuid() { + return tableUuid; + } + + public String name() { + return name; + } + + public IndexType type() { + return type; + } + + public List indexColumnIds() { + return indexColumnIds != null ? indexColumnIds : ImmutableList.of(); + } + + public List optimizedColumnIds() { + return optimizedColumnIds != null ? optimizedColumnIds : ImmutableList.of(); + } + + public String location() { + return location; + } + + public Map properties() { + return properties != null ? properties : ImmutableMap.of(); + } + + public Long tableSnapshotId() { + return tableSnapshotId; + } + + public Long indexSnapshotId() { + return indexSnapshotId; + } + + public Map snapshotProperties() { + return snapshotProperties != null ? 
snapshotProperties : ImmutableMap.of(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("tableUuid", tableUuid) + .add("name", name) + .add("type", type) + .add("indexColumnIds", indexColumnIds) + .add("optimizedColumnIds", optimizedColumnIds) + .add("location", location) + .add("properties", properties) + .add("tableSnapshotId", tableSnapshotId) + .add("indexSnapshotId", indexSnapshotId) + .add("snapshotProperties", snapshotProperties) + .toString(); + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private String tableUuid; + private String name; + private IndexType type; + private final List indexColumnIds = Lists.newArrayList(); + private final List optimizedColumnIds = Lists.newArrayList(); + private String location; + private final Map properties = Maps.newHashMap(); + private Long tableSnapshotId; + private Long indexSnapshotId; + private final Map snapshotProperties = Maps.newHashMap(); + + private Builder() {} + + public Builder withTableUuid(String uuid) { + this.tableUuid = uuid; + return this; + } + + public Builder withName(String indexName) { + this.name = indexName; + return this; + } + + public Builder withType(IndexType indexType) { + this.type = indexType; + return this; + } + + public Builder withIndexColumnIds(List columnIds) { + this.indexColumnIds.clear(); + this.indexColumnIds.addAll(columnIds); + return this; + } + + public Builder addIndexColumnId(Integer columnId) { + this.indexColumnIds.add(columnId); + return this; + } + + public Builder withOptimizedColumnIds(List columnIds) { + this.optimizedColumnIds.clear(); + this.optimizedColumnIds.addAll(columnIds); + return this; + } + + public Builder addOptimizedColumnId(Integer columnId) { + this.optimizedColumnIds.add(columnId); + return this; + } + + public Builder withLocation(String indexLocation) { + this.location = indexLocation; + return this; + } + + public Builder setProperty(String 
key, String value) { + this.properties.put(key, value); + return this; + } + + public Builder setProperties(Map props) { + this.properties.putAll(props); + return this; + } + + public Builder withTableSnapshotId(Long snapshotId) { + this.tableSnapshotId = snapshotId; + return this; + } + + public Builder withIndexSnapshotId(Long snapshotId) { + this.indexSnapshotId = snapshotId; + return this; + } + + public Builder setSnapshotProperty(String key, String value) { + this.snapshotProperties.put(key, value); + return this; + } + + public Builder setSnapshotProperties(Map props) { + this.snapshotProperties.putAll(props); + return this; + } + + public CreateIndexRequest build() { + return new CreateIndexRequest( + tableUuid, + name, + type, + ImmutableList.copyOf(indexColumnIds), + optimizedColumnIds.isEmpty() ? null : ImmutableList.copyOf(optimizedColumnIds), + location, + properties.isEmpty() ? null : ImmutableMap.copyOf(properties), + tableSnapshotId, + indexSnapshotId, + snapshotProperties.isEmpty() ? null : ImmutableMap.copyOf(snapshotProperties)); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/CreateIndexRequestParser.java b/core/src/main/java/org/apache/iceberg/rest/requests/CreateIndexRequestParser.java new file mode 100644 index 000000000000..f4911a543821 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/requests/CreateIndexRequestParser.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.requests; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import org.apache.iceberg.index.IndexType; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +public class CreateIndexRequestParser { + + private static final String TABLE_UUID = "table-uuid"; + private static final String NAME = "name"; + private static final String TYPE = "type"; + private static final String INDEX_COLUMN_IDS = "index-column-ids"; + private static final String OPTIMIZED_COLUMN_IDS = "optimized-column-ids"; + private static final String LOCATION = "location"; + private static final String PROPERTIES = "properties"; + private static final String TABLE_SNAPSHOT_ID = "table-snapshot-id"; + private static final String INDEX_SNAPSHOT_ID = "index-snapshot-id"; + private static final String SNAPSHOT_PROPERTIES = "snapshot-properties"; + + private CreateIndexRequestParser() {} + + public static String toJson(CreateIndexRequest request) { + return toJson(request, false); + } + + public static String toJson(CreateIndexRequest request, boolean pretty) { + return JsonUtil.generate(gen -> toJson(request, gen), pretty); + } + + public static void toJson(CreateIndexRequest request, JsonGenerator gen) throws IOException { + Preconditions.checkArgument(null != request, "Invalid create index request: null"); + + gen.writeStartObject(); + + gen.writeStringField(TABLE_UUID, request.tableUuid()); + 
gen.writeStringField(NAME, request.name()); + gen.writeStringField(TYPE, request.type().typeName()); + JsonUtil.writeIntegerArray(INDEX_COLUMN_IDS, request.indexColumnIds(), gen); + + if (!request.optimizedColumnIds().isEmpty()) { + JsonUtil.writeIntegerArray(OPTIMIZED_COLUMN_IDS, request.optimizedColumnIds(), gen); + } + + if (request.location() != null) { + gen.writeStringField(LOCATION, request.location()); + } + + if (!request.properties().isEmpty()) { + JsonUtil.writeStringMap(PROPERTIES, request.properties(), gen); + } + + if (request.tableSnapshotId() != null) { + gen.writeNumberField(TABLE_SNAPSHOT_ID, request.tableSnapshotId()); + } + + if (request.indexSnapshotId() != null) { + gen.writeNumberField(INDEX_SNAPSHOT_ID, request.indexSnapshotId()); + } + + if (!request.snapshotProperties().isEmpty()) { + JsonUtil.writeStringMap(SNAPSHOT_PROPERTIES, request.snapshotProperties(), gen); + } + + gen.writeEndObject(); + } + + public static CreateIndexRequest fromJson(String json) { + return JsonUtil.parse(json, CreateIndexRequestParser::fromJson); + } + + public static CreateIndexRequest fromJson(JsonNode json) { + Preconditions.checkArgument(null != json, "Cannot parse create index request from null object"); + + String tableUuid = JsonUtil.getString(TABLE_UUID, json); + String name = JsonUtil.getString(NAME, json); + IndexType type = IndexType.fromString(JsonUtil.getString(TYPE, json)); + + CreateIndexRequest.Builder builder = + CreateIndexRequest.builder() + .withTableUuid(tableUuid) + .withName(name) + .withType(type) + .withIndexColumnIds(JsonUtil.getIntegerList(INDEX_COLUMN_IDS, json)); + + if (json.hasNonNull(OPTIMIZED_COLUMN_IDS)) { + builder.withOptimizedColumnIds(JsonUtil.getIntegerList(OPTIMIZED_COLUMN_IDS, json)); + } + + if (json.hasNonNull(LOCATION)) { + builder.withLocation(JsonUtil.getString(LOCATION, json)); + } + + if (json.hasNonNull(PROPERTIES)) { + builder.setProperties(JsonUtil.getStringMap(PROPERTIES, json)); + } + + if 
(json.hasNonNull(TABLE_SNAPSHOT_ID)) { + builder.withTableSnapshotId(JsonUtil.getLong(TABLE_SNAPSHOT_ID, json)); + } + + if (json.hasNonNull(INDEX_SNAPSHOT_ID)) { + builder.withIndexSnapshotId(JsonUtil.getLong(INDEX_SNAPSHOT_ID, json)); + } + + if (json.hasNonNull(SNAPSHOT_PROPERTIES)) { + builder.setSnapshotProperties(JsonUtil.getStringMap(SNAPSHOT_PROPERTIES, json)); + } + + return builder.build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/RegisterIndexRequest.java b/core/src/main/java/org/apache/iceberg/rest/requests/RegisterIndexRequest.java new file mode 100644 index 000000000000..ea5cb5550aa9 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/requests/RegisterIndexRequest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest.requests; + +import org.apache.iceberg.rest.RESTRequest; +import org.immutables.value.Value; + +@Value.Immutable +public interface RegisterIndexRequest extends RESTRequest { + + String name(); + + String metadataLocation(); + + @Override + default void validate() { + // nothing to validate as it's not possible to create an invalid instance + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/RegisterIndexRequestParser.java b/core/src/main/java/org/apache/iceberg/rest/requests/RegisterIndexRequestParser.java new file mode 100644 index 000000000000..b19b9d2e94f5 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/requests/RegisterIndexRequestParser.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest.requests; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +public class RegisterIndexRequestParser { + + private static final String NAME = "name"; + private static final String METADATA_LOCATION = "metadata-location"; + + private RegisterIndexRequestParser() {} + + public static String toJson(RegisterIndexRequest request) { + return toJson(request, false); + } + + public static String toJson(RegisterIndexRequest request, boolean pretty) { + return JsonUtil.generate(gen -> toJson(request, gen), pretty); + } + + public static void toJson(RegisterIndexRequest request, JsonGenerator gen) throws IOException { + Preconditions.checkArgument(null != request, "Invalid register index request: null"); + + gen.writeStartObject(); + + gen.writeStringField(NAME, request.name()); + gen.writeStringField(METADATA_LOCATION, request.metadataLocation()); + + gen.writeEndObject(); + } + + public static RegisterIndexRequest fromJson(String json) { + return JsonUtil.parse(json, RegisterIndexRequestParser::fromJson); + } + + public static RegisterIndexRequest fromJson(JsonNode json) { + Preconditions.checkArgument( + null != json, "Cannot parse register index request from null object"); + + String name = JsonUtil.getString(NAME, json); + String metadataLocation = JsonUtil.getString(METADATA_LOCATION, json); + + return ImmutableRegisterIndexRequest.builder() + .name(name) + .metadataLocation(metadataLocation) + .build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/UpdateIndexRequest.java b/core/src/main/java/org/apache/iceberg/rest/requests/UpdateIndexRequest.java new file mode 100644 index 000000000000..a480e01e1ec9 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/requests/UpdateIndexRequest.java @@ -0,0 +1,77 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.rest.requests;

import java.util.List;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.index.IndexRequirement;
import org.apache.iceberg.index.IndexUpdate;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.rest.RESTRequest;

/**
 * A REST request to commit changes to an index: a set of {@link IndexRequirement requirements}
 * that must hold for the commit to succeed, and the {@link IndexUpdate updates} to apply.
 */
public class UpdateIndexRequest implements RESTRequest {

  // identifier of the table the index belongs to; may be null for route-scoped requests
  private TableIdentifier identifier;
  private List<IndexRequirement> requirements;
  private List<IndexUpdate> updates;

  public UpdateIndexRequest() {
    // needed for Jackson deserialization
  }

  public UpdateIndexRequest(List<IndexRequirement> requirements, List<IndexUpdate> updates) {
    this.requirements = requirements;
    this.updates = updates;
  }

  UpdateIndexRequest(
      TableIdentifier identifier,
      List<IndexRequirement> requirements,
      List<IndexUpdate> updates) {
    this(requirements, updates);
    this.identifier = identifier;
  }

  @Override
  public void validate() {}

  /** Returns the commit requirements, never null. */
  public List<IndexRequirement> requirements() {
    return requirements != null ? requirements : ImmutableList.of();
  }

  /** Returns the metadata updates to apply, never null. */
  public List<IndexUpdate> updates() {
    return updates != null ? updates : ImmutableList.of();
  }

  /** Returns the table identifier, or null if not set. */
  public TableIdentifier identifier() {
    return identifier;
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(this)
        .add("requirements", requirements)
        .add("updates", updates)
        .toString();
  }

  public static UpdateIndexRequest create(
      TableIdentifier identifier,
      List<IndexRequirement> requirements,
      List<IndexUpdate> updates) {
    return new UpdateIndexRequest(identifier, requirements, updates);
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.rest.requests;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import java.io.IOException;
import java.util.List;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.catalog.TableIdentifierParser;
import org.apache.iceberg.index.IndexRequirement;
import org.apache.iceberg.index.IndexRequirementParser;
import org.apache.iceberg.index.IndexUpdate;
import org.apache.iceberg.index.IndexUpdateParser;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.util.JsonUtil;

/** JSON (de)serialization for {@link UpdateIndexRequest}. */
public class UpdateIndexRequestParser {

  private static final String IDENTIFIER = "identifier";
  private static final String REQUIREMENTS = "requirements";
  private static final String UPDATES = "updates";

  private UpdateIndexRequestParser() {}

  /** Serializes the request to a compact JSON string. */
  public static String toJson(UpdateIndexRequest request) {
    return toJson(request, false);
  }

  /** Serializes the request to JSON, optionally pretty-printed. */
  public static String toJson(UpdateIndexRequest request, boolean pretty) {
    return JsonUtil.generate(gen -> toJson(request, gen), pretty);
  }

  /**
   * Writes the request as a JSON object. The identifier is omitted when null; requirements and
   * updates are always written, possibly as empty arrays.
   */
  public static void toJson(UpdateIndexRequest request, JsonGenerator gen) throws IOException {
    Preconditions.checkArgument(null != request, "Invalid update index request: null");

    gen.writeStartObject();

    if (null != request.identifier()) {
      gen.writeFieldName(IDENTIFIER);
      TableIdentifierParser.toJson(request.identifier(), gen);
    }

    gen.writeArrayFieldStart(REQUIREMENTS);
    for (IndexRequirement updateRequirement : request.requirements()) {
      IndexRequirementParser.toJson(updateRequirement, gen);
    }
    gen.writeEndArray();

    gen.writeArrayFieldStart(UPDATES);
    for (IndexUpdate metadataUpdate : request.updates()) {
      IndexUpdateParser.toJson(metadataUpdate, gen);
    }
    gen.writeEndArray();

    gen.writeEndObject();
  }

  /** Parses a request from a JSON string. */
  public static UpdateIndexRequest fromJson(String json) {
    return JsonUtil.parse(json, UpdateIndexRequestParser::fromJson);
  }

  /**
   * Parses a request from a JSON node. All fields are optional: a missing identifier yields null,
   * and missing requirement/update arrays yield empty lists.
   */
  public static UpdateIndexRequest fromJson(JsonNode json) {
    Preconditions.checkArgument(null != json, "Cannot parse update index request from null object");

    TableIdentifier identifier = null;
    List<IndexRequirement> requirements = Lists.newArrayList();
    List<IndexUpdate> updates = Lists.newArrayList();

    if (json.hasNonNull(IDENTIFIER)) {
      identifier = TableIdentifierParser.fromJson(JsonUtil.get(IDENTIFIER, json));
    }

    if (json.hasNonNull(REQUIREMENTS)) {
      JsonNode requirementsNode = JsonUtil.get(REQUIREMENTS, json);
      Preconditions.checkArgument(
          requirementsNode.isArray(),
          "Cannot parse requirements from non-array: %s",
          requirementsNode);
      requirementsNode.forEach(req -> requirements.add(IndexRequirementParser.fromJson(req)));
    }

    if (json.hasNonNull(UPDATES)) {
      JsonNode updatesNode = JsonUtil.get(UPDATES, json);
      Preconditions.checkArgument(
          updatesNode.isArray(), "Cannot parse metadata updates from non-array: %s", updatesNode);

      updatesNode.forEach(update -> updates.add(IndexUpdateParser.fromJson(update)));
    }

    return UpdateIndexRequest.create(identifier, requirements, updates);
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.rest.responses;

import java.util.Collection;
import java.util.List;
import org.apache.iceberg.catalog.IndexIdentifier;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.rest.RESTResponse;

/** A list of index identifiers for a given table. */
public class ListIndexesResponse implements RESTResponse {

  private List<IndexIdentifier> identifiers;
  // opaque pagination token; null when there are no further pages
  private String nextPageToken;

  public ListIndexesResponse() {
    // Required for Jackson deserialization
  }

  private ListIndexesResponse(List<IndexIdentifier> identifiers, String nextPageToken) {
    this.identifiers = identifiers;
    this.nextPageToken = nextPageToken;
    validate();
  }

  @Override
  public void validate() {
    Preconditions.checkArgument(identifiers != null, "Invalid identifier list: null");
  }

  /** Returns the index identifiers, never null. */
  public List<IndexIdentifier> identifiers() {
    return identifiers != null ? identifiers : ImmutableList.of();
  }

  /** Returns the token for the next page of results, or null if there is none. */
  public String nextPageToken() {
    return nextPageToken;
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(this)
        .add("identifiers", identifiers)
        .add("next-page-token", nextPageToken())
        .toString();
  }

  public static Builder builder() {
    return new Builder();
  }

  /** Builder that collects identifiers and an optional pagination token. */
  public static class Builder {
    private final ImmutableList.Builder<IndexIdentifier> identifiers = ImmutableList.builder();
    private String nextPageToken;

    private Builder() {}

    public Builder add(IndexIdentifier toAdd) {
      Preconditions.checkNotNull(toAdd, "Invalid index identifier: null");
      identifiers.add(toAdd);
      return this;
    }

    public Builder addAll(Collection<IndexIdentifier> toAdd) {
      Preconditions.checkNotNull(toAdd, "Invalid index identifier list: null");
      Preconditions.checkArgument(!toAdd.contains(null), "Invalid index identifier: null");
      identifiers.addAll(toAdd);
      return this;
    }

    public Builder nextPageToken(String pageToken) {
      nextPageToken = pageToken;
      return this;
    }

    public ListIndexesResponse build() {
      return new ListIndexesResponse(identifiers.build(), nextPageToken);
    }
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.rest.responses;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import java.io.IOException;
import org.apache.iceberg.catalog.IndexIdentifier;
import org.apache.iceberg.catalog.IndexIdentifierParser;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.util.JsonUtil;

/** JSON (de)serialization for {@link ListIndexesResponse}. */
public class ListIndexesResponseParser {

  private static final String IDENTIFIERS = "identifiers";
  private static final String NEXT_PAGE_TOKEN = "next-page-token";

  private ListIndexesResponseParser() {}

  /** Serializes the response to a compact JSON string. */
  public static String toJson(ListIndexesResponse response) {
    return toJson(response, false);
  }

  /** Serializes the response to JSON, optionally pretty-printed. */
  public static String toJson(ListIndexesResponse response, boolean pretty) {
    return JsonUtil.generate(gen -> toJson(response, gen), pretty);
  }

  /**
   * Writes the response as a JSON object. The identifiers array is always written; the
   * next-page-token field is written explicitly as null when absent.
   */
  public static void toJson(ListIndexesResponse response, JsonGenerator gen) throws IOException {
    Preconditions.checkArgument(null != response, "Invalid list indexes response: null");

    gen.writeStartObject();

    gen.writeArrayFieldStart(IDENTIFIERS);
    for (IndexIdentifier id : response.identifiers()) {
      IndexIdentifierParser.toJson(id, gen);
    }
    gen.writeEndArray();

    String pageToken = response.nextPageToken();
    if (pageToken != null) {
      gen.writeStringField(NEXT_PAGE_TOKEN, pageToken);
    } else {
      gen.writeNullField(NEXT_PAGE_TOKEN);
    }

    gen.writeEndObject();
  }

  /** Parses a response from a JSON string. */
  public static ListIndexesResponse fromJson(String json) {
    return JsonUtil.parse(json, ListIndexesResponseParser::fromJson);
  }

  /** Parses a response from a JSON node; both fields are optional. */
  public static ListIndexesResponse fromJson(JsonNode json) {
    Preconditions.checkArgument(
        null != json, "Cannot parse list indexes response from null object");

    ListIndexesResponse.Builder builder = ListIndexesResponse.builder();

    if (json.hasNonNull(IDENTIFIERS)) {
      JsonNode idsNode = json.get(IDENTIFIERS);
      Preconditions.checkArgument(
          idsNode.isArray(), "Cannot parse identifiers from non-array: %s", idsNode);

      idsNode.forEach(idNode -> builder.add(IndexIdentifierParser.fromJson(idNode)));
    }

    if (json.hasNonNull(NEXT_PAGE_TOKEN)) {
      builder.nextPageToken(JsonUtil.getString(NEXT_PAGE_TOKEN, json));
    }

    return builder.build();
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.rest.responses;

import java.util.Map;
import org.apache.iceberg.index.IndexMetadata;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.rest.RESTResponse;

/** A REST response that is used when an index is successfully loaded. */
public class LoadIndexResponse implements RESTResponse {

  private String metadataLocation;
  private IndexMetadata metadata;
  // catalog-provided client configuration overrides; may be null after deserialization
  private Map<String, String> config;

  public LoadIndexResponse() {
    // Required for Jackson deserialization
  }

  private LoadIndexResponse(
      String metadataLocation, IndexMetadata metadata, Map<String, String> config) {
    this.metadataLocation = metadataLocation;
    this.metadata = metadata;
    this.config = config;
  }

  @Override
  public void validate() {
    Preconditions.checkNotNull(metadata, "Invalid metadata: null");
  }

  /** Returns the index metadata file location, or null if not set. */
  public String metadataLocation() {
    return metadataLocation;
  }

  /** Returns the loaded index metadata, never null for a valid response. */
  public IndexMetadata metadata() {
    return metadata;
  }

  /** Returns catalog configuration overrides, never null. */
  public Map<String, String> config() {
    return config != null ? config : ImmutableMap.of();
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(this)
        .add("metadataLocation", metadataLocation)
        .add("metadata", metadata)
        .add("config", config)
        .toString();
  }

  public static Builder builder() {
    return new Builder();
  }

  /** Builder for {@link LoadIndexResponse}; metadata is required, all else optional. */
  public static class Builder {
    private String metadataLocation;
    private IndexMetadata metadata;
    private final Map<String, String> config = Maps.newHashMap();

    private Builder() {}

    public Builder withMetadata(IndexMetadata indexMetadata) {
      this.metadata = indexMetadata;
      return this;
    }

    public Builder withMetadataLocation(String location) {
      this.metadataLocation = location;
      return this;
    }

    public Builder addConfig(String property, String value) {
      config.put(property, value);
      return this;
    }

    public Builder addAllConfig(Map<String, String> properties) {
      config.putAll(properties);
      return this;
    }

    public LoadIndexResponse build() {
      Preconditions.checkNotNull(metadata, "Invalid metadata: null");
      return new LoadIndexResponse(metadataLocation, metadata, config);
    }
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.rest.responses;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import java.io.IOException;
import org.apache.iceberg.index.IndexMetadata;
import org.apache.iceberg.index.IndexMetadataParser;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.util.JsonUtil;

/** JSON (de)serialization for {@link LoadIndexResponse}. */
public class LoadIndexResponseParser {

  private static final String METADATA_LOCATION = "metadata-location";
  private static final String METADATA = "metadata";
  private static final String CONFIG = "config";

  private LoadIndexResponseParser() {}

  /** Serializes the response to a compact JSON string. */
  public static String toJson(LoadIndexResponse response) {
    return toJson(response, false);
  }

  /** Serializes the response to JSON, optionally pretty-printed. */
  public static String toJson(LoadIndexResponse response, boolean pretty) {
    return JsonUtil.generate(gen -> toJson(response, gen), pretty);
  }

  /**
   * Writes the response as a JSON object. The metadata-location and config fields are omitted
   * when absent/empty; metadata is always written.
   */
  public static void toJson(LoadIndexResponse response, JsonGenerator gen) throws IOException {
    Preconditions.checkArgument(null != response, "Invalid load index response: null");

    gen.writeStartObject();

    String location = response.metadataLocation();
    if (location != null) {
      gen.writeStringField(METADATA_LOCATION, location);
    }

    gen.writeFieldName(METADATA);
    IndexMetadataParser.toJson(response.metadata(), gen);

    if (!response.config().isEmpty()) {
      JsonUtil.writeStringMap(CONFIG, response.config(), gen);
    }

    gen.writeEndObject();
  }

  /** Parses a response from a JSON string. */
  public static LoadIndexResponse fromJson(String json) {
    return JsonUtil.parse(json, LoadIndexResponseParser::fromJson);
  }

  /** Parses a response from a JSON node; the metadata field is required. */
  public static LoadIndexResponse fromJson(JsonNode json) {
    Preconditions.checkArgument(null != json, "Cannot parse load index response from null object");

    String location = null;
    if (json.hasNonNull(METADATA_LOCATION)) {
      location = JsonUtil.getString(METADATA_LOCATION, json);
    }

    // the parsed location is threaded into the metadata so it records where it was loaded from
    IndexMetadata parsedMetadata =
        IndexMetadataParser.fromJson(location, JsonUtil.get(METADATA, json));

    LoadIndexResponse.Builder builder =
        LoadIndexResponse.builder().withMetadata(parsedMetadata).withMetadataLocation(location);

    if (json.hasNonNull(CONFIG)) {
      builder.addAllConfig(JsonUtil.getStringMap(CONFIG, json));
    }

    return builder.build();
  }
}
+ */ +package org.apache.iceberg.index; + +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; + +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.IndexCatalog; +import org.apache.iceberg.catalog.IndexIdentifier; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NoSuchIndexException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.LocationUtil; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public abstract class IndexCatalogTests { + protected static final Schema SCHEMA = + new Schema( + 5, + required(3, "id", Types.IntegerType.get(), "unique ID"), + required(4, "data", Types.StringType.get())); + + protected abstract C catalog(); + + protected abstract Catalog tableCatalog(); + + @TempDir private Path tempDir; + + protected String indexLocation(String... 
paths) { + StringBuilder location = + new StringBuilder(LocationUtil.stripTrailingSlash(tempDir.toFile().toURI().toString())); + for (String path : paths) { + location.append("/").append(path); + } + + return location.toString(); + } + + protected boolean requiresNamespaceCreate() { + return false; + } + + protected boolean overridesRequestedLocation() { + return false; + } + + protected boolean supportsServerSideRetry() { + return false; + } + + @Test + public void loadIndexWithNonExistingTable() { + TableIdentifier tableIdentifier = TableIdentifier.of("non-existing", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "index"); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + assertThatThrownBy(() -> catalog().loadIndex(indexIdentifier)) + .isInstanceOf(NoSuchIndexException.class) + .hasMessageContaining("Index does not exist"); + } + + @Test + public void loadNonExistingIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "non_existing_index"); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + assertThatThrownBy(() -> catalog().loadIndex(indexIdentifier)) + .isInstanceOf(NoSuchIndexException.class) + .hasMessageContaining("Index does not exist"); + } + + @Test + public void basicCreateIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "test_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = 
tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(index).isNotNull(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + // validate index settings + assertThat(index.name()).isNotNull(); + assertThat(index.type()).isEqualTo(IndexType.BTREE); + assertThat(index.indexColumnIds()).containsExactly(3); + assertThat(index.optimizedColumnIds()).containsExactly(3); + assertThat(index.history()).hasSize(1); + assertThat(index.versions()).hasSize(1); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + } + + @Test + public void completeCreateIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "complete_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + String location = + indexLocation( + tableIdentifier.namespace().toString(), tableIdentifier.name(), indexIdentifier.name()); + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3, 4) + .withOptimizedColumnIds(3) + .withProperty("prop1", "val1") + .withProperty("prop2", "val2") + 
.withLocation(location) + .create(); + + assertThat(index).isNotNull(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + if (!overridesRequestedLocation()) { + assertThat(index.location()).isEqualTo(location); + } else { + assertThat(index.location()).isNotNull(); + } + + // validate index settings + assertThat(index.uuid()).isNotNull(); + assertThat(index.name()).isNotNull(); + assertThat(index.type()).isEqualTo(IndexType.BTREE); + assertThat(index.indexColumnIds()).containsExactly(3, 4); + assertThat(index.optimizedColumnIds()).containsExactly(3); + assertThat(index.history()).hasSize(1); + assertThat(index.versions()).hasSize(1); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + } + + @Test + public void createIndexErrorCases() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "error_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + // type is required + assertThatThrownBy( + () -> + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withIndexColumnIds(3) + .create()) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("Cannot create index without specifying a type"); + + // index column ids are required + assertThatThrownBy( + () -> + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .create()) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("Cannot create index without specifying 
index column ids"); + } + + @Test + public void createIndexThatAlreadyExists() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "duplicate_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(index).isNotNull(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + assertThatThrownBy( + () -> + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.TERM) + .withIndexColumnIds(4) + .withOptimizedColumnIds(4) + .create()) + .isInstanceOf(AlreadyExistsException.class) + .hasMessageContaining("Index already exists"); + } + + @Test + public void createIndexOnNonExistingTable() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "non_existing_table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should not exist").isFalse(); + + assertThatThrownBy( + () -> + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(UUID.randomUUID()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .create()) + .isInstanceOf(NoSuchTableException.class) + .hasMessageContaining("Table does not exist"); + } + + @Test + public void listIndexes() { + TableIdentifier tableIdentifier = 
TableIdentifier.of("ns", "table"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().listIndexes(tableIdentifier)).isEmpty(); + + IndexIdentifier index1Identifier = IndexIdentifier.of(tableIdentifier, "index1"); + catalog() + .buildIndex(index1Identifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + List indexes = catalog().listIndexes(tableIdentifier); + assertThat(indexes).hasSize(1); + assertThat(indexes.get(0).id()).isEqualTo(index1Identifier); + assertThat(indexes.get(0).type()).isEqualTo(IndexType.BTREE); + + IndexIdentifier index2Identifier = IndexIdentifier.of(tableIdentifier, "index2"); + catalog() + .buildIndex(index2Identifier) + .withTableUuid(table.uuid()) + .withType(IndexType.TERM) + .withIndexColumnIds(4) + .withOptimizedColumnIds(4) + .create(); + + indexes = catalog().listIndexes(tableIdentifier); + assertThat(indexes).hasSize(2); + + assertThat(catalog().dropIndex(index1Identifier)).isTrue(); + indexes = catalog().listIndexes(tableIdentifier); + assertThat(indexes).hasSize(1); + assertThat(indexes.get(0).id()).isEqualTo(index2Identifier); + + assertThat(catalog().dropIndex(index2Identifier)).isTrue(); + assertThat(catalog().listIndexes(tableIdentifier)).isEmpty(); + } + + @Test + public void listIndexesByType() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + IndexIdentifier btreeIndexIdentifier = 
IndexIdentifier.of(tableIdentifier, "btree_index"); + catalog() + .buildIndex(btreeIndexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + IndexIdentifier termIndexIdentifier = IndexIdentifier.of(tableIdentifier, "term_index"); + catalog() + .buildIndex(termIndexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.TERM) + .withIndexColumnIds(4) + .withOptimizedColumnIds(4) + .create(); + + // list all indexes + List allIndexes = catalog().listIndexes(tableIdentifier); + assertThat(allIndexes).hasSize(2); + + // list only BTREE indexes + List btreeIndexes = catalog().listIndexes(tableIdentifier, IndexType.BTREE); + assertThat(btreeIndexes).hasSize(1); + assertThat(btreeIndexes.get(0).id()).isEqualTo(btreeIndexIdentifier); + assertThat(btreeIndexes.get(0).type()).isEqualTo(IndexType.BTREE); + + // list only TERM indexes + List termIndexes = catalog().listIndexes(tableIdentifier, IndexType.TERM); + assertThat(termIndexes).hasSize(1); + assertThat(termIndexes.get(0).id()).isEqualTo(termIndexIdentifier); + assertThat(termIndexes.get(0).type()).isEqualTo(IndexType.TERM); + + // list IVF indexes (should be empty) + List ivfIndexes = catalog().listIndexes(tableIdentifier, IndexType.IVF); + assertThat(ivfIndexes).isEmpty(); + + // list BTREE and TERM indexes + List multiTypeIndexes = + catalog().listIndexes(tableIdentifier, IndexType.BTREE, IndexType.TERM); + assertThat(multiTypeIndexes).hasSize(2); + } + + @Test + public void listIndexesOnNonExistingTable() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "non_existing_table"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should not exist").isFalse(); + + assertThatThrownBy(() -> catalog().listIndexes(tableIdentifier)) + .isInstanceOf(NoSuchTableException.class) + 
.hasMessageContaining("Table does not exist"); + } + + @ParameterizedTest(name = ".createOrReplace() = {arguments}") + @ValueSource(booleans = {false, true}) + public void createOrReplaceIndex(boolean useCreateOrReplace) { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "replace_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + IndexBuilder indexBuilder = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withProperty("prop1", "val1"); + Index index = useCreateOrReplace ? indexBuilder.createOrReplace() : indexBuilder.create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + assertThat(index.type()).isEqualTo(IndexType.BTREE); + assertThat(index.indexColumnIds()).containsExactly(3); + + IndexVersion indexVersion = index.currentVersion(); + + indexBuilder = catalog().buildIndex(indexIdentifier).withProperty("replacedProp1", "val1"); + Index replacedIndex = + useCreateOrReplace ? 
indexBuilder.createOrReplace() : indexBuilder.replace(); + + // validate replaced index settings + assertThat(replacedIndex.name()).isNotNull(); + assertThat(replacedIndex.type()).isEqualTo(IndexType.BTREE); + assertThat(replacedIndex.indexColumnIds()).containsExactly(3); + assertThat(replacedIndex.optimizedColumnIds()).containsExactly(3); + assertThat(replacedIndex.history()).hasSize(2); + assertThat(replacedIndex.versions()) + .hasSize(2) + .containsExactly(indexVersion, replacedIndex.currentVersion()); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + } + + @Test + public void replaceIndexErrorCases() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "replace_error_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + assertThatThrownBy( + () -> catalog().buildIndex(indexIdentifier).withType(IndexType.BTREE).replace()) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("Cannot update index type"); + + assertThatThrownBy(() -> catalog().buildIndex(indexIdentifier).withIndexColumnIds(3).replace()) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("Cannot update index column ids"); + + assertThatThrownBy( + () -> 
catalog().buildIndex(indexIdentifier).withOptimizedColumnIds(3).replace()) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("Cannot update optimized column ids"); + + // cannot replace non-existing index + IndexIdentifier nonExistingIdentifier = IndexIdentifier.of(tableIdentifier, "non_existing"); + assertThatThrownBy( + () -> + catalog() + .buildIndex(nonExistingIdentifier) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .replace()) + .isInstanceOf(NoSuchIndexException.class) + .hasMessageContaining("Index does not exist"); + } + + @Test + public void updateIndexProperties() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "props_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + index.addVersion().withProperty("key1", "val1").withProperty("key2", "val2").commit(); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + + // a new version should be added to the index history after updating index properties + assertThat(updatedIndex.history()).hasSize(2).isEqualTo(index.history()); + assertThat(updatedIndex.versions()) + .hasSize(2) + .containsExactlyInAnyOrderElementsOf(index.versions()); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + } + + @Test + public void updateIndexPropertiesErrorCases() { + TableIdentifier 
tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "props_error_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + assertThatThrownBy( + () -> + catalog() + .loadIndex(indexIdentifier) + .addVersion() + .withProperty(null, "new-val1") + .commit()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid key: null"); + + assertThatThrownBy( + () -> + catalog() + .loadIndex(indexIdentifier) + .addVersion() + .withProperty("key1", null) + .commit()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid value: null"); + } + + @Test + public void updateIndexPropertiesConflict() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "props_conflict_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); 
+ + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + AddIndexVersion addIndexVersion = index.addVersion(); + + // drop index and then try to use the updateProperties API + catalog().dropIndex(indexIdentifier); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + assertThatThrownBy(() -> addIndexVersion.withProperty("key1", "val1").commit()) + .isInstanceOf(NoSuchIndexException.class) + .hasMessageContaining("Index does not exist"); + } + + @Test + public void createIndexConflict() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "conflict_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + IndexBuilder indexBuilder = catalog().buildIndex(indexIdentifier); + + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + // the index was already created concurrently + assertThatThrownBy( + () -> + indexBuilder + .withTableUuid(table.uuid()) + .withType(IndexType.TERM) + .withIndexColumnIds(4) + .withOptimizedColumnIds(4) + .create()) + .isInstanceOf(AlreadyExistsException.class) + .hasMessageContaining("Index already exists"); + } + + @Test + public void replaceIndexConflict() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "replace_conflict_index"); + + if 
(requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + IndexBuilder indexBuilder = catalog().buildIndex(indexIdentifier); + + catalog().dropIndex(indexIdentifier); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + // the index was already dropped concurrently + assertThatThrownBy( + () -> + indexBuilder + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3, 4) + .withOptimizedColumnIds(3, 4) + .replace()) + .isInstanceOf(NoSuchIndexException.class) + .hasMessageContaining("Index does not exist"); + } + + @Test + public void createAndReplaceIndexWithLocation() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "location_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + String location = + indexLocation( + tableIdentifier.namespace().toString(), tableIdentifier.name(), indexIdentifier.name()); + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + 
.withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withLocation(location) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + if (!overridesRequestedLocation()) { + assertThat(index.location()).isEqualTo(location); + } else { + assertThat(index.location()).isNotNull(); + } + + String updatedLocation = indexLocation("updated", "ns", "table", "location_index"); + index = catalog().buildIndex(indexIdentifier).withLocation(updatedLocation).replace(); + + if (!overridesRequestedLocation()) { + assertThat(index.location()).isEqualTo(updatedLocation); + } else { + assertThat(index.location()).isNotNull(); + } + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + } + + @Test + public void updateIndexLocation() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "update_location_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + String location = + indexLocation( + tableIdentifier.namespace().toString(), tableIdentifier.name(), indexIdentifier.name()); + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withLocation(location) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + if (!overridesRequestedLocation()) { + assertThat(index.location()).isEqualTo(location); + } else { + assertThat(index.location()).isNotNull(); + } + + 
String updatedLocation = indexLocation("updated", "ns", "table", "update_location_index"); + index.updateLocation().setLocation(updatedLocation).commit(); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + + if (!overridesRequestedLocation()) { + assertThat(updatedIndex.location()).isEqualTo(updatedLocation); + } else { + assertThat(index.location()).isNotNull(); + } + + // history and index versions should stay the same after updating index location + assertThat(updatedIndex.history()).hasSize(1).isEqualTo(index.history()); + assertThat(updatedIndex.versions()).hasSize(1).containsExactly(index.currentVersion()); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + } + + @Test + public void updateIndexLocationConflict() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = + IndexIdentifier.of(tableIdentifier, "location_conflict_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + // new location must be non-null + assertThatThrownBy(() -> index.updateLocation().setLocation(null).commit()) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Invalid index location: null"); + + org.apache.iceberg.UpdateLocation updateIndexLocation = index.updateLocation(); + + 
catalog().dropIndex(indexIdentifier); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + // the index was already dropped concurrently + assertThatThrownBy(() -> updateIndexLocation.setLocation("new-location").commit()) + .isInstanceOf(NoSuchIndexException.class) + .hasMessageContaining("Index does not exist"); + } + + @Test + public void dropNonExistingIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "non_existing"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + assertThat(catalog().dropIndex(indexIdentifier)).isFalse(); + } + + @Test + public void invalidateIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "invalidate_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + // invalidate should not throw for existing index + catalog().invalidateIndex(indexIdentifier); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should 
still exist").isTrue(); + + // invalidate should not throw for non-existing index + IndexIdentifier nonExistingIdentifier = IndexIdentifier.of(tableIdentifier, "non_existing"); + catalog().invalidateIndex(nonExistingIdentifier); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + } + + @Test + public void multipleIndexTypesOnSameTable() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + // Create different types of indexes on the same table + IndexIdentifier btreeIdentifier = IndexIdentifier.of(tableIdentifier, "btree_idx"); + Index btreeIndex = + catalog() + .buildIndex(btreeIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + IndexIdentifier termIdentifier = IndexIdentifier.of(tableIdentifier, "term_idx"); + Index termIndex = + catalog() + .buildIndex(termIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.TERM) + .withIndexColumnIds(4) + .withOptimizedColumnIds(4) + .create(); + + IndexIdentifier ivfIdentifier = IndexIdentifier.of(tableIdentifier, "ivf_idx"); + Index ivfIndex = + catalog() + .buildIndex(ivfIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.IVF) + .withIndexColumnIds(3, 4) + .withOptimizedColumnIds(3) + .create(); + + assertThat(btreeIndex.type()).isEqualTo(IndexType.BTREE); + assertThat(termIndex.type()).isEqualTo(IndexType.TERM); + assertThat(ivfIndex.type()).isEqualTo(IndexType.IVF); + + List allIndexes = catalog().listIndexes(tableIdentifier); + assertThat(allIndexes).hasSize(3); + + // Clean up + assertThat(catalog().dropIndex(btreeIdentifier)).isTrue(); + assertThat(catalog().dropIndex(termIdentifier)).isTrue(); + 
assertThat(catalog().dropIndex(ivfIdentifier)).isTrue(); + } + + @Test + public void createIndexWithSnapshot() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "snapshot_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withProperty("prop1", "val1") + .withTableSnapshotId(100L) + .withIndexSnapshotId(1L) + .withSnapshotProperty("snap_prop1", "snap_val1") + .create(); + + assertThat(index).isNotNull(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + // Validate index snapshot + assertThat(index.snapshots()).hasSize(1); + IndexSnapshot snapshot = index.snapshots().get(0); + assertThat(snapshot.tableSnapshotId()).isEqualTo(100L); + assertThat(snapshot.indexSnapshotId()).isEqualTo(1L); + assertThat(snapshot.versionId()).isEqualTo(1); + assertThat(snapshot.properties()).containsEntry("snap_prop1", "snap_val1"); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + } + + @Test + public void addSnapshotToExistingIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "add_snapshot_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = 
tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + // Create index without snapshot + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withProperty("prop1", "val1") + .create(); + + assertThat(index.snapshots()).isEmpty(); + + // Add snapshot via the index snapshot builder + catalog() + .loadIndex(indexIdentifier) + .addIndexSnapshot() + .withTableSnapshotId(100L) + .withIndexSnapshotId(1L) + .withSnapshotProperty("snap_prop", "snap_val") + .commit(); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.snapshots()).hasSize(1); + IndexSnapshot snapshot = updatedIndex.snapshots().get(0); + assertThat(snapshot.tableSnapshotId()).isEqualTo(100L); + assertThat(snapshot.indexSnapshotId()).isEqualTo(1L); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + } + + @Test + public void removeSnapshotsFromIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "remove_snapshot_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + // Create index with a snapshot + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withProperty("prop1", "val1") + .withTableSnapshotId(100L) + .withIndexSnapshotId(1L) + .create(); + + assertThat(index.snapshots()).hasSize(1); + long snapshotIdToRemove = index.snapshots().get(0).indexSnapshotId(); + + // Remove the snapshot using 
the builder + catalog() + .loadIndex(indexIdentifier) + .removeIndexSnapshots() + .removeSnapshotById(snapshotIdToRemove) + .commit(); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.snapshots()).isEmpty(); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + } + + @Test + public void removeMultipleSnapshotsFromIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = + IndexIdentifier.of(tableIdentifier, "remove_multi_snapshot_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + // Create index with first snapshot + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withProperty("prop1", "val1") + .withTableSnapshotId(100L) + .withIndexSnapshotId(1L) + .create(); + + assertThat(index.snapshots()).hasSize(1); + + // Add second snapshot via replace + index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withTableSnapshotId(200L) + .withIndexSnapshotId(2L) + .replace(); + + assertThat(index.snapshots()).hasSize(2); + + // Add third snapshot via replace + index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withTableSnapshotId(300L) + .withIndexSnapshotId(3L) + .replace(); + + assertThat(index.snapshots()).hasSize(3); + + // Remove multiple snapshots at once + catalog() + .loadIndex(indexIdentifier) + .removeIndexSnapshots() + .removeSnapshotsByIds(1L, 3L) + .commit(); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.snapshots()).hasSize(1); + 
assertThat(updatedIndex.snapshots().get(0).indexSnapshotId()).isEqualTo(2L); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + } + + @Test + public void removeNonExistentSnapshotFromIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = + IndexIdentifier.of(tableIdentifier, "remove_nonexistent_snapshot_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + // Create index with a snapshot + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withProperty("prop1", "val1") + .withTableSnapshotId(100L) + .withIndexSnapshotId(1L) + .create(); + + assertThat(index.snapshots()).hasSize(1); + + // Removing non-existent snapshot should not throw, just do nothing + catalog().loadIndex(indexIdentifier).removeIndexSnapshots().removeSnapshotById(999L).commit(); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + // Original snapshot should still be there + assertThat(updatedIndex.snapshots()).hasSize(1); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + } + + @Test + public void snapshotLookupMethods() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "lookup_snapshot_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + // Create index with a snapshot + Index index = + catalog() + .buildIndex(indexIdentifier) + 
.withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withProperty("prop1", "val1") + .withTableSnapshotId(100L) + .withIndexSnapshotId(1L) + .create(); + + // Test snapshot lookup by index snapshot ID + IndexSnapshot snapshotById = index.snapshot(1L); + assertThat(snapshotById).isNotNull(); + assertThat(snapshotById.indexSnapshotId()).isEqualTo(1L); + assertThat(snapshotById.tableSnapshotId()).isEqualTo(100L); + + // Test snapshot lookup by table snapshot ID + IndexSnapshot snapshotByTableId = index.snapshotForTableSnapshot(100L); + assertThat(snapshotByTableId).isNotNull(); + assertThat(snapshotByTableId.indexSnapshotId()).isEqualTo(1L); + + // Test non-existent lookups + assertThat(index.snapshot(999L)).isNull(); + assertThat(index.snapshotForTableSnapshot(999L)).isNull(); + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + } + + @Test + public void concurrentAddIndexSnapshot() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = + IndexIdentifier.of(tableIdentifier, "concurrent_snapshot_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + assertThat(index.snapshots()).isEmpty(); + + AddIndexSnapshot addSnapshotOne = + index + .addIndexSnapshot() + .withTableSnapshotId(100L) + .withIndexSnapshotId(1L) + .withSnapshotProperty("source", 
"snapshot-one"); + + AddIndexSnapshot addSnapshotTwo = + index + .addIndexSnapshot() + .withTableSnapshotId(200L) + .withIndexSnapshotId(2L) + .withSnapshotProperty("source", "snapshot-two"); + + // simulate a concurrent add of the index snapshot + IndexOperations indexOps = ((BaseIndex) index).operations(); + IndexMetadata current = indexOps.current(); + + IndexMetadata firstUpdate = ((IndexSnapshotAdd) addSnapshotOne).internalApply(); + IndexMetadata secondUpdate = ((IndexSnapshotAdd) addSnapshotTwo).internalApply(); + + indexOps.commit(current, firstUpdate); + + if (supportsServerSideRetry()) { + // retry should succeed and the changes should be applied + indexOps.commit(current, secondUpdate); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.snapshots()).hasSize(2); + + IndexSnapshot snapshot1 = updatedIndex.snapshot(1L); + assertThat(snapshot1).isNotNull(); + assertThat(snapshot1.tableSnapshotId()).isEqualTo(100L); + assertThat(snapshot1.properties()).containsEntry("source", "snapshot-one"); + + IndexSnapshot snapshot2 = updatedIndex.snapshot(2L); + assertThat(snapshot2).isNotNull(); + assertThat(snapshot2.tableSnapshotId()).isEqualTo(200L); + assertThat(snapshot2.properties()).containsEntry("source", "snapshot-two"); + } else { + assertThatThrownBy(() -> indexOps.commit(current, secondUpdate)) + .isInstanceOf(CommitFailedException.class) + .hasMessageContaining("Cannot commit"); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.snapshots()).hasSize(1); + + IndexSnapshot snapshot1 = updatedIndex.snapshot(1L); + assertThat(snapshot1).isNotNull(); + assertThat(snapshot1.tableSnapshotId()).isEqualTo(100L); + assertThat(snapshot1.properties()).containsEntry("source", "snapshot-one"); + + assertThat(updatedIndex.snapshot(2L)).isNull(); + } + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not 
exist").isFalse(); + } + + @Test + public void concurrentAddVersion() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = + IndexIdentifier.of(tableIdentifier, "concurrent_update_props_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + AddIndexVersion updatePropsOne = + index.addVersion().withProperty("key1", "value1").withProperty("source1", "update-one"); + + AddIndexVersion updatePropsTwo = + index.addVersion().withProperty("key2", "value2").withProperty("source2", "update-two"); + + // simulate a concurrent update of the index properties + IndexOperations indexOps = ((BaseIndex) index).operations(); + IndexMetadata current = indexOps.current(); + + IndexMetadata firstUpdate = ((IndexVersionAdd) updatePropsOne).internalApply(); + IndexMetadata secondUpdate = ((IndexVersionAdd) updatePropsTwo).internalApply(); + + indexOps.commit(current, firstUpdate); + + if (supportsServerSideRetry()) { + // retry should succeed and the changes should be applied + indexOps.commit(current, secondUpdate); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + Map properties = updatedIndex.currentVersion().properties(); + assertThat(properties).isEqualTo(ImmutableMap.of("key2", "value2", "source2", "update-two")); + assertThat(updatedIndex.versions()).hasSize(3); + assertThat(updatedIndex.version(2)) + .isNotNull() + 
.extracting("properties") + .isEqualTo(ImmutableMap.of("key1", "value1", "source1", "update-one")); + + } else { + assertThatThrownBy(() -> indexOps.commit(current, secondUpdate)) + .isInstanceOf(CommitFailedException.class) + .hasMessageContaining("Cannot commit"); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + Map properties = updatedIndex.currentVersion().properties(); + assertThat(properties).isEqualTo(ImmutableMap.of("key1", "value1", "source1", "update-one")); + assertThat(updatedIndex.versions()).hasSize(2); + } + + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + } + + @Test + public void registerIndex() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier sourceIndexIdentifier = IndexIdentifier.of(tableIdentifier, "source_index"); + IndexIdentifier registeredIndexIdentifier = + IndexIdentifier.of(tableIdentifier, "registered_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + // Create a source index to get its metadata file location + Index sourceIndex = + catalog() + .buildIndex(sourceIndexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withProperty("prop1", "val1") + .create(); + + assertThat(catalog().indexExists(sourceIndexIdentifier)) + .as("Source index should exist") + .isTrue(); + + // Get the metadata file location from the source index + String metadataFileLocation = + ((BaseIndex) sourceIndex).operations().current().metadataFileLocation(); + assertThat(metadataFileLocation).isNotNull(); + + // Drop the source index before registering it with a new identifier + 
assertThat(catalog().dropIndex(sourceIndexIdentifier)).isTrue(); + assertThat(catalog().indexExists(sourceIndexIdentifier)) + .as("Source index should not exist") + .isFalse(); + + // Register a new index using the metadata file location + Index registeredIndex = + catalog().registerIndex(registeredIndexIdentifier, metadataFileLocation); + + assertThat(registeredIndex).isNotNull(); + assertThat(catalog().indexExists(registeredIndexIdentifier)) + .as("Registered index should exist") + .isTrue(); + + // Validate the registered index has the same properties as the source + assertThat(registeredIndex.type()).isEqualTo(sourceIndex.type()); + assertThat(registeredIndex.indexColumnIds()).isEqualTo(sourceIndex.indexColumnIds()); + assertThat(registeredIndex.optimizedColumnIds()).isEqualTo(sourceIndex.optimizedColumnIds()); + + // Validate the registered index has the same UUID as the source (registerIndex preserves + // metadata) + assertThat(registeredIndex.uuid()).isEqualTo(sourceIndex.uuid()); + + // Clean up + assertThat(catalog().dropIndex(registeredIndexIdentifier)).isTrue(); + } + + @Test + public void registerIndexThatAlreadyExists() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "existing_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + // Create the index first + Index existingIndex = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + + String metadataFileLocation = + ((BaseIndex) 
existingIndex).operations().current().metadataFileLocation(); + + // Trying to register an index with the same identifier should fail + assertThatThrownBy(() -> catalog().registerIndex(indexIdentifier, metadataFileLocation)) + .isInstanceOf(AlreadyExistsException.class) + .hasMessageContaining("Index already exists"); + + // Clean up + assertThat(catalog().dropIndex(indexIdentifier)).isTrue(); + } + + @Test + public void registerIndexOnNonExistingTable() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "non_existing_table"); + TableIdentifier existingTableIdentifier = TableIdentifier.of("ns", "existing_table"); + IndexIdentifier sourceIndexIdentifier = + IndexIdentifier.of(existingTableIdentifier, "source_index"); + IndexIdentifier indexIdentifier = IndexIdentifier.of(tableIdentifier, "index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + // Create a table and index to get a valid metadata file location + tableCatalog().buildTable(existingTableIdentifier, SCHEMA).create(); + Index sourceIndex = + catalog() + .buildIndex(sourceIndexIdentifier) + .withTableUuid(UUID.randomUUID()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .create(); + + String metadataFileLocation = + ((BaseIndex) sourceIndex).operations().current().metadataFileLocation(); + + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should not exist").isFalse(); + + // Trying to register an index on a non-existing table should fail + assertThatThrownBy(() -> catalog().registerIndex(indexIdentifier, metadataFileLocation)) + .isInstanceOf(NoSuchTableException.class) + .hasMessageContaining("Table does not exist"); + + // Clean up + assertThat(catalog().dropIndex(sourceIndexIdentifier)).isTrue(); + } + + @Test + public void registerIndexWithInvalidMetadataLocation() { + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = 
IndexIdentifier.of(tableIdentifier, "index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + // Trying to register with null metadata location should fail + assertThatThrownBy(() -> catalog().registerIndex(indexIdentifier, null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot register an empty metadata file location"); + + // Trying to register with empty metadata location should fail + assertThatThrownBy(() -> catalog().registerIndex(indexIdentifier, "")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot register an empty metadata file location"); + } + + @Test + public void concurrentIndexUpdates() { + assumeThat(supportsServerSideRetry()).isTrue(); + TableIdentifier tableIdentifier = TableIdentifier.of("ns", "table"); + IndexIdentifier indexIdentifier = + IndexIdentifier.of(tableIdentifier, "concurrent_snapshot_index"); + + if (requiresNamespaceCreate()) { + catalog().createNamespace(tableIdentifier.namespace()); + } + + Table table = tableCatalog().buildTable(tableIdentifier, SCHEMA).create(); + assertThat(tableCatalog().tableExists(tableIdentifier)).as("Table should exist").isTrue(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse(); + + Index index = + catalog() + .buildIndex(indexIdentifier) + .withTableUuid(table.uuid()) + .withType(IndexType.BTREE) + .withIndexColumnIds(3) + .withOptimizedColumnIds(3) + .withIndexSnapshotId(0L) + .withTableSnapshotId(0L) + .create(); + + assertThat(catalog().indexExists(indexIdentifier)).as("Index should exist").isTrue(); + assertThat(index.snapshots()).hasSize(1); + + AddIndexSnapshot addSnapshotOne = + index + .addIndexSnapshot() + .withTableSnapshotId(100L) + .withIndexSnapshotId(1L) + 
.withSnapshotProperty("source", "snapshot-one"); + + AddIndexVersion updatePropertiesOne = index.addVersion().withProperty("prop1", "value1"); + + RemoveIndexSnapshots removeSnapshot = index.removeIndexSnapshots().removeSnapshotById(0L); + + AddIndexSnapshot addSnapshotTwo = + index + .addIndexSnapshot() + .withTableSnapshotId(200L) + .withIndexSnapshotId(2L) + .withSnapshotProperty("source", "snapshot-two"); + + AddIndexVersion updatePropertiesTwo = index.addVersion().withProperty("prop2", "value2"); + + // simulate a concurrent add of the index snapshot + IndexOperations indexOps = ((BaseIndex) index).operations(); + IndexMetadata current = indexOps.current(); + + IndexMetadata firstAdd = ((IndexSnapshotAdd) addSnapshotOne).internalApply(); + IndexMetadata firstUpdate = ((IndexVersionAdd) updatePropertiesOne).internalApply(); + IndexMetadata firstRemove = ((IndexSnapshotsRemove) removeSnapshot).internalApply(); + IndexMetadata secondAdd = ((IndexSnapshotAdd) addSnapshotTwo).internalApply(); + IndexMetadata secondUpdate = ((IndexVersionAdd) updatePropertiesTwo).internalApply(); + + indexOps.commit(current, firstAdd); + + Index updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.snapshots()).hasSize(2); + + indexOps.commit(current, firstUpdate); + + updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.versions()).hasSize(2); + + indexOps.commit(current, firstRemove); + + updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.snapshots()).hasSize(1); + + indexOps.commit(current, secondAdd); + + updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.snapshots()).hasSize(2); + + indexOps.commit(current, secondUpdate); + + updatedIndex = catalog().loadIndex(indexIdentifier); + assertThat(updatedIndex.versions()).hasSize(3); + + IndexSnapshot snapshot1 = updatedIndex.snapshot(1L); + assertThat(snapshot1).isNotNull(); + 
assertThat(snapshot1.tableSnapshotId()).isEqualTo(100L);
+    assertThat(snapshot1.properties()).containsEntry("source", "snapshot-one");
+    assertThat(snapshot1.versionId()).isEqualTo(1L);
+
+    IndexSnapshot snapshot2 = updatedIndex.snapshot(2L);
+    assertThat(snapshot2).isNotNull();
+    assertThat(snapshot2.tableSnapshotId()).isEqualTo(200L);
+    assertThat(snapshot2.properties()).containsEntry("source", "snapshot-two");
+    assertThat(snapshot2.versionId()).isEqualTo(1L);
+
+    IndexVersion version1 = updatedIndex.version(1);
+    assertThat(version1).isNotNull();
+    assertThat(version1.properties()).isEmpty();
+
+    IndexVersion version2 = updatedIndex.version(2);
+    assertThat(version2).isNotNull();
+    assertThat(version2.properties()).containsEntry("prop1", "value1");
+
+    IndexVersion version3 = updatedIndex.version(3);
+    assertThat(version3).isNotNull();
+    assertThat(version3.properties()).containsEntry("prop2", "value2");
+
+    assertThat(catalog().dropIndex(indexIdentifier)).isTrue();
+    assertThat(catalog().indexExists(indexIdentifier)).as("Index should not exist").isFalse();
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/index/TestIndexHistoryEntryParser.java b/core/src/test/java/org/apache/iceberg/index/TestIndexHistoryEntryParser.java
new file mode 100644
index 000000000000..661b06bb7367
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/index/TestIndexHistoryEntryParser.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import org.junit.jupiter.api.Test; + +public class TestIndexHistoryEntryParser { + private static final String INDEX_HISTORY_ENTRY_JSON = + """ + { + "timestamp-ms": 1622547800000, + "version-id": 3 + } + """ + .replaceAll("\\s+", ""); + + private static final IndexHistoryEntry INDEX_HISTORY_ENTRY = + ImmutableIndexHistoryEntry.builder().versionId(3).timestampMillis(1622547800000L).build(); + + @Test + public void testIndexHistoryEntryFromJson() { + assertThat(IndexHistoryEntryParser.fromJson(INDEX_HISTORY_ENTRY_JSON)) + .as("Should be able to deserialize valid index history entry") + .isEqualTo(INDEX_HISTORY_ENTRY); + } + + @Test + public void testIndexHistoryEntryToJson() { + assertThat(IndexHistoryEntryParser.toJson(INDEX_HISTORY_ENTRY)) + .as("Should be able to serialize index history entry") + .isEqualTo(INDEX_HISTORY_ENTRY_JSON); + } + + @Test + public void testNullIndexHistoryEntry() { + assertThatThrownBy(() -> IndexHistoryEntryParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse index history entry from null object"); + + assertThatThrownBy(() -> IndexHistoryEntryParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index history entry: null"); + } + + @Test + public void testIndexHistoryEntryMissingFields() { + 
assertThatThrownBy(() -> IndexHistoryEntryParser.fromJson("{}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing int: version-id"); + + assertThatThrownBy(() -> IndexHistoryEntryParser.fromJson("{\"timestamp-ms\":123}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing int: version-id"); + + assertThatThrownBy(() -> IndexHistoryEntryParser.fromJson("{\"version-id\":1}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing long: timestamp-ms"); + } + + @Test + public void testRoundTrip() { + IndexHistoryEntry original = + ImmutableIndexHistoryEntry.builder().versionId(42).timestampMillis(1234567890L).build(); + + String json = IndexHistoryEntryParser.toJson(original); + IndexHistoryEntry parsed = IndexHistoryEntryParser.fromJson(json); + + assertThat(parsed.versionId()).isEqualTo(original.versionId()); + assertThat(parsed.timestampMillis()).isEqualTo(original.timestampMillis()); + } +} diff --git a/core/src/test/java/org/apache/iceberg/index/TestIndexMetadata.java b/core/src/test/java/org/apache/iceberg/index/TestIndexMetadata.java new file mode 100644 index 000000000000..cbc6a79853ca --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/index/TestIndexMetadata.java @@ -0,0 +1,849 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.index;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
+import org.assertj.core.api.InstanceOfAssertFactories;
+import org.junit.jupiter.api.Test;
+
+public class TestIndexMetadata {
+
+  private static final String TABLE_UUID = "test-table-uuid";
+
+  private IndexVersion newIndexVersion(int id) {
+    return newIndexVersion(id, System.currentTimeMillis());
+  }
+
+  private IndexVersion newIndexVersion(int id, long timestampMillis) {
+    return ImmutableIndexVersion.builder().versionId(id).timestampMillis(timestampMillis).build();
+  }
+
+  private IndexVersion newIndexVersion(
+      int id, long timestampMillis, Map<String, String> properties) {
+    return ImmutableIndexVersion.builder()
+        .versionId(id)
+        .timestampMillis(timestampMillis)
+        .properties(properties)
+        .build();
+  }
+
+  @Test
+  public void testExpiration() {
+    // purposely use versions and timestamps that do not match to check that version ID is used
+    IndexVersion v1 = newIndexVersion(1);
+    IndexVersion v3 = newIndexVersion(3);
+    IndexVersion v2 = newIndexVersion(2);
+    Map<Integer, IndexVersion> versionsById = ImmutableMap.of(1, v1, 2, v2, 3, v3);
+
+    assertThat(IndexMetadata.Builder.expireVersions(versionsById, 3, v1))
.containsExactlyInAnyOrder(v1, v2, v3); + assertThat(IndexMetadata.Builder.expireVersions(versionsById, 2, v1)) + .containsExactlyInAnyOrder(v1, v3); + assertThat(IndexMetadata.Builder.expireVersions(versionsById, 1, v1)).containsExactly(v1); + } + + @Test + public void testUpdateHistory() { + IndexVersion v1 = newIndexVersion(1); + IndexVersion v2 = newIndexVersion(2); + IndexVersion v3 = newIndexVersion(3); + + IndexHistoryEntry one = + ImmutableIndexHistoryEntry.builder() + .versionId(v1.versionId()) + .timestampMillis(v1.timestampMillis()) + .build(); + IndexHistoryEntry two = + ImmutableIndexHistoryEntry.builder() + .versionId(v2.versionId()) + .timestampMillis(v2.timestampMillis()) + .build(); + IndexHistoryEntry three = + ImmutableIndexHistoryEntry.builder() + .versionId(v3.versionId()) + .timestampMillis(v3.timestampMillis()) + .build(); + + assertThat( + IndexMetadata.Builder.updateHistory( + ImmutableList.of(one, two, three), ImmutableSet.of(1, 2, 3))) + .containsExactly(one, two, three); + + // one was an invalid entry in the history, so all previous elements are removed + assertThat( + IndexMetadata.Builder.updateHistory( + ImmutableList.of(three, two, one, two, three), ImmutableSet.of(2, 3))) + .containsExactly(two, three); + + // two was an invalid entry in the history, so all previous elements are removed + assertThat( + IndexMetadata.Builder.updateHistory( + ImmutableList.of(one, two, three, one, three), ImmutableSet.of(1, 3))) + .containsExactly(three, one, three); + } + + @Test + public void nullAndMissingFields() { + assertThatThrownBy(() -> IndexMetadata.builder().build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid location: null"); + + assertThatThrownBy(() -> IndexMetadata.builder().setLocation("location").build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid table uuid: null"); + + assertThatThrownBy( + () -> IndexMetadata.builder().setTableUuid(TABLE_UUID).setLocation("location").build()) 
+ .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index: no versions were added"); + } + + @Test + public void unsupportedFormatVersion() { + IndexVersion version = newIndexVersion(1); + assertThatThrownBy( + () -> + IndexMetadata.builder() + .upgradeFormatVersion(23) + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported format version: 23"); + + assertThatThrownBy( + () -> + IndexMetadata.builder() + .upgradeFormatVersion(0) + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot downgrade v1 index to v0"); + } + + @Test + public void emptyIndexVersion() { + assertThatThrownBy( + () -> + IndexMetadata.builder() + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index: no versions were added"); + } + + @Test + public void invalidVersionHistorySizeToKeep() { + IndexVersion version = + newIndexVersion(1, 1000L, ImmutableMap.of(IndexProperties.VERSION_HISTORY_SIZE, "0")); + assertThatThrownBy( + () -> + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .build()) + 
.isInstanceOf(IllegalArgumentException.class) + .hasMessage("version.history.num-entries must be positive but was 0"); + } + + @Test + public void indexVersionHistoryNormalization() { + // Each version must have different properties to avoid deduplication + IndexVersion indexVersionOne = + newIndexVersion( + 1, 1000L, ImmutableMap.of(IndexProperties.VERSION_HISTORY_SIZE, "2", "v", "1")); + IndexVersion indexVersionTwo = + newIndexVersion( + 2, 2000L, ImmutableMap.of(IndexProperties.VERSION_HISTORY_SIZE, "2", "v", "2")); + IndexVersion indexVersionThree = + newIndexVersion( + 3, 3000L, ImmutableMap.of(IndexProperties.VERSION_HISTORY_SIZE, "2", "v", "3")); + + IndexMetadata originalIndexMetadata = + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(indexVersionOne) + .setCurrentVersion(indexVersionOne.versionId()) + .addVersion(indexVersionTwo) + .setCurrentVersion(indexVersionTwo.versionId()) + .addVersion(indexVersionThree) + .setCurrentVersion(indexVersionThree.versionId()) + .build(); + + // the first build will not expire versions that were added in the builder + assertThat(originalIndexMetadata.versions()).hasSize(3); + assertThat(originalIndexMetadata.history()).hasSize(1); + + // rebuild the metadata to expire older versions + IndexMetadata indexMetadata = IndexMetadata.buildFrom(originalIndexMetadata).build(); + assertThat(indexMetadata.versions()).hasSize(2); + assertThat(indexMetadata.history()).hasSize(1); + + // make sure that metadata changes reflect the current state after the history was adjusted + List changes = originalIndexMetadata.changes(); + assertThat(changes).hasSize(7); + assertThat(changes) + .element(0) + .isInstanceOf(IndexUpdate.SetLocation.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.SetLocation.class)) + 
.extracting(IndexUpdate.SetLocation::location) + .isEqualTo("location"); + + assertThat(changes) + .element(1) + .isInstanceOf(IndexUpdate.AddVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.AddVersion.class)) + .extracting(IndexUpdate.AddVersion::indexVersion) + .isEqualTo(indexVersionOne); + + assertThat(changes) + .element(2) + .isInstanceOf(IndexUpdate.SetCurrentVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.SetCurrentVersion.class)) + .extracting(IndexUpdate.SetCurrentVersion::versionId) + .isEqualTo(indexVersionOne.versionId()); + + assertThat(changes) + .element(3) + .isInstanceOf(IndexUpdate.AddVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.AddVersion.class)) + .extracting(IndexUpdate.AddVersion::indexVersion) + .isEqualTo(indexVersionTwo); + + assertThat(changes) + .element(4) + .isInstanceOf(IndexUpdate.SetCurrentVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.SetCurrentVersion.class)) + .extracting(IndexUpdate.SetCurrentVersion::versionId) + .isEqualTo(indexVersionTwo.versionId()); + + assertThat(changes) + .element(5) + .isInstanceOf(IndexUpdate.AddVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.AddVersion.class)) + .extracting(IndexUpdate.AddVersion::indexVersion) + .isEqualTo(indexVersionThree); + + assertThat(changes) + .element(6) + .isInstanceOf(IndexUpdate.SetCurrentVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.SetCurrentVersion.class)) + .extracting(IndexUpdate.SetCurrentVersion::versionId) + .isEqualTo(indexVersionThree.versionId()); + } + + @Test + public void indexVersionHistoryIsCorrectlyRetained() { + // Each version must have different properties to avoid deduplication + IndexVersion indexVersionOne = + newIndexVersion( + 1, 1000L, ImmutableMap.of(IndexProperties.VERSION_HISTORY_SIZE, "2", "v", "1")); + IndexVersion indexVersionTwo = + newIndexVersion( + 2, 2000L, 
ImmutableMap.of(IndexProperties.VERSION_HISTORY_SIZE, "2", "v", "2")); + IndexVersion indexVersionThree = + newIndexVersion( + 3, 3000L, ImmutableMap.of(IndexProperties.VERSION_HISTORY_SIZE, "2", "v", "3")); + + IndexMetadata originalIndexMetadata = + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(indexVersionOne) + .setCurrentVersion(indexVersionOne.versionId()) + .build(); + originalIndexMetadata = + IndexMetadata.buildFrom(originalIndexMetadata) + .addVersion(indexVersionTwo) + .setCurrentVersion(indexVersionTwo.versionId()) + .build(); + originalIndexMetadata = + IndexMetadata.buildFrom(originalIndexMetadata) + .addVersion(indexVersionThree) + .setCurrentVersion(indexVersionThree.versionId()) + .build(); + + assertThat(originalIndexMetadata.versions()) + .hasSize(2) + .containsExactlyInAnyOrder(indexVersionTwo, indexVersionThree); + assertThat(originalIndexMetadata.history()) + .hasSize(2) + .last() + .extracting(IndexHistoryEntry::versionId) + .isEqualTo(3); + + // rebuild the metadata to expire older versions + IndexMetadata indexMetadata = IndexMetadata.buildFrom(originalIndexMetadata).build(); + assertThat(indexMetadata.versions()) + .hasSize(2) + // there is no requirement about the order of versions + .containsExactlyInAnyOrder(indexVersionThree, indexVersionTwo); + assertThat(indexMetadata.history()) + .hasSize(2) + .last() + .extracting(IndexHistoryEntry::versionId) + .isEqualTo(3); + + IndexMetadata updated = + IndexMetadata.buildFrom(indexMetadata) + .setCurrentVersion(indexVersionTwo.versionId()) + .build(); + assertThat(updated.versions()) + .hasSize(2) + .containsExactlyInAnyOrder(indexVersionTwo, indexVersionThree); + assertThat(updated.history()) + .hasSize(3) + .element(1) + .extracting(IndexHistoryEntry::versionId) + .isEqualTo(3); + 
assertThat(updated.history()).last().extracting(IndexHistoryEntry::versionId).isEqualTo(2); + + IndexMetadata index = + IndexMetadata.buildFrom(updated).setCurrentVersion(indexVersionThree.versionId()).build(); + assertThat(index.versions()) + .hasSize(2) + .containsExactlyInAnyOrder(indexVersionTwo, indexVersionThree); + assertThat(index.history()) + .hasSize(4) + .element(1) + .extracting(IndexHistoryEntry::versionId) + .isEqualTo(3); + assertThat(index.history()).element(2).extracting(IndexHistoryEntry::versionId).isEqualTo(2); + assertThat(index.history()).last().extracting(IndexHistoryEntry::versionId).isEqualTo(3); + } + + @Test + public void versionHistoryEntryMaintainCorrectTimeline() { + // Each version must have different properties to avoid deduplication + IndexVersion indexVersionOne = newIndexVersion(1, 1000, ImmutableMap.of("v", "1")); + IndexVersion indexVersionTwo = newIndexVersion(2, 2000, ImmutableMap.of("v", "2")); + IndexVersion indexVersionThree = newIndexVersion(3, 3000, ImmutableMap.of("v", "3")); + + IndexMetadata indexMetadata = + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(indexVersionOne) + .setCurrentVersion(indexVersionOne.versionId()) + .build(); + + indexMetadata = + IndexMetadata.buildFrom(indexMetadata) + .addVersion(indexVersionTwo) + .setCurrentVersion(indexVersionTwo.versionId()) + .build(); + + // setting an existing index version as the new current should update the timestamp in the + // history + IndexMetadata updated = + IndexMetadata.buildFrom(indexMetadata) + .setCurrentVersion(indexVersionOne.versionId()) + .build(); + + List history = updated.history(); + assertThat(history) + .hasSize(3) + .element(0) + .isEqualTo(ImmutableIndexHistoryEntry.builder().versionId(1).timestampMillis(1000).build()); + assertThat(history) + .element(1) + 
.isEqualTo(ImmutableIndexHistoryEntry.builder().versionId(2).timestampMillis(2000).build()); + assertThat(history) + .element(2) + .satisfies( + v -> { + assertThat(v.versionId()).isEqualTo(1); + assertThat(v.timestampMillis()) + .isGreaterThan(3000) + .isLessThanOrEqualTo(System.currentTimeMillis()); + }); + + // adding a new index version and setting it as current should use the index version's timestamp + // in the history (which has been set to a fixed value for testing) + updated = + IndexMetadata.buildFrom(updated) + .addVersion(indexVersionThree) + .setCurrentVersion(indexVersionThree.versionId()) + .build(); + List historyTwo = updated.history(); + assertThat(historyTwo) + .hasSize(4) + .last() + .isEqualTo(ImmutableIndexHistoryEntry.builder().versionId(3).timestampMillis(3000).build()); + + // setting an older index version as the new current (aka doing a rollback) should update the + // timestamp in the history + IndexMetadata reactiveOldIndexVersion = + IndexMetadata.buildFrom(updated).setCurrentVersion(indexVersionOne.versionId()).build(); + List historyThree = reactiveOldIndexVersion.history(); + assertThat(historyThree) + .hasSize(5) + .last() + .satisfies( + v -> { + assertThat(v.versionId()).isEqualTo(1); + assertThat(v.timestampMillis()) + .isGreaterThan(3000) + .isLessThanOrEqualTo(System.currentTimeMillis()); + }); + } + + @Test + public void indexMetadataAndMetadataChanges() { + // Each version must have different properties to avoid deduplication + IndexVersion indexVersionOne = newIndexVersion(1, 1000L, ImmutableMap.of("key1", "prop1")); + IndexVersion indexVersionTwo = newIndexVersion(2, 2000L, ImmutableMap.of("key2", "prop2")); + IndexVersion indexVersionThree = newIndexVersion(3, 3000L, ImmutableMap.of("key3", "prop3")); + + IndexMetadata indexMetadata = + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("custom-location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1, 2)) + 
.setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(indexVersionOne) + .setCurrentVersion(indexVersionOne.versionId()) + .addVersion(indexVersionTwo) + .setCurrentVersion(indexVersionTwo.versionId()) + .addVersion(indexVersionThree) + .setCurrentVersion(indexVersionThree.versionId()) + .build(); + + assertThat(indexMetadata.versions()) + .hasSize(3) + .containsExactly(indexVersionOne, indexVersionTwo, indexVersionThree); + assertThat(indexMetadata.history()).hasSize(1); + assertThat(indexMetadata.currentVersionId()).isEqualTo(3); + assertThat(indexMetadata.currentVersion()).isEqualTo(indexVersionThree); + assertThat(indexMetadata.formatVersion()).isEqualTo(IndexMetadata.DEFAULT_INDEX_FORMAT_VERSION); + assertThat(indexMetadata.type()).isEqualTo(IndexType.BTREE); + assertThat(indexMetadata.indexColumnIds()).isEqualTo(ImmutableList.of(1, 2)); + assertThat(indexMetadata.optimizedColumnIds()).isEqualTo(ImmutableList.of(1)); + assertThat(indexMetadata.location()).isEqualTo("custom-location"); + + List changes = indexMetadata.changes(); + assertThat(changes).hasSize(7); + + assertThat(changes) + .element(0) + .isInstanceOf(IndexUpdate.SetLocation.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.SetLocation.class)) + .extracting(IndexUpdate.SetLocation::location) + .isEqualTo("custom-location"); + + assertThat(changes) + .element(1) + .isInstanceOf(IndexUpdate.AddVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.AddVersion.class)) + .extracting(IndexUpdate.AddVersion::indexVersion) + .isEqualTo(indexVersionOne); + + assertThat(changes) + .element(2) + .isInstanceOf(IndexUpdate.SetCurrentVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.SetCurrentVersion.class)) + .extracting(IndexUpdate.SetCurrentVersion::versionId) + .isEqualTo(indexVersionOne.versionId()); + + assertThat(changes) + .element(3) + .isInstanceOf(IndexUpdate.AddVersion.class) + 
.asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.AddVersion.class)) + .extracting(IndexUpdate.AddVersion::indexVersion) + .isEqualTo(indexVersionTwo); + + assertThat(changes) + .element(4) + .isInstanceOf(IndexUpdate.SetCurrentVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.SetCurrentVersion.class)) + .extracting(IndexUpdate.SetCurrentVersion::versionId) + .isEqualTo(indexVersionTwo.versionId()); + + assertThat(changes) + .element(5) + .isInstanceOf(IndexUpdate.AddVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.AddVersion.class)) + .extracting(IndexUpdate.AddVersion::indexVersion) + .isEqualTo(indexVersionThree); + + assertThat(changes) + .element(6) + .isInstanceOf(IndexUpdate.SetCurrentVersion.class) + .asInstanceOf(InstanceOfAssertFactories.type(IndexUpdate.SetCurrentVersion.class)) + .extracting(IndexUpdate.SetCurrentVersion::versionId) + .isEqualTo(indexVersionThree.versionId()); + } + + @Test + public void indexVersionIDReassignment() { + // index versions have out-of-sequence IDs and different properties so they won't be deduplicated + IndexVersion indexVersionOne = newIndexVersion(1000, 1000L, ImmutableMap.of("v", "1")); + IndexVersion indexVersionTwo = newIndexVersion(1001, 2000L, ImmutableMap.of("v", "2")); + IndexVersion indexVersionThree = newIndexVersion(1002, 3000L, ImmutableMap.of("v", "3")); + + IndexMetadata indexMetadata = + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("custom-location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(indexVersionOne) + .setCurrentVersion(indexVersionOne.versionId()) + .addVersion(indexVersionTwo) + .setCurrentVersion(indexVersionTwo.versionId()) + .addVersion(indexVersionThree) + .setCurrentVersion(indexVersionThree.versionId()) + .build(); + + assertThat(indexMetadata.currentVersion()) + 
.isEqualTo(ImmutableIndexVersion.builder().from(indexVersionThree).versionId(3).build()); + + // IDs of the index versions should be re-assigned + assertThat(indexMetadata.versions()) + .hasSize(3) + .containsExactly( + ImmutableIndexVersion.builder().from(indexVersionOne).versionId(1).build(), + ImmutableIndexVersion.builder().from(indexVersionTwo).versionId(2).build(), + ImmutableIndexVersion.builder().from(indexVersionThree).versionId(3).build()); + } + + @Test + public void indexVersionDeduplication() { + // the last three index versions duplicate the first three + // and only differ in their version ID and creation timestamp + IndexVersion indexVersionOne = newIndexVersion(1, 1000L); + IndexVersion indexVersionTwo = newIndexVersion(2, 2000L, ImmutableMap.of("key", "value")); + IndexVersion indexVersionThree = newIndexVersion(3, 3000L, ImmutableMap.of("key2", "value2")); + IndexVersion indexVersionOneUpdated = + ImmutableIndexVersion.builder() + .from(indexVersionOne) + .versionId(4) + .timestampMillis(4000) + .build(); + IndexVersion indexVersionTwoUpdated = + ImmutableIndexVersion.builder() + .from(indexVersionTwo) + .versionId(5) + .timestampMillis(5000) + .build(); + IndexVersion indexVersionThreeUpdated = + ImmutableIndexVersion.builder() + .from(indexVersionThree) + .versionId(6) + .timestampMillis(6000) + .build(); + + IndexMetadata indexMetadata = + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("custom-location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(indexVersionOne) + .setCurrentVersion(indexVersionOne.versionId()) + .addVersion(indexVersionTwo) + .setCurrentVersion(indexVersionTwo.versionId()) + .addVersion(indexVersionThree) + .setCurrentVersion(indexVersionThree.versionId()) + .addVersion(indexVersionOneUpdated) + .setCurrentVersion(indexVersionOneUpdated.versionId()) + 
.addVersion(indexVersionTwoUpdated) + .setCurrentVersion(indexVersionTwoUpdated.versionId()) + .addVersion(indexVersionThreeUpdated) + .setCurrentVersion(indexVersionThreeUpdated.versionId()) + .build(); + + assertThat(indexMetadata.currentVersion()) + .isEqualTo(ImmutableIndexVersion.builder().from(indexVersionThree).versionId(3).build()); + + // IDs of the index versions should be re-assigned and index versions should be de-duplicated + assertThat(indexMetadata.versions()) + .hasSize(3) + .containsExactly( + indexVersionOne, + ImmutableIndexVersion.builder().from(indexVersionTwo).versionId(2).build(), + ImmutableIndexVersion.builder().from(indexVersionThree).versionId(3).build()); + } + + @Test + public void snapshotsById() { + IndexVersion version = newIndexVersion(1, 1000L); + IndexSnapshot snapshot1 = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .build(); + IndexSnapshot snapshot2 = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(101L) + .indexSnapshotId(201L) + .versionId(1) + .build(); + + IndexMetadata indexMetadata = + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("custom-location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .addSnapshot(snapshot1) + .addSnapshot(snapshot2) + .build(); + + assertThat(indexMetadata.snapshots()).hasSize(2).containsExactly(snapshot1, snapshot2); + assertThat(indexMetadata.snapshot(200L)).isEqualTo(snapshot1); + assertThat(indexMetadata.snapshot(201L)).isEqualTo(snapshot2); + assertThat(indexMetadata.snapshotForTableSnapshot(100L)).isEqualTo(snapshot1); + assertThat(indexMetadata.snapshotForTableSnapshot(101L)).isEqualTo(snapshot2); + } + + @Test + public void addSnapshotWithUnknownVersionId() { + IndexVersion version = newIndexVersion(1, 1000L); + IndexSnapshot snapshot = + 
ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(999) // unknown version id + .build(); + + assertThatThrownBy( + () -> + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("custom-location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .addSnapshot(snapshot) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index version id. Cannot add snapshot with unknown version id: 999"); + } + + @Test + public void addDuplicateSnapshotForTableSnapshot() { + IndexVersion version = newIndexVersion(1, 1000L); + IndexSnapshot snapshot1 = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .build(); + IndexSnapshot snapshot2 = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) // same table snapshot id + .indexSnapshotId(201L) + .versionId(1) + .build(); + + assertThatThrownBy( + () -> + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("custom-location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .addSnapshot(snapshot1) + .addSnapshot(snapshot2) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Invalid table snapshot id. 
Snapshot for table snapshot 100 already added to the index."); + } + + @Test + public void removeSnapshots() { + IndexVersion version = newIndexVersion(1, 1000L); + IndexSnapshot snapshot1 = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .build(); + IndexSnapshot snapshot2 = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(101L) + .indexSnapshotId(201L) + .versionId(1) + .build(); + + IndexMetadata indexMetadata = + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("custom-location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .addSnapshot(snapshot1) + .addSnapshot(snapshot2) + .build(); + + assertThat(indexMetadata.snapshots()).hasSize(2); + + IndexMetadata updated = + IndexMetadata.buildFrom(indexMetadata).removeSnapshots(ImmutableSet.of(200L)).build(); + assertThat(updated.snapshots()).hasSize(1).containsExactly(snapshot2); + } + + @Test + public void removeNonExistentSnapshots() { + IndexVersion version = newIndexVersion(1, 1000L); + IndexSnapshot snapshot = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .build(); + + IndexMetadata indexMetadata = + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("custom-location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .addSnapshot(snapshot) + .build(); + + indexMetadata = + IndexMetadata.buildFrom(indexMetadata).removeSnapshots(ImmutableSet.of(999L)).build(); + + assertThat(indexMetadata.snapshots()).hasSize(1).containsExactly(snapshot); + } + + @Test + public void indexTypeIsRequired() { + IndexVersion version = newIndexVersion(1); + 
assertThatThrownBy( + () -> + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setIndexColumnIds(ImmutableList.of(1)) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index type: null"); + } + + @Test + public void indexColumnIdsAreRequired() { + IndexVersion version = newIndexVersion(1); + assertThatThrownBy( + () -> + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setType(IndexType.BTREE) + .setOptimizedColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Index column IDs cannot be empty"); + } + + @Test + public void optimizedColumnIdsAreRequired() { + IndexVersion version = newIndexVersion(1); + assertThatThrownBy( + () -> + IndexMetadata.builder() + .upgradeFormatVersion(1) + .setTableUuid(TABLE_UUID) + .setLocation("location") + .setType(IndexType.BTREE) + .setIndexColumnIds(ImmutableList.of(1)) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Optimized column IDs cannot be empty"); + } +} diff --git a/core/src/test/java/org/apache/iceberg/index/TestIndexMetadataParser.java b/core/src/test/java/org/apache/iceberg/index/TestIndexMetadataParser.java new file mode 100644 index 000000000000..7b0026037067 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/index/TestIndexMetadataParser.java @@ -0,0 +1,486 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +public class TestIndexMetadataParser { + + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> IndexMetadataParser.fromJson((String) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse index metadata from null string"); + + assertThatThrownBy(() -> IndexMetadataParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse index metadata from null object"); + + assertThatThrownBy(() -> IndexMetadataParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index metadata: null"); + } + + @Test + public void testReadAndWriteValidIndexMetadata() { + IndexVersion version1 = + ImmutableIndexVersion.builder() + .versionId(1) + .timestampMillis(4353L) + .properties(ImmutableMap.of("user-key", "user-value")) + .build(); + + IndexHistoryEntry historyEntry = + 
ImmutableIndexHistoryEntry.builder().versionId(1).timestampMillis(4353L).build(); + + IndexSnapshot snapshot = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .properties(ImmutableMap.of("snapshot-key", "snapshot-value")) + .build(); + + IndexMetadata expectedMetadata = + ImmutableIndexMetadata.of( + "fa6506c3-7681-40c8-86dc-e36561f83385", + "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + 1, + IndexType.BTREE, + ImmutableList.of(1, 2), + ImmutableList.of(1), + "s3://bucket/test/location", + 1, + ImmutableList.of(version1), + ImmutableList.of(historyEntry), + ImmutableList.of(snapshot), + ImmutableList.of(), + null); + + String json = IndexMetadataParser.toJson(expectedMetadata); + IndexMetadata actual = IndexMetadataParser.fromJson(json); + + assertThat(actual.uuid()).isEqualTo(expectedMetadata.uuid()); + assertThat(actual.tableUuid()).isEqualTo(expectedMetadata.tableUuid()); + assertThat(actual.formatVersion()).isEqualTo(expectedMetadata.formatVersion()); + assertThat(actual.type()).isEqualTo(expectedMetadata.type()); + assertThat(actual.indexColumnIds()).isEqualTo(expectedMetadata.indexColumnIds()); + assertThat(actual.optimizedColumnIds()).isEqualTo(expectedMetadata.optimizedColumnIds()); + assertThat(actual.location()).isEqualTo(expectedMetadata.location()); + assertThat(actual.currentVersionId()).isEqualTo(expectedMetadata.currentVersionId()); + assertThat(actual.versions()).hasSize(1); + assertThat(actual.history()).hasSize(1); + assertThat(actual.snapshots()).hasSize(1); + } + + @Test + public void testFailReadingIndexMetadataMissingUuid() { + String json = + """ + { + "format-version": 1, + "table-uuid": "table-uuid", + "index-type": "btree", + "index-column-ids": [1], + "optimized-column-ids": [1], + "location": "s3://bucket/test", + "current-version-id": 1, + "versions": [], + "version-log": [], + "snapshots": [] + } + """; + + assertThatThrownBy(() -> IndexMetadataParser.fromJson(json)) + 
.isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: index-uuid"); + } + + @Test + public void testFailReadingIndexMetadataMissingTableUuid() { + String json = + """ + { + "format-version": 1, + "index-uuid": "uuid", + "index-type": "btree", + "index-column-ids": [1], + "optimized-column-ids": [1], + "location": "s3://bucket/test", + "current-version-id": 1, + "versions": [], + "version-log": [], + "snapshots": [] + } + """; + + assertThatThrownBy(() -> IndexMetadataParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: table-uuid"); + } + + @Test + public void testFailReadingIndexMetadataMissingFormatVersion() { + String json = + """ + { + "index-uuid": "uuid", + "table-uuid": "table-uuid", + "index-type": "btree", + "index-column-ids": [1], + "optimized-column-ids": [1], + "location": "s3://bucket/test", + "current-version-id": 1, + "versions": [], + "version-log": [], + "snapshots": [] + } + """; + + assertThatThrownBy(() -> IndexMetadataParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing int: format-version"); + } + + @Test + public void testFailReadingIndexMetadataMissingIndexType() { + String json = + """ + { + "index-uuid": "uuid", + "table-uuid": "table-uuid", + "format-version": 1, + "index-column-ids": [1], + "optimized-column-ids": [1], + "location": "s3://bucket/test", + "current-version-id": 1, + "versions": [], + "version-log": [], + "snapshots": [] + } + """; + + assertThatThrownBy(() -> IndexMetadataParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: index-type"); + } + + @Test + public void testFailReadingIndexMetadataMissingLocation() { + String json = + """ + { + "index-uuid": "uuid", + "table-uuid": "table-uuid", + "format-version": 1, + "index-type": "btree", + "index-column-ids": [1], + "optimized-column-ids": [1], + 
"current-version-id": 1, + "versions": [], + "version-log": [], + "snapshots": [] + } + """; + + assertThatThrownBy(() -> IndexMetadataParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: location"); + } + + @Test + public void testFailReadingIndexMetadataInvalidIndexType() { + String json = + """ + { + "index-uuid": "uuid", + "table-uuid": "table-uuid", + "format-version": 1, + "index-type": "invalid-type", + "index-column-ids": [1], + "optimized-column-ids": [1], + "location": "s3://bucket/test", + "current-version-id": 1, + "versions": [], + "version-log": [], + "snapshots": [] + } + """; + + assertThatThrownBy(() -> IndexMetadataParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unknown index type: invalid-type"); + } + + @ParameterizedTest + @EnumSource(IndexType.class) + public void testRoundTripWithAllIndexTypes(IndexType indexType) { + IndexVersion version = + ImmutableIndexVersion.builder().versionId(1).timestampMillis(12345L).build(); + + IndexMetadata metadata = + ImmutableIndexMetadata.of( + "test-uuid", + "test-table-uuid", + 1, + indexType, + ImmutableList.of(1), + ImmutableList.of(1), + "s3://bucket/test", + 1, + ImmutableList.of(version), + ImmutableList.of(), + ImmutableList.of(), + ImmutableList.of(), + null); + + String json = IndexMetadataParser.toJson(metadata); + IndexMetadata parsed = IndexMetadataParser.fromJson(json); + + assertThat(parsed.type()).isEqualTo(indexType); + } + + @Test + public void testRoundTripWithMultipleVersions() { + IndexVersion version1 = + ImmutableIndexVersion.builder() + .versionId(1) + .timestampMillis(1000L) + .properties(ImmutableMap.of("v", "1")) + .build(); + + IndexVersion version2 = + ImmutableIndexVersion.builder() + .versionId(2) + .timestampMillis(2000L) + .properties(ImmutableMap.of("v", "2")) + .build(); + + IndexHistoryEntry entry1 = + 
ImmutableIndexHistoryEntry.builder().versionId(1).timestampMillis(1000L).build(); + + IndexHistoryEntry entry2 = + ImmutableIndexHistoryEntry.builder().versionId(2).timestampMillis(2000L).build(); + + IndexSnapshot snapshot1 = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(10L) + .indexSnapshotId(100L) + .versionId(1) + .build(); + + IndexSnapshot snapshot2 = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(20L) + .indexSnapshotId(200L) + .versionId(2) + .build(); + + IndexMetadata metadata = + ImmutableIndexMetadata.of( + "multi-version-uuid", + "test-table-uuid", + 1, + IndexType.TERM, + ImmutableList.of(1, 2, 3), + ImmutableList.of(1, 2), + "s3://bucket/test/multi", + 2, + ImmutableList.of(version1, version2), + ImmutableList.of(entry1, entry2), + ImmutableList.of(snapshot1, snapshot2), + ImmutableList.of(), + null); + + String json = IndexMetadataParser.toJson(metadata); + IndexMetadata parsed = IndexMetadataParser.fromJson(json); + + assertThat(parsed.versions()).hasSize(2); + assertThat(parsed.history()).hasSize(2); + assertThat(parsed.snapshots()).hasSize(2); + assertThat(parsed.currentVersionId()).isEqualTo(2); + assertThat(parsed.indexColumnIds()).containsExactly(1, 2, 3); + assertThat(parsed.optimizedColumnIds()).containsExactly(1, 2); + } + + @Test + public void testPrettyPrint() { + IndexVersion version = + ImmutableIndexVersion.builder().versionId(1).timestampMillis(12345L).build(); + + IndexMetadata metadata = + ImmutableIndexMetadata.of( + "pretty-uuid", + "test-table-uuid", + 1, + IndexType.IVF, + ImmutableList.of(1), + ImmutableList.of(1), + "s3://bucket/test", + 1, + ImmutableList.of(version), + ImmutableList.of(), + ImmutableList.of(), + ImmutableList.of(), + null); + + String prettyJson = IndexMetadataParser.toJson(metadata, true); + String compactJson = IndexMetadataParser.toJson(metadata, false); + + // Pretty JSON should contain newlines + assertThat(prettyJson).contains("\n"); + // Compact JSON should not contain newlines + 
assertThat(compactJson).doesNotContain("\n"); + + // Both should parse to equivalent metadata + assertThat(IndexMetadataParser.fromJson(prettyJson).uuid()) + .isEqualTo(IndexMetadataParser.fromJson(compactJson).uuid()); + } + + @Test + public void testCurrentVersionAccess() { + IndexVersion version1 = + ImmutableIndexVersion.builder().versionId(1).timestampMillis(1000L).build(); + + IndexVersion version2 = + ImmutableIndexVersion.builder().versionId(2).timestampMillis(2000L).build(); + + IndexMetadata metadata = + ImmutableIndexMetadata.of( + "test-uuid", + "test-table-uuid", + 1, + IndexType.BTREE, + ImmutableList.of(1), + ImmutableList.of(1), + "s3://bucket/test", + 2, + ImmutableList.of(version1, version2), + ImmutableList.of(), + ImmutableList.of(), + ImmutableList.of(), + null); + + assertThat(metadata.currentVersion().versionId()).isEqualTo(2); + assertThat(metadata.version(1).timestampMillis()).isEqualTo(1000L); + assertThat(metadata.version(2).timestampMillis()).isEqualTo(2000L); + } + + @Test + public void testToJsonWithExpectedString() { + IndexVersion version = + ImmutableIndexVersion.builder() + .versionId(1) + .timestampMillis(1234567890L) + .properties(ImmutableMap.of("key", "value")) + .build(); + + IndexHistoryEntry historyEntry = + ImmutableIndexHistoryEntry.builder().versionId(1).timestampMillis(1234567890L).build(); + + IndexSnapshot snapshot = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .properties(ImmutableMap.of("snap-key", "snap-value")) + .build(); + + IndexMetadata metadata = + ImmutableIndexMetadata.of( + "fa6506c3-7681-40c8-86dc-e36561f83385", + "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + 1, + IndexType.BTREE, + ImmutableList.of(1, 2), + ImmutableList.of(1), + "s3://bucket/test/location", + 1, + ImmutableList.of(version), + ImmutableList.of(historyEntry), + ImmutableList.of(snapshot), + ImmutableList.of(), + null); + + String expectedJson = + """ + { + "index-uuid": 
"fa6506c3-7681-40c8-86dc-e36561f83385", + "table-uuid": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "format-version": 1, + "index-type": "btree", + "index-column-ids": [1, 2], + "optimized-column-ids": [1], + "location": "s3://bucket/test/location", + "current-version-id": 1, + "versions": [ + { + "version-id": 1, + "timestamp-ms": 1234567890, + "properties": { + "key": "value" + } + } + ], + "version-log": [ + { + "timestamp-ms": 1234567890, + "version-id": 1 + } + ], + "snapshots": [ + { + "table-snapshot-id": 100, + "index-snapshot-id": 200, + "version-id": 1, + "properties": { + "snap-key": "snap-value" + } + } + ] + } + """ + .replaceAll("\\s+", ""); + + String actualJson = IndexMetadataParser.toJson(metadata); + assertThat(actualJson).isEqualTo(expectedJson); + + // Also verify round-trip + IndexMetadata parsed = IndexMetadataParser.fromJson(actualJson); + assertThat(parsed.uuid()).isEqualTo("fa6506c3-7681-40c8-86dc-e36561f83385"); + assertThat(parsed.tableUuid()).isEqualTo("a1b2c3d4-e5f6-7890-abcd-ef1234567890"); + assertThat(parsed.formatVersion()).isEqualTo(1); + assertThat(parsed.type()).isEqualTo(IndexType.BTREE); + assertThat(parsed.indexColumnIds()).containsExactly(1, 2); + assertThat(parsed.optimizedColumnIds()).containsExactly(1); + assertThat(parsed.location()).isEqualTo("s3://bucket/test/location"); + assertThat(parsed.currentVersionId()).isEqualTo(1); + assertThat(parsed.versions()).hasSize(1); + assertThat(parsed.history()).hasSize(1); + assertThat(parsed.snapshots()).hasSize(1); + } +} diff --git a/core/src/test/java/org/apache/iceberg/index/TestIndexSnapshotParser.java b/core/src/test/java/org/apache/iceberg/index/TestIndexSnapshotParser.java new file mode 100644 index 000000000000..f50a28611745 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/index/TestIndexSnapshotParser.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +public class TestIndexSnapshotParser { + private static final String INDEX_SNAPSHOT_JSON = + """ + { + "table-snapshot-id": 100, + "index-snapshot-id": 200, + "version-id": 1, + "properties": { + "user-key": "user-value" + } + } + """ + .replaceAll("\\s+", ""); + + private static final IndexSnapshot INDEX_SNAPSHOT = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .properties(ImmutableMap.of("user-key", "user-value")) + .build(); + + private static final String INDEX_SNAPSHOT_WITHOUT_PROPERTIES_JSON = + """ + { + "table-snapshot-id": 100, + "index-snapshot-id": 200, + "version-id": 1 + } + """ + .replaceAll("\\s+", ""); + + private static final IndexSnapshot INDEX_SNAPSHOT_WITHOUT_PROPERTIES = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .properties(ImmutableMap.of()) + .build(); + + @Test + public void testParseIndexSnapshot() { + 
assertThat(IndexSnapshotParser.fromJson(INDEX_SNAPSHOT_JSON)) + .as("Should be able to parse valid index snapshot") + .isEqualTo(INDEX_SNAPSHOT); + } + + @Test + public void testSerializeIndexSnapshot() { + assertThat(IndexSnapshotParser.toJson(INDEX_SNAPSHOT)) + .as("Should be able to serialize valid index snapshot") + .isEqualTo(INDEX_SNAPSHOT_JSON); + } + + @Test + public void testNullIndexSnapshot() { + assertThatThrownBy(() -> IndexSnapshotParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index snapshot: null"); + + assertThatThrownBy(() -> IndexSnapshotParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse index snapshot from null object"); + } + + @Test + public void testIndexSnapshotMissingFields() { + assertThatThrownBy(() -> IndexSnapshotParser.fromJson("{}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing long: table-snapshot-id"); + + assertThatThrownBy(() -> IndexSnapshotParser.fromJson("{\"table-snapshot-id\":100}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing long: index-snapshot-id"); + + assertThatThrownBy( + () -> + IndexSnapshotParser.fromJson( + """ + { + "table-snapshot-id": 100, + "index-snapshot-id": 200 + } + """)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing int: version-id"); + } + + @Test + public void testIndexSnapshotWithoutProperties() { + IndexSnapshot snapshot = IndexSnapshotParser.fromJson(INDEX_SNAPSHOT_WITHOUT_PROPERTIES_JSON); + assertThat(snapshot).isEqualTo(INDEX_SNAPSHOT_WITHOUT_PROPERTIES); + } + + @Test + public void testIndexSnapshotWithEmptyProperties() { + // Empty properties should not be serialized + String json = IndexSnapshotParser.toJson(INDEX_SNAPSHOT_WITHOUT_PROPERTIES); + assertThat(json).isEqualTo(INDEX_SNAPSHOT_WITHOUT_PROPERTIES_JSON); + } + + @Test + public void testRoundTrip() { + IndexSnapshot 
original = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(1000L) + .indexSnapshotId(2000L) + .versionId(5) + .properties(ImmutableMap.of("prop1", "val1", "prop2", "val2")) + .build(); + + String json = IndexSnapshotParser.toJson(original); + IndexSnapshot parsed = IndexSnapshotParser.fromJson(json); + + assertThat(parsed.tableSnapshotId()).isEqualTo(original.tableSnapshotId()); + assertThat(parsed.indexSnapshotId()).isEqualTo(original.indexSnapshotId()); + assertThat(parsed.versionId()).isEqualTo(original.versionId()); + assertThat(parsed.properties()).isEqualTo(original.properties()); + } + + @Test + public void testRoundTripWithNullProperties() { + IndexSnapshot original = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .build(); + + String json = IndexSnapshotParser.toJson(original); + IndexSnapshot parsed = IndexSnapshotParser.fromJson(json); + + assertThat(parsed.tableSnapshotId()).isEqualTo(original.tableSnapshotId()); + assertThat(parsed.indexSnapshotId()).isEqualTo(original.indexSnapshotId()); + assertThat(parsed.versionId()).isEqualTo(original.versionId()); + assertThat(parsed.properties()).isEmpty(); + } +} diff --git a/core/src/test/java/org/apache/iceberg/index/TestIndexType.java b/core/src/test/java/org/apache/iceberg/index/TestIndexType.java new file mode 100644 index 000000000000..fab25ccdf02f --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/index/TestIndexType.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.index; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.junit.jupiter.api.Test; + +public class TestIndexType { + + @Test + public void testFromStringBtree() { + assertThat(IndexType.fromString("btree")).isEqualTo(IndexType.BTREE); + assertThat(IndexType.fromString("BTREE")).isEqualTo(IndexType.BTREE); + assertThat(IndexType.fromString("Btree")).isEqualTo(IndexType.BTREE); + } + + @Test + public void testFromStringTerm() { + assertThat(IndexType.fromString("term")).isEqualTo(IndexType.TERM); + assertThat(IndexType.fromString("TERM")).isEqualTo(IndexType.TERM); + assertThat(IndexType.fromString("Term")).isEqualTo(IndexType.TERM); + } + + @Test + public void testFromStringIvf() { + assertThat(IndexType.fromString("ivf")).isEqualTo(IndexType.IVF); + assertThat(IndexType.fromString("IVF")).isEqualTo(IndexType.IVF); + assertThat(IndexType.fromString("Ivf")).isEqualTo(IndexType.IVF); + } + + @Test + public void testFromStringUnknownType() { + assertThatThrownBy(() -> IndexType.fromString("unknown")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unknown index type: unknown"); + + assertThatThrownBy(() -> IndexType.fromString("")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Unknown index type:"); + + assertThatThrownBy(() -> IndexType.fromString("hash")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unknown index type: hash"); + } + + @Test + public void testTypeName() { 
+ assertThat(IndexType.BTREE.typeName()).isEqualTo("btree"); + assertThat(IndexType.TERM.typeName()).isEqualTo("term"); + assertThat(IndexType.IVF.typeName()).isEqualTo("ivf"); + } + + @Test + public void testToString() { + assertThat(IndexType.BTREE.toString()).isEqualTo("btree"); + assertThat(IndexType.TERM.toString()).isEqualTo("term"); + assertThat(IndexType.IVF.toString()).isEqualTo("ivf"); + } + + @Test + public void testAllTypesHaveConsistentNameAndToString() { + for (IndexType type : IndexType.values()) { + assertThat(type.typeName()).isEqualTo(type.toString()); + assertThat(IndexType.fromString(type.typeName())).isEqualTo(type); + } + } +} diff --git a/core/src/test/java/org/apache/iceberg/index/TestIndexUpdateParser.java b/core/src/test/java/org/apache/iceberg/index/TestIndexUpdateParser.java new file mode 100644 index 000000000000..437913e863a7 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/index/TestIndexUpdateParser.java @@ -0,0 +1,396 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.util.Set; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.junit.jupiter.api.Test; + +public class TestIndexUpdateParser { + + private static final IndexSnapshot INDEX_SNAPSHOT = + ImmutableIndexSnapshot.builder() + .tableSnapshotId(100L) + .indexSnapshotId(200L) + .versionId(1) + .properties(ImmutableMap.of("user-key", "user-value")) + .build(); + + private static final IndexVersion INDEX_VERSION = + ImmutableIndexVersion.builder() + .versionId(1) + .timestampMillis(12345L) + .properties(ImmutableMap.of("version-key", "version-value")) + .build(); + + @Test + public void testIndexUpdateWithoutActionCannotDeserialize() { + assertThatThrownBy(() -> IndexUpdateParser.fromJson("{\"action\":null}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse index update. Missing field: action"); + + assertThatThrownBy(() -> IndexUpdateParser.fromJson("{\"version-id\":1}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse index update. 
Missing field: action"); + } + + @Test + public void testIndexUpdateWithInvalidActionCannotDeserialize() { + assertThatThrownBy(() -> IndexUpdateParser.fromJson("{\"action\":\"invalid-action\"}")) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Cannot convert index update action from json: invalid-action"); + } + + /** AddIndexSnapshot */ + @Test + public void testAddIndexSnapshotFromJson() { + String action = IndexUpdateParser.ADD_SNAPSHOT; + String snapshotJson = IndexSnapshotParser.toJson(INDEX_SNAPSHOT); + String json = String.format("{\"action\":\"%s\",\"snapshot\":%s}", action, snapshotJson); + + IndexUpdate update = IndexUpdateParser.fromJson(json); + + assertThat(update).isInstanceOf(IndexUpdate.AddSnapshot.class); + IndexUpdate.AddSnapshot addSnapshot = (IndexUpdate.AddSnapshot) update; + assertThat(addSnapshot.indexSnapshot().tableSnapshotId()) + .isEqualTo(INDEX_SNAPSHOT.tableSnapshotId()); + assertThat(addSnapshot.indexSnapshot().indexSnapshotId()) + .isEqualTo(INDEX_SNAPSHOT.indexSnapshotId()); + assertThat(addSnapshot.indexSnapshot().versionId()).isEqualTo(INDEX_SNAPSHOT.versionId()); + assertThat(addSnapshot.indexSnapshot().properties()).isEqualTo(INDEX_SNAPSHOT.properties()); + } + + @Test + public void testAddIndexSnapshotToJson() { + String snapshotJson = IndexSnapshotParser.toJson(INDEX_SNAPSHOT); + String expected = + String.format( + "{\"action\":\"%s\",\"snapshot\":%s}", IndexUpdateParser.ADD_SNAPSHOT, snapshotJson); + + IndexUpdate update = new IndexUpdate.AddSnapshot(INDEX_SNAPSHOT); + String actual = IndexUpdateParser.toJson(update); + + assertThat(actual) + .as("AddIndexSnapshot should serialize to the correct JSON value") + .isEqualTo(expected); + } + + @Test + public void testAddIndexSnapshotRoundTrip() { + IndexUpdate original = new IndexUpdate.AddSnapshot(INDEX_SNAPSHOT); + String json = IndexUpdateParser.toJson(original); + IndexUpdate parsed = IndexUpdateParser.fromJson(json); + + 
assertThat(parsed).isInstanceOf(IndexUpdate.AddSnapshot.class); + IndexUpdate.AddSnapshot parsedUpdate = (IndexUpdate.AddSnapshot) parsed; + assertThat(parsedUpdate.indexSnapshot()).isEqualTo(INDEX_SNAPSHOT); + } + + /** RemoveIndexSnapshots */ + @Test + public void testRemoveIndexSnapshotsFromJson() { + String action = IndexUpdateParser.REMOVE_SNAPSHOTS; + Set snapshotIds = ImmutableSet.of(1L, 2L, 3L); + String json = String.format("{\"action\":\"%s\",\"snapshot-ids\":[1,2,3]}", action); + + IndexUpdate update = IndexUpdateParser.fromJson(json); + + assertThat(update).isInstanceOf(IndexUpdate.RemoveSnapshots.class); + IndexUpdate.RemoveSnapshots removeSnapshots = (IndexUpdate.RemoveSnapshots) update; + assertThat(removeSnapshots.indexSnapshotIds()).isEqualTo(snapshotIds); + } + + @Test + public void testRemoveIndexSnapshotsToJson() { + Set snapshotIds = ImmutableSet.of(1L, 2L, 3L); + IndexUpdate update = new IndexUpdate.RemoveSnapshots(snapshotIds); + String json = IndexUpdateParser.toJson(update); + + assertThat(json).contains("\"action\":\"remove-snapshots\""); + assertThat(json).contains("\"snapshot-ids\":"); + + // Verify round-trip + IndexUpdate parsed = IndexUpdateParser.fromJson(json); + assertThat(parsed).isInstanceOf(IndexUpdate.RemoveSnapshots.class); + assertThat(((IndexUpdate.RemoveSnapshots) parsed).indexSnapshotIds()).isEqualTo(snapshotIds); + } + + @Test + public void testRemoveIndexSnapshotsSingleId() { + IndexUpdate update = new IndexUpdate.RemoveSnapshots(42L); + String json = IndexUpdateParser.toJson(update); + IndexUpdate parsed = IndexUpdateParser.fromJson(json); + + assertThat(parsed).isInstanceOf(IndexUpdate.RemoveSnapshots.class); + assertThat(((IndexUpdate.RemoveSnapshots) parsed).indexSnapshotIds()).containsExactly(42L); + } + + /** AddIndexVersion */ + @Test + public void testAddIndexVersionFromJson() { + String action = IndexUpdateParser.ADD_VERSION; + String versionJson = IndexVersionParser.toJson(INDEX_VERSION); + String json = 
String.format("{\"action\":\"%s\",\"version\":%s}", action, versionJson); + + IndexUpdate update = IndexUpdateParser.fromJson(json); + + assertThat(update).isInstanceOf(IndexUpdate.AddVersion.class); + IndexUpdate.AddVersion addVersion = (IndexUpdate.AddVersion) update; + assertThat(addVersion.indexVersion().versionId()).isEqualTo(INDEX_VERSION.versionId()); + } + + @Test + public void testAddIndexVersionToJson() { + String versionJson = IndexVersionParser.toJson(INDEX_VERSION); + String expected = + String.format( + "{\"action\":\"%s\",\"version\":%s}", IndexUpdateParser.ADD_VERSION, versionJson); + + IndexUpdate update = new IndexUpdate.AddVersion(INDEX_VERSION); + String actual = IndexUpdateParser.toJson(update); + + assertThat(actual) + .as("AddIndexVersion should serialize to the correct JSON value") + .isEqualTo(expected); + } + + @Test + public void testAddIndexVersionRoundTrip() { + IndexUpdate original = new IndexUpdate.AddVersion(INDEX_VERSION); + String json = IndexUpdateParser.toJson(original); + IndexUpdate parsed = IndexUpdateParser.fromJson(json); + + assertThat(parsed).isInstanceOf(IndexUpdate.AddVersion.class); + assertThat(((IndexUpdate.AddVersion) parsed).indexVersion().versionId()) + .isEqualTo(INDEX_VERSION.versionId()); + } + + @Test + public void testAddIndexVersionWithoutProperties() { + IndexVersion versionWithoutProps = + ImmutableIndexVersion.builder() + .versionId(2) + .timestampMillis(67890L) + .properties(ImmutableMap.of()) + .build(); + + IndexUpdate original = new IndexUpdate.AddVersion(versionWithoutProps); + String json = IndexUpdateParser.toJson(original); + IndexUpdate parsed = IndexUpdateParser.fromJson(json); + + assertThat(parsed).isInstanceOf(IndexUpdate.AddVersion.class); + IndexUpdate.AddVersion parsedUpdate = (IndexUpdate.AddVersion) parsed; + assertThat(parsedUpdate.indexVersion().versionId()).isEqualTo(2); + assertThat(parsedUpdate.indexVersion().timestampMillis()).isEqualTo(67890L); + 
assertThat(parsedUpdate.indexVersion().properties()).isEmpty(); + } + + /** SetCurrentVersionId */ + @Test + public void testSetCurrentVersionIdFromJson() { + String action = IndexUpdateParser.SET_CURRENT_VERSION; + String json = String.format("{\"action\":\"%s\",\"version-id\":5}", action); + + IndexUpdate update = IndexUpdateParser.fromJson(json); + + assertThat(update).isInstanceOf(IndexUpdate.SetCurrentVersion.class); + IndexUpdate.SetCurrentVersion setCurrentVersionId = (IndexUpdate.SetCurrentVersion) update; + assertThat(setCurrentVersionId.versionId()).isEqualTo(5); + } + + @Test + public void testSetCurrentVersionIdToJson() { + String expected = + String.format( + "{\"action\":\"%s\",\"version-id\":5}", IndexUpdateParser.SET_CURRENT_VERSION); + + IndexUpdate update = new IndexUpdate.SetCurrentVersion(5); + String actual = IndexUpdateParser.toJson(update); + + assertThat(actual) + .as("SetCurrentVersionId should serialize to the correct JSON value") + .isEqualTo(expected); + } + + @Test + public void testSetCurrentVersionIdRoundTrip() { + IndexUpdate original = new IndexUpdate.SetCurrentVersion(7); + String json = IndexUpdateParser.toJson(original); + IndexUpdate parsed = IndexUpdateParser.fromJson(json); + + assertThat(parsed).isInstanceOf(IndexUpdate.SetCurrentVersion.class); + assertThat(((IndexUpdate.SetCurrentVersion) parsed).versionId()).isEqualTo(7); + } + + /** SetIndexLocation */ + @Test + public void testSetLocationFromJson() { + String action = IndexUpdateParser.SET_LOCATION; + String location = "s3://bucket/warehouse/index_location"; + String json = String.format("{\"action\":\"%s\",\"location\":\"%s\"}", action, location); + + IndexUpdate update = IndexUpdateParser.fromJson(json); + + assertThat(update).isInstanceOf(IndexUpdate.SetLocation.class); + IndexUpdate.SetLocation setLocation = (IndexUpdate.SetLocation) update; + assertThat(setLocation.location()).isEqualTo(location); + } + + @Test + public void testSetLocationToJson() { + String 
location = "s3://bucket/warehouse/index_location"; + String expected = + String.format( + "{\"action\":\"%s\",\"location\":\"%s\"}", IndexUpdateParser.SET_LOCATION, location); + + IndexUpdate update = new IndexUpdate.SetLocation(location); + String actual = IndexUpdateParser.toJson(update); + + assertThat(actual) + .as("SetIndexLocation should serialize to the correct JSON value") + .isEqualTo(expected); + } + + @Test + public void testSetLocationRoundTrip() { + String location = "hdfs://namenode/warehouse/index"; + IndexUpdate original = new IndexUpdate.SetLocation(location); + String json = IndexUpdateParser.toJson(original); + IndexUpdate parsed = IndexUpdateParser.fromJson(json); + + assertThat(parsed).isInstanceOf(IndexUpdate.SetLocation.class); + assertThat(((IndexUpdate.SetLocation) parsed).location()).isEqualTo(location); + } + + /** Error cases */ + @Test + public void testFromJsonWithNullNode() { + assertThatThrownBy(() -> IndexUpdateParser.fromJson((String) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot parse index update from null string"); + } + + @Test + public void testToJsonWithUnrecognizedUpdateType() { + // Create a custom IndexUpdate that's not in the ACTIONS map + IndexUpdate unknownUpdate = + new IndexUpdate() { + @Override + public void applyTo(IndexMetadata.Builder indexMetadataBuilder) { + // no-op + } + }; + + assertThatThrownBy(() -> IndexUpdateParser.toJson(unknownUpdate)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Unrecognized index update type"); + } + + @Test + public void testAddIndexSnapshotMissingSnapshot() { + String json = "{\"action\":\"add-snapshot\"}"; + + assertThatThrownBy(() -> IndexUpdateParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot parse missing field: snapshot"); + } + + @Test + public void testRemoveIndexSnapshotsMissingIds() { + String json = "{\"action\":\"remove-snapshots\"}"; + + 
assertThatThrownBy(() -> IndexUpdateParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("must be non-null"); + } + + @Test + public void testSetIndexCurrentVersionMissingVersion() { + String json = "{\"action\":\"set-current-version\"}"; + + assertThatThrownBy(() -> IndexUpdateParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot parse missing int: version-id"); + } + + @Test + public void testSetLocationMissingLocation() { + String json = "{\"action\":\"set-location\"}"; + + assertThatThrownBy(() -> IndexUpdateParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot parse missing string: location"); + } + + /** UpgradeFormatVersion */ + @Test + public void testUpgradeFormatVersionFromJson() { + String action = IndexUpdateParser.UPGRADE_FORMAT_VERSION; + int formatVersion = 2; + String json = String.format("{\"action\":\"%s\",\"format-version\":%d}", action, formatVersion); + + IndexUpdate update = IndexUpdateParser.fromJson(json); + + assertThat(update).isInstanceOf(IndexUpdate.UpgradeFormatVersion.class); + IndexUpdate.UpgradeFormatVersion upgradeFormatVersion = + (IndexUpdate.UpgradeFormatVersion) update; + assertThat(upgradeFormatVersion.formatVersion()).isEqualTo(formatVersion); + } + + @Test + public void testUpgradeFormatVersionToJson() { + int formatVersion = 2; + String expected = + String.format( + "{\"action\":\"%s\",\"format-version\":%d}", + IndexUpdateParser.UPGRADE_FORMAT_VERSION, formatVersion); + + IndexUpdate update = new IndexUpdate.UpgradeFormatVersion(formatVersion); + String actual = IndexUpdateParser.toJson(update); + + assertThat(actual) + .as("UpgradeFormatVersion should serialize to the correct JSON value") + .isEqualTo(expected); + } + + @Test + public void testUpgradeFormatVersionRoundTrip() { + int formatVersion = 3; + IndexUpdate original = new IndexUpdate.UpgradeFormatVersion(formatVersion); + 
String json = IndexUpdateParser.toJson(original); + IndexUpdate parsed = IndexUpdateParser.fromJson(json); + + assertThat(parsed).isInstanceOf(IndexUpdate.UpgradeFormatVersion.class); + assertThat(((IndexUpdate.UpgradeFormatVersion) parsed).formatVersion()) + .isEqualTo(formatVersion); + } + + @Test + public void testUpgradeFormatVersionMissingFormatVersion() { + String json = "{\"action\":\"upgrade-format-version\"}"; + + assertThatThrownBy(() -> IndexUpdateParser.fromJson(json)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot parse missing int: format-version"); + } +} diff --git a/core/src/test/java/org/apache/iceberg/index/TestIndexVersionParser.java b/core/src/test/java/org/apache/iceberg/index/TestIndexVersionParser.java new file mode 100644 index 000000000000..16b2be492acb --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/index/TestIndexVersionParser.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.index; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +public class TestIndexVersionParser { + private static final String INDEX_VERSION_ENTRY_JSON = + """ + { + "version-id": 1, + "timestamp-ms": 12345, + "properties": { + "user-key": "user-value" + } + } + """ + .replaceAll("\\s+", ""); + + private static final IndexVersion INDEX_VERSION_ENTRY = + ImmutableIndexVersion.builder() + .versionId(1) + .timestampMillis(12345) + .properties(ImmutableMap.of("user-key", "user-value")) + .build(); + + private static final String INDEX_VERSION_WITHOUT_PROPERTIES_JSON = + """ + { + "version-id": 1, + "timestamp-ms": 12345 + } + """ + .replaceAll("\\s+", ""); + + private static final IndexVersion INDEX_VERSION_WITHOUT_PROPERTIES = + ImmutableIndexVersion.builder() + .versionId(1) + .timestampMillis(12345) + .properties(ImmutableMap.of()) + .build(); + + @Test + public void testParseIndexVersion() { + assertThat(IndexVersionParser.fromJson(INDEX_VERSION_ENTRY_JSON)) + .as("Should be able to parse valid index version") + .isEqualTo(INDEX_VERSION_ENTRY); + } + + @Test + public void testSerializeIndexVersion() { + assertThat(IndexVersionParser.toJson(INDEX_VERSION_ENTRY)) + .as("Should be able to serialize valid index version") + .isEqualTo(INDEX_VERSION_ENTRY_JSON); + } + + @Test + public void testNullIndexVersion() { + assertThatThrownBy(() -> IndexVersionParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index version: null"); + + assertThatThrownBy(() -> IndexVersionParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse index version from null object"); + } + + @Test + public void 
testIndexVersionMissingFields() { + assertThatThrownBy(() -> IndexVersionParser.fromJson("{}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing int: version-id"); + + assertThatThrownBy(() -> IndexVersionParser.fromJson("{\"version-id\":1}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing long: timestamp-ms"); + } + + @Test + public void testIndexVersionWithoutProperties() { + IndexVersion version = IndexVersionParser.fromJson(INDEX_VERSION_WITHOUT_PROPERTIES_JSON); + assertThat(version).isEqualTo(INDEX_VERSION_WITHOUT_PROPERTIES); + } + + @Test + public void testIndexVersionWithEmptyProperties() { + String json = IndexVersionParser.toJson(INDEX_VERSION_WITHOUT_PROPERTIES); + assertThat(json).isEqualTo(INDEX_VERSION_WITHOUT_PROPERTIES_JSON); + } + + @Test + public void testRoundTrip() { + IndexVersion original = + ImmutableIndexVersion.builder() + .versionId(42) + .timestampMillis(1234567890L) + .properties(ImmutableMap.of("key1", "value1", "key2", "value2")) + .build(); + + String json = IndexVersionParser.toJson(original); + IndexVersion parsed = IndexVersionParser.fromJson(json); + + assertThat(parsed.versionId()).isEqualTo(original.versionId()); + assertThat(parsed.timestampMillis()).isEqualTo(original.timestampMillis()); + assertThat(parsed.properties()).isEqualTo(original.properties()); + } + + @Test + public void testRoundTripWithNullProperties() { + IndexVersion original = + ImmutableIndexVersion.builder().versionId(1).timestampMillis(12345).build(); + + String json = IndexVersionParser.toJson(original); + IndexVersion parsed = IndexVersionParser.fromJson(json); + + assertThat(parsed.versionId()).isEqualTo(original.versionId()); + assertThat(parsed.timestampMillis()).isEqualTo(original.timestampMillis()); + assertThat(parsed.properties()).isEmpty(); + } +} diff --git a/core/src/test/java/org/apache/iceberg/inmemory/TestInMemoryIndexCatalog.java 
b/core/src/test/java/org/apache/iceberg/inmemory/TestInMemoryIndexCatalog.java new file mode 100644 index 000000000000..7e68280d83f5 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/inmemory/TestInMemoryIndexCatalog.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.inmemory; + +import java.util.Map; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.index.IndexCatalogTests; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.BeforeEach; + +public class TestInMemoryIndexCatalog extends IndexCatalogTests { + private InMemoryCatalog catalog; + + @BeforeEach + public void before() { + this.catalog = initCatalog("in-memory-index-catalog", ImmutableMap.of()); + } + + @Override + protected InMemoryCatalog catalog() { + return catalog; + } + + @Override + protected Catalog tableCatalog() { + return catalog; + } + + protected InMemoryCatalog initCatalog( + String catalogName, Map additionalProperties) { + InMemoryCatalog cat = new InMemoryCatalog(); + Map properties = + ImmutableMap.builder() + .putAll(additionalProperties) + .put(CatalogProperties.WAREHOUSE_LOCATION, indexLocation("warehouse")) + .buildOrThrow(); + cat.initialize(catalogName, properties); + return cat; + } + + @Override + protected boolean requiresNamespaceCreate() { + return true; + } +} diff --git a/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcIndexCatalog.java b/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcIndexCatalog.java new file mode 100644 index 000000000000..d8efd810a59a --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcIndexCatalog.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.jdbc; + +import java.util.Map; +import java.util.UUID; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.index.IndexCatalogTests; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.junit.jupiter.api.BeforeEach; + +public class TestJdbcIndexCatalog extends IndexCatalogTests { + + private static final Configuration CONF = new Configuration(); + private JdbcCatalog catalog; + + @BeforeEach + public void before() { + this.catalog = initCatalog("jdbc-index-catalog", Maps.newHashMap()); + } + + @Override + protected JdbcCatalog catalog() { + return catalog; + } + + @Override + protected Catalog tableCatalog() { + return catalog; + } + + protected JdbcCatalog initCatalog(String catalogName, Map additionalProperties) { + Map properties = Maps.newHashMap(); + // Use in-memory SQLite database with unique ID to avoid conflicts between tests + properties.put( + CatalogProperties.URI, + "jdbc:sqlite:file::memory:?ic" + UUID.randomUUID().toString().replace("-", "")); + + properties.put(JdbcCatalog.PROPERTY_PREFIX + "username", "user"); + properties.put(JdbcCatalog.PROPERTY_PREFIX + "password", "password"); + properties.put(CatalogProperties.WAREHOUSE_LOCATION, indexLocation("warehouse")); + // Use V1 schema to ensure the iceberg_type column exists for table existence checks + properties.put(JdbcUtil.SCHEMA_VERSION_PROPERTY, "V1"); + 
properties.put("type", "jdbc"); + properties.putAll(additionalProperties); + + JdbcCatalog jdbcCatalog = + (JdbcCatalog) CatalogUtil.buildIcebergCatalog(catalogName, properties, CONF); + return jdbcCatalog; + } + + @Override + protected boolean requiresNamespaceCreate() { + return true; + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java index 8ba5daef3f9b..a20286b2e0a1 100644 --- a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java +++ b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java @@ -37,6 +37,8 @@ import org.apache.iceberg.Transaction; import org.apache.iceberg.Transactions; import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.IndexCatalog; +import org.apache.iceberg.catalog.IndexIdentifier; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; @@ -47,6 +49,7 @@ import org.apache.iceberg.exceptions.ForbiddenException; import org.apache.iceberg.exceptions.NamespaceNotEmptyException; import org.apache.iceberg.exceptions.NoSuchIcebergTableException; +import org.apache.iceberg.exceptions.NoSuchIndexException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchPlanIdException; import org.apache.iceberg.exceptions.NoSuchPlanTaskException; @@ -64,15 +67,18 @@ import org.apache.iceberg.rest.RESTCatalogProperties.SnapshotMode; import org.apache.iceberg.rest.auth.AuthSession; import org.apache.iceberg.rest.requests.CommitTransactionRequest; +import org.apache.iceberg.rest.requests.CreateIndexRequest; import org.apache.iceberg.rest.requests.CreateNamespaceRequest; import org.apache.iceberg.rest.requests.CreateTableRequest; import org.apache.iceberg.rest.requests.CreateViewRequest; import org.apache.iceberg.rest.requests.FetchScanTasksRequest; import 
org.apache.iceberg.rest.requests.PlanTableScanRequest; +import org.apache.iceberg.rest.requests.RegisterIndexRequest; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RegisterViewRequest; import org.apache.iceberg.rest.requests.RenameTableRequest; import org.apache.iceberg.rest.requests.ReportMetricsRequest; +import org.apache.iceberg.rest.requests.UpdateIndexRequest; import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest; import org.apache.iceberg.rest.requests.UpdateTableRequest; import org.apache.iceberg.rest.responses.ConfigResponse; @@ -99,6 +105,7 @@ public class RESTCatalogAdapter extends BaseHTTPClient { .put(ForbiddenException.class, 403) .put(NoSuchNamespaceException.class, 404) .put(NoSuchTableException.class, 404) + .put(NoSuchIndexException.class, 404) .put(NotFoundException.class, 404) .put(NoSuchViewException.class, 404) .put(NoSuchIcebergTableException.class, 404) @@ -114,6 +121,7 @@ public class RESTCatalogAdapter extends BaseHTTPClient { private final Catalog catalog; private final SupportsNamespaces asNamespaceCatalog; private final ViewCatalog asViewCatalog; + private final IndexCatalog asIndexCatalog; private AuthSession authSession = AuthSession.EMPTY; private PlanningBehavior planningBehavior; @@ -123,6 +131,7 @@ public RESTCatalogAdapter(Catalog catalog) { this.asNamespaceCatalog = catalog instanceof SupportsNamespaces ? (SupportsNamespaces) catalog : null; this.asViewCatalog = catalog instanceof ViewCatalog ? (ViewCatalog) catalog : null; + this.asIndexCatalog = catalog instanceof IndexCatalog ? 
(IndexCatalog) catalog : null; } private static OAuthTokenResponse handleOAuthRequest(Object body) { @@ -531,6 +540,97 @@ public T handleRequest( break; } + case LIST_INDEXES: + { + if (null != asIndexCatalog) { + TableIdentifier tableIdent = tableIdentFromPathVars(vars); + String pageToken = PropertyUtil.propertyAsString(vars, "pageToken", null); + String pageSize = PropertyUtil.propertyAsString(vars, "pageSize", null); + if (pageSize != null) { + return castResponse( + responseType, + CatalogHandlers.listIndexes(asIndexCatalog, tableIdent, pageToken, pageSize)); + } else { + return castResponse( + responseType, CatalogHandlers.listIndexes(asIndexCatalog, tableIdent)); + } + } + break; + } + + case CREATE_INDEX: + { + if (null != asIndexCatalog) { + TableIdentifier tableIdent = tableIdentFromPathVars(vars); + CreateIndexRequest request = castRequest(CreateIndexRequest.class, body); + return CatalogHandlers.withIdempotency( + httpRequest, + () -> + castResponse( + responseType, + CatalogHandlers.createIndex(asIndexCatalog, tableIdent, request))); + } + break; + } + + case INDEX_EXISTS: + { + if (null != asIndexCatalog) { + CatalogHandlers.indexExists(asIndexCatalog, indexIdentFromPathVars(vars)); + return null; + } + break; + } + + case LOAD_INDEX: + { + if (null != asIndexCatalog) { + IndexIdentifier ident = indexIdentFromPathVars(vars); + return castResponse(responseType, CatalogHandlers.loadIndex(asIndexCatalog, ident)); + } + break; + } + + case UPDATE_INDEX: + { + if (null != asIndexCatalog) { + IndexIdentifier ident = indexIdentFromPathVars(vars); + UpdateIndexRequest request = castRequest(UpdateIndexRequest.class, body); + return CatalogHandlers.withIdempotency( + httpRequest, + () -> + castResponse( + responseType, CatalogHandlers.updateIndex(asIndexCatalog, ident, request))); + } + break; + } + + case DROP_INDEX: + { + if (null != asIndexCatalog) { + CatalogHandlers.withIdempotency( + httpRequest, + () -> CatalogHandlers.dropIndex(asIndexCatalog, 
indexIdentFromPathVars(vars))); + return null; + } + break; + } + + case REGISTER_INDEX: + { + if (null != asIndexCatalog) { + TableIdentifier tableIdent = tableIdentFromPathVars(vars); + RegisterIndexRequest request = castRequest(RegisterIndexRequest.class, body); + return CatalogHandlers.withIdempotency( + httpRequest, + () -> + castResponse( + responseType, + CatalogHandlers.registerIndex(asIndexCatalog, tableIdent, request))); + } + break; + } + default: if (responseType == OAuthTokenResponse.class) { return castResponse(responseType, handleOAuthRequest(body)); @@ -737,6 +837,11 @@ private static TableIdentifier viewIdentFromPathVars(Map pathVar namespaceFromPathVars(pathVars), RESTUtil.decodeString(pathVars.get("view"))); } + private static IndexIdentifier indexIdentFromPathVars(Map pathVars) { + return IndexIdentifier.of( + tableIdentFromPathVars(pathVars), RESTUtil.decodeString(pathVars.get("index"))); + } + private static String planIDFromPathVars(Map pathVars) { return RESTUtil.decodeString(pathVars.get("plan-id")); } diff --git a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogServlet.java b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogServlet.java index beb1401c2249..027365898214 100644 --- a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogServlet.java +++ b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogServlet.java @@ -135,13 +135,16 @@ protected void execute(ServletRequestContext context, HttpServletResponse respon } } + @SuppressWarnings("checkstyle:CyclomaticComplexity") private boolean shouldReturn204(Route route) { return route == Route.NAMESPACE_EXISTS || route == Route.TABLE_EXISTS || route == Route.VIEW_EXISTS + || route == Route.INDEX_EXISTS || route == Route.DROP_NAMESPACE || route == Route.DROP_TABLE || route == Route.DROP_VIEW + || route == Route.DROP_INDEX || route == Route.RENAME_TABLE || route == Route.RENAME_VIEW || route == Route.CANCEL_PLAN_TABLE_SCAN diff --git 
a/core/src/test/java/org/apache/iceberg/rest/Route.java b/core/src/test/java/org/apache/iceberg/rest/Route.java index 8680915bff64..c45b76ecea38 100644 --- a/core/src/test/java/org/apache/iceberg/rest/Route.java +++ b/core/src/test/java/org/apache/iceberg/rest/Route.java @@ -23,15 +23,18 @@ import org.apache.iceberg.relocated.com.google.common.base.Splitter; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.rest.requests.CommitTransactionRequest; +import org.apache.iceberg.rest.requests.CreateIndexRequest; import org.apache.iceberg.rest.requests.CreateNamespaceRequest; import org.apache.iceberg.rest.requests.CreateTableRequest; import org.apache.iceberg.rest.requests.CreateViewRequest; import org.apache.iceberg.rest.requests.FetchScanTasksRequest; import org.apache.iceberg.rest.requests.PlanTableScanRequest; +import org.apache.iceberg.rest.requests.RegisterIndexRequest; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RegisterViewRequest; import org.apache.iceberg.rest.requests.RenameTableRequest; import org.apache.iceberg.rest.requests.ReportMetricsRequest; +import org.apache.iceberg.rest.requests.UpdateIndexRequest; import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest; import org.apache.iceberg.rest.requests.UpdateTableRequest; import org.apache.iceberg.rest.responses.ConfigResponse; @@ -39,8 +42,10 @@ import org.apache.iceberg.rest.responses.FetchPlanningResultResponse; import org.apache.iceberg.rest.responses.FetchScanTasksResponse; import org.apache.iceberg.rest.responses.GetNamespaceResponse; +import org.apache.iceberg.rest.responses.ListIndexesResponse; import org.apache.iceberg.rest.responses.ListNamespacesResponse; import org.apache.iceberg.rest.responses.ListTablesResponse; +import org.apache.iceberg.rest.responses.LoadIndexResponse; import org.apache.iceberg.rest.responses.LoadTableResponse; import 
org.apache.iceberg.rest.responses.LoadViewResponse; import org.apache.iceberg.rest.responses.OAuthTokenResponse; @@ -137,7 +142,27 @@ enum Route { FetchScanTasksRequest.class, FetchScanTasksResponse.class), CANCEL_PLAN_TABLE_SCAN( - HTTPRequest.HTTPMethod.DELETE, ResourcePaths.V1_TABLE_SCAN_PLAN, null, null); + HTTPRequest.HTTPMethod.DELETE, ResourcePaths.V1_TABLE_SCAN_PLAN, null, null), + LIST_INDEXES( + HTTPRequest.HTTPMethod.GET, ResourcePaths.V1_INDEXES, null, ListIndexesResponse.class), + CREATE_INDEX( + HTTPRequest.HTTPMethod.POST, + ResourcePaths.V1_INDEXES, + CreateIndexRequest.class, + LoadIndexResponse.class), + INDEX_EXISTS(HTTPRequest.HTTPMethod.HEAD, ResourcePaths.V1_INDEX), + LOAD_INDEX(HTTPRequest.HTTPMethod.GET, ResourcePaths.V1_INDEX, null, LoadIndexResponse.class), + UPDATE_INDEX( + HTTPRequest.HTTPMethod.POST, + ResourcePaths.V1_INDEX, + UpdateIndexRequest.class, + LoadIndexResponse.class), + DROP_INDEX(HTTPRequest.HTTPMethod.DELETE, ResourcePaths.V1_INDEX), + REGISTER_INDEX( + HTTPRequest.HTTPMethod.POST, + ResourcePaths.V1_INDEX_REGISTER, + RegisterIndexRequest.class, + LoadIndexResponse.class); private final HTTPRequest.HTTPMethod method; private final int requiredLength; diff --git a/core/src/test/java/org/apache/iceberg/rest/TestRESTIndexCatalog.java b/core/src/test/java/org/apache/iceberg/rest/TestRESTIndexCatalog.java new file mode 100644 index 000000000000..4a2dd1721fa6 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/TestRESTIndexCatalog.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.File; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.nio.file.Path; +import java.util.Map; +import java.util.UUID; +import java.util.function.Consumer; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.SessionCatalog; +import org.apache.iceberg.index.IndexCatalogTests; +import org.apache.iceberg.inmemory.InMemoryCatalog; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.rest.responses.ErrorResponse; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.server.handler.gzip.GzipHandler; +import org.eclipse.jetty.servlet.ServletContextHandler; +import org.eclipse.jetty.servlet.ServletHolder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.io.TempDir; + +public class TestRESTIndexCatalog extends IndexCatalogTests { + private static final ObjectMapper MAPPER = RESTObjectMapper.mapper(); + + @TempDir protected Path temp; + + private RESTCatalog restCatalog; + private InMemoryCatalog backendCatalog; + private Server httpServer; + + @BeforeEach + public void createCatalog() throws Exception { + File warehouse = temp.toFile(); + + this.backendCatalog = new InMemoryCatalog(); + this.backendCatalog.initialize( + "in-memory", + 
ImmutableMap.of(CatalogProperties.WAREHOUSE_LOCATION, warehouse.getAbsolutePath())); + + RESTCatalogAdapter adaptor = + new RESTCatalogAdapter(backendCatalog) { + @Override + public T execute( + HTTPRequest request, + Class responseType, + Consumer errorHandler, + Consumer> responseHeaders) { + Object body = roundTripSerialize(request.body(), "request"); + HTTPRequest req = ImmutableHTTPRequest.builder().from(request).body(body).build(); + T response = super.execute(req, responseType, errorHandler, responseHeaders); + return roundTripSerialize(response, "response"); + } + }; + + ServletContextHandler servletContext = + new ServletContextHandler(ServletContextHandler.NO_SESSIONS); + servletContext.setContextPath("/"); + servletContext.addServlet(new ServletHolder(new RESTCatalogServlet(adaptor)), "/*"); + servletContext.setHandler(new GzipHandler()); + + this.httpServer = new Server(new InetSocketAddress(InetAddress.getLoopbackAddress(), 0)); + httpServer.setHandler(servletContext); + httpServer.start(); + + SessionCatalog.SessionContext context = + new SessionCatalog.SessionContext( + UUID.randomUUID().toString(), + "user", + ImmutableMap.of("credential", "user:12345"), + ImmutableMap.of()); + + this.restCatalog = + new RESTCatalog( + context, + (config) -> HTTPClient.builder(config).uri(config.get(CatalogProperties.URI)).build()); + restCatalog.initialize( + "prod", + ImmutableMap.of( + CatalogProperties.URI, httpServer.getURI().toString(), "credential", "catalog:12345")); + } + + @SuppressWarnings("unchecked") + public static T roundTripSerialize(T payload, String description) { + if (payload != null) { + try { + if (payload instanceof RESTMessage) { + return (T) MAPPER.readValue(MAPPER.writeValueAsString(payload), payload.getClass()); + } else { + // use Map so that Jackson doesn't try to instantiate ImmutableMap from payload.getClass() + return (T) MAPPER.readValue(MAPPER.writeValueAsString(payload), Map.class); + } + } catch (JsonProcessingException e) { + 
throw new RuntimeException( + String.format("Failed to serialize and deserialize %s: %s", description, payload), e); + } + } + return null; + } + + @AfterEach + public void closeCatalog() throws Exception { + if (restCatalog != null) { + restCatalog.close(); + } + + if (backendCatalog != null) { + backendCatalog.close(); + } + + if (httpServer != null) { + httpServer.stop(); + httpServer.join(); + } + } + + @Override + protected RESTCatalog catalog() { + return restCatalog; + } + + @Override + protected Catalog tableCatalog() { + return restCatalog; + } + + @Override + protected boolean requiresNamespaceCreate() { + return true; + } + + @Override + protected boolean supportsServerSideRetry() { + return true; + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestCreateIndexRequest.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestCreateIndexRequest.java new file mode 100644 index 000000000000..82a0587c8f4d --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestCreateIndexRequest.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest.requests; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.iceberg.index.IndexType; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +public class TestCreateIndexRequest { + + private static final String TABLE_UUID = "test-table-uuid"; + + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> CreateIndexRequestParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid create index request: null"); + + assertThatThrownBy(() -> CreateIndexRequestParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse create index request from null object"); + } + + @Test + public void missingRequiredFields() { + assertThatThrownBy(() -> CreateIndexRequestParser.fromJson("{}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: table-uuid"); + + String missingName = "{\"table-uuid\":\"test-table-uuid\"}"; + assertThatThrownBy(() -> CreateIndexRequestParser.fromJson(missingName)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: name"); + + String missingType = "{\"table-uuid\":\"test-table-uuid\",\"name\":\"my_index\"}"; + assertThatThrownBy(() -> CreateIndexRequestParser.fromJson(missingType)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: type"); + + String missingColumnIds = + """ + { + "table-uuid": "test-table-uuid", + "name": "my_index", + "type": "btree" + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> CreateIndexRequestParser.fromJson(missingColumnIds)) + .isInstanceOf(IllegalArgumentException.class) + 
.hasMessage("Cannot parse missing list: index-column-ids"); + } + + @Test + public void roundTripSerde() { + CreateIndexRequest request = + CreateIndexRequest.builder() + .withTableUuid(TABLE_UUID) + .withName("customer_id_idx") + .withType(IndexType.BTREE) + .withIndexColumnIds(ImmutableList.of(1, 2)) + .withOptimizedColumnIds(ImmutableList.of(1)) + .withLocation("s3://bucket/indexes/customer_id_idx") + .setProperty("key1", "value1") + .build(); + + String json = CreateIndexRequestParser.toJson(request); + CreateIndexRequest parsed = CreateIndexRequestParser.fromJson(json); + + assertThat(parsed.tableUuid()).isEqualTo(TABLE_UUID); + assertThat(parsed.name()).isEqualTo("customer_id_idx"); + assertThat(parsed.type()).isEqualTo(IndexType.BTREE); + assertThat(parsed.indexColumnIds()).containsExactly(1, 2); + assertThat(parsed.optimizedColumnIds()).containsExactly(1); + assertThat(parsed.location()).isEqualTo("s3://bucket/indexes/customer_id_idx"); + assertThat(parsed.properties()).containsEntry("key1", "value1"); + } + + @Test + public void roundTripSerdeMinimal() { + CreateIndexRequest request = + CreateIndexRequest.builder() + .withTableUuid(TABLE_UUID) + .withName("simple_idx") + .withType(IndexType.BLOOM) + .withIndexColumnIds(ImmutableList.of(5)) + .build(); + + String json = CreateIndexRequestParser.toJson(request); + CreateIndexRequest parsed = CreateIndexRequestParser.fromJson(json); + + assertThat(parsed.name()).isEqualTo("simple_idx"); + assertThat(parsed.type()).isEqualTo(IndexType.BLOOM); + assertThat(parsed.indexColumnIds()).containsExactly(5); + assertThat(parsed.optimizedColumnIds()).isEmpty(); + assertThat(parsed.location()).isNull(); + assertThat(parsed.properties()).isEmpty(); + } + + @Test + public void testToJsonWithExpectedString() { + CreateIndexRequest request = + CreateIndexRequest.builder() + .withTableUuid(TABLE_UUID) + .withName("customer_id_btree_idx") + .withType(IndexType.BTREE) + .withIndexColumnIds(ImmutableList.of(1, 2)) + 
.withOptimizedColumnIds(ImmutableList.of(1)) + .withLocation("s3://bucket/indexes") + .setProperty("prop1", "value1") + .build(); + + String expectedJson = + """ + { + "table-uuid": "test-table-uuid", + "name": "customer_id_btree_idx", + "type": "btree", + "index-column-ids": [1, 2], + "optimized-column-ids": [1], + "location": "s3://bucket/indexes", + "properties": { + "prop1": "value1" + } + } + """ + .replaceAll("\\s+", ""); + + String actualJson = CreateIndexRequestParser.toJson(request); + assertThat(actualJson).isEqualTo(expectedJson); + + // Verify round-trip + CreateIndexRequest parsed = CreateIndexRequestParser.fromJson(actualJson); + assertThat(parsed.name()).isEqualTo("customer_id_btree_idx"); + assertThat(parsed.type()).isEqualTo(IndexType.BTREE); + assertThat(parsed.indexColumnIds()).containsExactly(1, 2); + assertThat(parsed.optimizedColumnIds()).containsExactly(1); + assertThat(parsed.location()).isEqualTo("s3://bucket/indexes"); + assertThat(parsed.properties()).containsEntry("prop1", "value1"); + } + + @Test + public void testToJsonMinimalWithExpectedString() { + CreateIndexRequest request = + CreateIndexRequest.builder() + .withTableUuid(TABLE_UUID) + .withName("term_idx") + .withType(IndexType.TERM) + .withIndexColumnIds(ImmutableList.of(3)) + .build(); + + String expectedJson = + """ + { + "table-uuid": "test-table-uuid", + "name": "term_idx", + "type": "term", + "index-column-ids": [3] + } + """ + .replaceAll("\\s+", ""); + + String actualJson = CreateIndexRequestParser.toJson(request); + assertThat(actualJson).isEqualTo(expectedJson); + } + + @Test + public void testBuilderValidation() { + assertThatThrownBy(() -> CreateIndexRequest.builder().build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid table uuid: null or empty"); + + assertThatThrownBy(() -> CreateIndexRequest.builder().withTableUuid(TABLE_UUID).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index name: null or empty"); + + 
assertThatThrownBy( + () -> CreateIndexRequest.builder().withTableUuid(TABLE_UUID).withName("test").build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index type: null"); + + assertThatThrownBy( + () -> + CreateIndexRequest.builder() + .withTableUuid(TABLE_UUID) + .withName("test") + .withType(IndexType.BTREE) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index column IDs: null or empty"); + } + + @Test + public void testAllIndexTypes() { + for (IndexType indexType : IndexType.values()) { + CreateIndexRequest request = + CreateIndexRequest.builder() + .withTableUuid(TABLE_UUID) + .withName("idx_" + indexType.typeName()) + .withType(indexType) + .withIndexColumnIds(ImmutableList.of(1)) + .build(); + + String json = CreateIndexRequestParser.toJson(request); + CreateIndexRequest parsed = CreateIndexRequestParser.fromJson(json); + + assertThat(parsed.type()).isEqualTo(indexType); + } + } + + @Test + public void testAddMethods() { + CreateIndexRequest request = + CreateIndexRequest.builder() + .withTableUuid(TABLE_UUID) + .withName("test_idx") + .withType(IndexType.IVF) + .addIndexColumnId(1) + .addIndexColumnId(2) + .addOptimizedColumnId(1) + .setProperty("a", "1") + .setProperty("b", "2") + .build(); + + assertThat(request.indexColumnIds()).containsExactly(1, 2); + assertThat(request.optimizedColumnIds()).containsExactly(1); + assertThat(request.properties()) + .containsExactlyInAnyOrderEntriesOf(ImmutableMap.of("a", "1", "b", "2")); + } + + @Test + public void testSetProperties() { + CreateIndexRequest request = + CreateIndexRequest.builder() + .withTableUuid(TABLE_UUID) + .withName("test_idx") + .withType(IndexType.BTREE) + .withIndexColumnIds(ImmutableList.of(1)) + .setProperties(ImmutableMap.of("key1", "val1", "key2", "val2")) + .build(); + + assertThat(request.properties()) + .containsExactlyInAnyOrderEntriesOf(ImmutableMap.of("key1", "val1", "key2", "val2")); + } + + @Test + public void 
testInvalidIndexType() { + String jsonWithInvalidType = + """ + { + "table-uuid": "test-table-uuid", + "name": "test_idx", + "type": "invalid_type", + "index-column-ids": [1] + }""" + .replaceAll("\\s+", ""); + + assertThatThrownBy(() -> CreateIndexRequestParser.fromJson(jsonWithInvalidType)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unknown index type: invalid_type"); + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestRegisterIndexRequest.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestRegisterIndexRequest.java new file mode 100644 index 000000000000..74e28a7ded80 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestRegisterIndexRequest.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest.requests; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import org.junit.jupiter.api.Test; + +public class TestRegisterIndexRequest { + + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> RegisterIndexRequestParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid register index request: null"); + + assertThatThrownBy(() -> RegisterIndexRequestParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse register index request from null object"); + } + + @Test + public void missingRequiredFields() { + assertThatThrownBy(() -> RegisterIndexRequestParser.fromJson("{}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: name"); + + String missingMetadataLocation = "{\"name\":\"my_index\"}"; + assertThatThrownBy(() -> RegisterIndexRequestParser.fromJson(missingMetadataLocation)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: metadata-location"); + } + + @Test + public void roundTripSerde() { + RegisterIndexRequest request = + ImmutableRegisterIndexRequest.builder() + .name("customer_id_idx") + .metadataLocation("s3://bucket/indexes/customer_id_idx/metadata/v1.metadata.json") + .build(); + + String json = RegisterIndexRequestParser.toJson(request); + RegisterIndexRequest parsed = RegisterIndexRequestParser.fromJson(json); + + assertThat(parsed.name()).isEqualTo("customer_id_idx"); + assertThat(parsed.metadataLocation()) + .isEqualTo("s3://bucket/indexes/customer_id_idx/metadata/v1.metadata.json"); + } + + @Test + public void testToJsonWithExpectedString() { + RegisterIndexRequest request = + ImmutableRegisterIndexRequest.builder() + .name("my_btree_idx") + 
.metadataLocation("s3://bucket/indexes/my_btree_idx/metadata/v1.metadata.json") + .build(); + + String expectedJson = + """ + { + "name": "my_btree_idx", + "metadata-location": "s3://bucket/indexes/my_btree_idx/metadata/v1.metadata.json" + } + """ + .replaceAll("\\s+", ""); + + String actualJson = RegisterIndexRequestParser.toJson(request); + assertThat(actualJson).isEqualTo(expectedJson); + + // Verify round-trip + RegisterIndexRequest parsed = RegisterIndexRequestParser.fromJson(actualJson); + assertThat(parsed.name()).isEqualTo("my_btree_idx"); + assertThat(parsed.metadataLocation()) + .isEqualTo("s3://bucket/indexes/my_btree_idx/metadata/v1.metadata.json"); + } + + @Test + public void testBuilderValidation() { + assertThatThrownBy(() -> ImmutableRegisterIndexRequest.builder().build()) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining( + "Cannot build RegisterIndexRequest, some of required attributes are not set"); + + assertThatThrownBy(() -> ImmutableRegisterIndexRequest.builder().name("test").build()) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining( + "Cannot build RegisterIndexRequest, some of required attributes are not set"); + + assertThatThrownBy( + () -> + ImmutableRegisterIndexRequest.builder() + .metadataLocation("s3://bucket/metadata.json") + .build()) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining( + "Cannot build RegisterIndexRequest, some of required attributes are not set"); + } + + @Test + public void testPrettyJsonOutput() { + RegisterIndexRequest request = + ImmutableRegisterIndexRequest.builder() + .name("test_idx") + .metadataLocation("s3://bucket/metadata.json") + .build(); + + String prettyJson = RegisterIndexRequestParser.toJson(request, true); + String compactJson = RegisterIndexRequestParser.toJson(request, false); + + // Pretty JSON should contain newlines + assertThat(prettyJson).contains("\n"); + // Compact JSON should not contain newlines + 
assertThat(compactJson).doesNotContain("\n"); + + // Both should parse to equivalent requests + RegisterIndexRequest fromPretty = RegisterIndexRequestParser.fromJson(prettyJson); + RegisterIndexRequest fromCompact = RegisterIndexRequestParser.fromJson(compactJson); + + assertThat(fromPretty.name()).isEqualTo(fromCompact.name()); + assertThat(fromPretty.metadataLocation()).isEqualTo(fromCompact.metadataLocation()); + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestUpdateIndexRequestParser.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestUpdateIndexRequestParser.java new file mode 100644 index 000000000000..906af1ebddf9 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestUpdateIndexRequestParser.java @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest.requests; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.index.ImmutableIndexVersion; +import org.apache.iceberg.index.IndexRequirement; +import org.apache.iceberg.index.IndexUpdate; +import org.apache.iceberg.index.IndexVersion; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.junit.jupiter.api.Test; + +public class TestUpdateIndexRequestParser { + + private static final IndexVersion TEST_INDEX_VERSION = + ImmutableIndexVersion.builder().versionId(23).timestampMillis(12345L).build(); + + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> UpdateIndexRequestParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid update index request: null"); + + assertThatThrownBy(() -> UpdateIndexRequestParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse update index request from null object"); + + UpdateIndexRequest request = UpdateIndexRequestParser.fromJson("{}"); + assertThat(request.identifier()).isNull(); + assertThat(request.updates()).isEmpty(); + assertThat(request.requirements()).isEmpty(); + } + + @Test + public void invalidIndexIdentifier() { + // index identifier is optional + String json = + """ + {"requirements": [], "updates": []}""" + .replaceAll("\\s+", ""); + UpdateIndexRequest request = UpdateIndexRequestParser.fromJson(json); + assertThat(request.identifier()).isNull(); + + String invalidIdentifier = + """ + {"identifier": {}}""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> UpdateIndexRequestParser.fromJson(invalidIdentifier)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: name"); + + String invalidNameType = + 
""" + {"identifier": {"name": 23}}""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> UpdateIndexRequestParser.fromJson(invalidNameType)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse to a string value: name: 23"); + } + + @Test + public void invalidRequirements() { + String invalidRequirementType = + """ + { + "identifier": {"namespace": ["ns1"], "name": "index1"}, + "requirements": [23], + "updates": [] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> UpdateIndexRequestParser.fromJson(invalidRequirementType)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse update requirement from non-object value: 23"); + + String missingRequirementType = + """ + { + "identifier": {"namespace": ["ns1"], "name": "index1"}, + "requirements": [{}], + "updates": [] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> UpdateIndexRequestParser.fromJson(missingRequirementType)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse update requirement. 
Missing field: type"); + + String missingUuid = + """ + { + "identifier": {"namespace": ["ns1"], "name": "index1"}, + "requirements": [{"type": "assert-index-uuid"}], + "updates": [] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> UpdateIndexRequestParser.fromJson(missingUuid)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: uuid"); + } + + @Test + public void invalidMetadataUpdates() { + String invalidUpdateType = + """ + { + "identifier": {"namespace": ["ns1"], "name": "index1"}, + "requirements": [], + "updates": [23] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> UpdateIndexRequestParser.fromJson(invalidUpdateType)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse index update from non-object value: 23"); + + String missingAction = + """ + { + "identifier": {"namespace": ["ns1"], "name": "index1"}, + "requirements": [], + "updates": [{}] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> UpdateIndexRequestParser.fromJson(missingAction)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse index update. 
Missing field: action"); + + String missingVersion = + """ + { + "identifier": {"namespace": ["ns1"], "name": "index1"}, + "requirements": [], + "updates": [{"action": "add-version"}] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> UpdateIndexRequestParser.fromJson(missingVersion)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing field: version"); + } + + @Test + public void roundTripSerde() { + String uuid = "2cc52516-5e73-41f2-b139-545d41a4e151"; + UpdateIndexRequest request = + UpdateIndexRequest.create( + TableIdentifier.of("ns1", "index1"), + ImmutableList.of( + new IndexRequirement.AssertIndexUUID(uuid), + new IndexRequirement.AssertIndexDoesNotExist()), + ImmutableList.of( + new IndexUpdate.AddVersion(TEST_INDEX_VERSION), + new IndexUpdate.SetCurrentVersion(TEST_INDEX_VERSION.versionId()))); + + String expectedJson = + """ + { + "identifier": { + "namespace": ["ns1"], + "name": "index1" + }, + "requirements": [ + { + "type": "assert-index-uuid", + "uuid": "2cc52516-5e73-41f2-b139-545d41a4e151" + }, + { + "type": "assert-create" + } + ], + "updates": [ + { + "action": "add-version", + "version": { + "version-id":23, + "timestamp-ms":12345 + } + }, + { + "action": "set-current-version", + "version-id":23 + } + ] + }""" + .replaceAll("\\s+", ""); + + String json = UpdateIndexRequestParser.toJson(request); + assertThat(json).isEqualTo(expectedJson); + + // can't do an equality comparison on UpdateIndexRequest because updates/requirements + // don't implement equals/hashcode + assertThat(UpdateIndexRequestParser.toJson(UpdateIndexRequestParser.fromJson(json))) + .isEqualTo(expectedJson); + } + + @Test + public void roundTripSerdeWithoutIdentifier() { + String uuid = "2cc52516-5e73-41f2-b139-545d41a4e151"; + UpdateIndexRequest request = + new UpdateIndexRequest( + ImmutableList.of( + new IndexRequirement.AssertIndexUUID(uuid), + new IndexRequirement.AssertIndexDoesNotExist()), + ImmutableList.of( + new 
IndexUpdate.AddVersion(TEST_INDEX_VERSION), + new IndexUpdate.SetCurrentVersion(TEST_INDEX_VERSION.versionId()))); + + String expectedJson = + """ + { + "requirements": [ + { + "type": "assert-index-uuid", + "uuid": "2cc52516-5e73-41f2-b139-545d41a4e151" + }, + { + "type": "assert-create" + } + ], + "updates": [ + { + "action": "add-version", + "version": { + "version-id":23, + "timestamp-ms":12345 + } + }, + { + "action": "set-current-version", + "version-id":23 + } + ] + }""" + .replaceAll("\\s+", ""); + + String json = UpdateIndexRequestParser.toJson(request); + assertThat(json).isEqualTo(expectedJson); + + // can't do an equality comparison on UpdateIndexRequest because updates/requirements + // don't implement equals/hashcode + assertThat(UpdateIndexRequestParser.toJson(UpdateIndexRequestParser.fromJson(json))) + .isEqualTo(expectedJson); + } + + @Test + public void emptyRequirementsAndUpdates() { + UpdateIndexRequest request = + UpdateIndexRequest.create( + TableIdentifier.of("ns1", "index1"), ImmutableList.of(), ImmutableList.of()); + + String expectedJson = + """ + { + "identifier": {"namespace": ["ns1"], "name": "index1"}, + "requirements": [], + "updates": [] + }""" + .replaceAll("\\s+", ""); + + assertThat(UpdateIndexRequestParser.toJson(request)).isEqualTo(expectedJson); + // can't do an equality comparison on UpdateIndexRequest because updates/requirements + // don't implement equals/hashcode + assertThat(UpdateIndexRequestParser.toJson(UpdateIndexRequestParser.fromJson(expectedJson))) + .isEqualTo(expectedJson); + + String minimalJson = + """ + {"identifier": {"namespace": ["ns1"], "name": "index1"}}""" + .replaceAll("\\s+", ""); + assertThat(UpdateIndexRequestParser.toJson(UpdateIndexRequestParser.fromJson(minimalJson))) + .isEqualTo(expectedJson); + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestListIndexesResponse.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestListIndexesResponse.java new file mode 
100644 index 000000000000..a297aa87d5d2 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestListIndexesResponse.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.core.JsonProcessingException; +import java.util.List; +import org.apache.iceberg.catalog.IndexIdentifier; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.rest.RequestResponseTestBase; +import org.junit.jupiter.api.Test; + +public class TestListIndexesResponse extends RequestResponseTestBase { + + private static final List IDENTIFIERS = + ImmutableList.of( + IndexIdentifier.of(Namespace.of("accounting", "tax"), "paid", "customer_idx")); + + private static final String FULL_JSON = + """ + { + "identifiers": [ + { + "namespace": ["accounting", "tax"], + "table": "paid", + "name": "customer_idx" + } + ], + "next-page-token": null + } + 
""" + .replaceAll("\\s+", ""); + + private static final String EMPTY_IDENTIFIERS_JSON = + """ + { + "identifiers": [], + "next-page-token": null + } + """ + .replaceAll("\\s+", ""); + + @Test + public void testRoundTripSerDe() throws JsonProcessingException { + assertRoundTripSerializesEquallyFrom( + FULL_JSON, ListIndexesResponse.builder().addAll(IDENTIFIERS).build()); + + assertRoundTripSerializesEquallyFrom( + EMPTY_IDENTIFIERS_JSON, ListIndexesResponse.builder().build()); + } + + @Test + public void testDeserializeInvalidResponsesThrows() { + assertThatThrownBy(() -> deserialize("{\"identifiers\":\"accounting%1Ftax\"}")) + .isInstanceOf(JsonProcessingException.class) + .hasMessageContaining("Cannot deserialize"); + + assertThatThrownBy(() -> deserialize("{}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid identifier list: null"); + + String jsonWithKeysSpelledIncorrectly = + """ + { + "identifyrezzzz": [ + { + "namespace": ["accounting", "tax"], + "table": "paid", + "name": "customer_idx" + } + ] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> deserialize(jsonWithKeysSpelledIncorrectly)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid identifier list: null"); + + String jsonWithInvalidIdentifiersInList = + """ + { + "identifiers": [ + { + "namespace": "accounting.tax", + "table": "paid", + "name": "customer_idx" + } + ] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> deserialize(jsonWithInvalidIdentifiersInList)) + .isInstanceOf(JsonProcessingException.class) + .hasMessageContaining("Cannot parse JSON array from non-array value"); + + String jsonWithInvalidIdentifiersInList2 = + """ + { + "identifiers": [ + { + "namespace": ["accounting", "tax"], + "table": "paid", + "name": "customer_idx" + }, + "accounting.tax.paid.customer_idx" + ] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> deserialize(jsonWithInvalidIdentifiersInList2)) + .isInstanceOf(JsonProcessingException.class) 
+ .hasMessageContaining("Cannot parse missing or non-object index identifier"); + + String jsonWithInvalidTypeForNamePartOfIdentifier = + """ + { + "identifiers": [ + { + "namespace": ["accounting", "tax"], + "table": "paid", + "name": true + } + ] + }""" + .replaceAll("\\s+", ""); + assertThatThrownBy(() -> deserialize(jsonWithInvalidTypeForNamePartOfIdentifier)) + .isInstanceOf(JsonProcessingException.class) + .hasMessageContaining("Cannot parse to a string value"); + + assertThatThrownBy(() -> deserialize(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("argument \"content\" is null"); + } + + @Test + public void testBuilderDoesNotCreateInvalidObjects() { + assertThatThrownBy(() -> ListIndexesResponse.builder().add(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid index identifier: null"); + + assertThatThrownBy(() -> ListIndexesResponse.builder().addAll(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid index identifier list: null"); + + List listWithNullElement = + Lists.newArrayList(IndexIdentifier.of(Namespace.of("foo"), "bar", "idx"), null); + assertThatThrownBy(() -> ListIndexesResponse.builder().addAll(listWithNullElement)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid index identifier: null"); + } + + @Test + public void testWithNullPaginationToken() throws JsonProcessingException { + ListIndexesResponse response = + ListIndexesResponse.builder().addAll(IDENTIFIERS).nextPageToken(null).build(); + assertRoundTripSerializesEquallyFrom(FULL_JSON, response); + assertThat(response.nextPageToken()).isNull(); + assertThat(response.identifiers()).isEqualTo(IDENTIFIERS); + } + + @Test + public void testWithPaginationToken() throws JsonProcessingException { + String pageToken = "token"; + String jsonWithPageToken = + """ + { + "identifiers": [ + { + "namespace": ["accounting", "tax"], + "table": "paid", + "name": "customer_idx" + } + ], + "next-page-token": "token" + } + 
""" + .replaceAll("\\s+", ""); + ListIndexesResponse response = + ListIndexesResponse.builder().addAll(IDENTIFIERS).nextPageToken(pageToken).build(); + assertRoundTripSerializesEquallyFrom(jsonWithPageToken, response); + assertThat(response.nextPageToken()).isEqualTo("token"); + assertThat(response.identifiers()).isEqualTo(IDENTIFIERS); + } + + @Override + public String[] allFieldsFromSpec() { + return new String[] {"identifiers", "next-page-token"}; + } + + @Override + public ListIndexesResponse createExampleInstance() { + return ListIndexesResponse.builder().addAll(IDENTIFIERS).build(); + } + + @Override + public void assertEquals(ListIndexesResponse actual, ListIndexesResponse expected) { + assertThat(actual.identifiers()) + .as("Identifiers should be equal") + .hasSameSizeAs(expected.identifiers()) + .containsExactlyInAnyOrderElementsOf(expected.identifiers()); + } + + @Override + public ListIndexesResponse deserialize(String json) throws JsonProcessingException { + ListIndexesResponse resp = mapper().readValue(json, ListIndexesResponse.class); + resp.validate(); + return resp; + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestLoadIndexResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestLoadIndexResponseParser.java new file mode 100644 index 000000000000..352ea8fade25 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestLoadIndexResponseParser.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.List; +import org.apache.iceberg.index.ImmutableIndexVersion; +import org.apache.iceberg.index.IndexMetadata; +import org.apache.iceberg.index.IndexType; +import org.apache.iceberg.index.IndexVersion; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +public class TestLoadIndexResponseParser { + + private static final String TABLE_UUID = "test-table-uuid"; + private static final String TEST_LOCATION = "s3://bucket/test/location"; + private static final String TEST_METADATA_LOCATION = "s3://bucket/test/metadata/v1.metadata.json"; + private static final List INDEX_COLUMN_IDS = ImmutableList.of(1, 2); + private static final List OPTIMIZED_COLUMN_IDS = ImmutableList.of(1); + + private static IndexMetadata createTestMetadata() { + IndexVersion version = + ImmutableIndexVersion.builder() + .versionId(1) + .timestampMillis(1234567890L) + .properties(ImmutableMap.of("key", "value")) + .build(); + + return IndexMetadata.builder() + .setTableUuid(TABLE_UUID) + .setLocation(TEST_LOCATION) + .setType(IndexType.BTREE) + .setIndexColumnIds(INDEX_COLUMN_IDS) + .setOptimizedColumnIds(OPTIMIZED_COLUMN_IDS) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .build(); + 
} + + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> LoadIndexResponseParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid load index response: null"); + + assertThatThrownBy(() -> LoadIndexResponseParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse load index response from null object"); + + assertThatThrownBy(() -> LoadIndexResponseParser.fromJson("{}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing field: metadata"); + } + + @Test + public void missingMetadataField() { + assertThatThrownBy( + () -> + LoadIndexResponseParser.fromJson( + "{\"metadata-location\": \"s3://bucket/metadata.json\"}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing field: metadata"); + } + + @Test + public void roundTripSerde() { + IndexMetadata metadata = createTestMetadata(); + + LoadIndexResponse response = + LoadIndexResponse.builder() + .withMetadata(metadata) + .withMetadataLocation(TEST_METADATA_LOCATION) + .build(); + + String json = LoadIndexResponseParser.toJson(response); + LoadIndexResponse parsed = LoadIndexResponseParser.fromJson(json); + + assertThat(parsed.metadataLocation()).isEqualTo(TEST_METADATA_LOCATION); + assertThat(parsed.metadata().uuid()).isEqualTo(metadata.uuid()); + assertThat(parsed.metadata().formatVersion()).isEqualTo(metadata.formatVersion()); + assertThat(parsed.metadata().type()).isEqualTo(metadata.type()); + assertThat(parsed.metadata().indexColumnIds()).isEqualTo(metadata.indexColumnIds()); + assertThat(parsed.metadata().optimizedColumnIds()).isEqualTo(metadata.optimizedColumnIds()); + assertThat(parsed.metadata().location()).isEqualTo(metadata.location()); + assertThat(parsed.config()).isEmpty(); + } + + @Test + public void roundTripSerdeWithConfig() { + IndexMetadata metadata = createTestMetadata(); + + LoadIndexResponse response = + 
LoadIndexResponse.builder() + .withMetadata(metadata) + .withMetadataLocation(TEST_METADATA_LOCATION) + .addConfig("key1", "value1") + .addConfig("key2", "value2") + .build(); + + String json = LoadIndexResponseParser.toJson(response); + LoadIndexResponse parsed = LoadIndexResponseParser.fromJson(json); + + assertThat(parsed.metadataLocation()).isEqualTo(TEST_METADATA_LOCATION); + assertThat(parsed.metadata().uuid()).isEqualTo(metadata.uuid()); + assertThat(parsed.config()) + .containsExactlyInAnyOrderEntriesOf(ImmutableMap.of("key1", "value1", "key2", "value2")); + } + + @Test + public void roundTripSerdeWithoutMetadataLocation() { + IndexMetadata metadata = createTestMetadata(); + + LoadIndexResponse response = LoadIndexResponse.builder().withMetadata(metadata).build(); + + String json = LoadIndexResponseParser.toJson(response); + LoadIndexResponse parsed = LoadIndexResponseParser.fromJson(json); + + assertThat(parsed.metadataLocation()).isNull(); + assertThat(parsed.metadata().uuid()).isEqualTo(metadata.uuid()); + } + + @Test + public void testPrettyJsonOutput() { + IndexMetadata metadata = createTestMetadata(); + + LoadIndexResponse response = + LoadIndexResponse.builder() + .withMetadata(metadata) + .withMetadataLocation(TEST_METADATA_LOCATION) + .build(); + + String prettyJson = LoadIndexResponseParser.toJson(response, true); + String compactJson = LoadIndexResponseParser.toJson(response, false); + + // Pretty JSON should contain newlines + assertThat(prettyJson).contains("\n"); + // Compact JSON should not contain newlines + assertThat(compactJson).doesNotContain("\n"); + + // Both should parse to equivalent responses + LoadIndexResponse fromPretty = LoadIndexResponseParser.fromJson(prettyJson); + LoadIndexResponse fromCompact = LoadIndexResponseParser.fromJson(compactJson); + + assertThat(fromPretty.metadata().uuid()).isEqualTo(fromCompact.metadata().uuid()); + assertThat(fromPretty.metadataLocation()).isEqualTo(fromCompact.metadataLocation()); + } + + 
@Test + public void testBuilderValidation() { + assertThatThrownBy(() -> LoadIndexResponse.builder().build()) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid metadata: null"); + } + + @Test + public void testAddAllConfig() { + IndexMetadata metadata = createTestMetadata(); + + LoadIndexResponse response = + LoadIndexResponse.builder() + .withMetadata(metadata) + .addAllConfig(ImmutableMap.of("a", "1", "b", "2")) + .build(); + + assertThat(response.config()) + .containsExactlyInAnyOrderEntriesOf(ImmutableMap.of("a", "1", "b", "2")); + } + + @Test + public void testToJsonWithExpectedString() { + IndexVersion version = + ImmutableIndexVersion.builder() + .versionId(1) + .timestampMillis(1234567890L) + .properties(ImmutableMap.of()) + .build(); + + IndexMetadata metadata = + IndexMetadata.builder() + .setTableUuid(TABLE_UUID) + .setLocation(TEST_LOCATION) + .setType(IndexType.BTREE) + .setIndexColumnIds(INDEX_COLUMN_IDS) + .setOptimizedColumnIds(OPTIMIZED_COLUMN_IDS) + .addVersion(version) + .setCurrentVersion(version.versionId()) + .build(); + + LoadIndexResponse response = + LoadIndexResponse.builder() + .withMetadata(metadata) + .withMetadataLocation(TEST_METADATA_LOCATION) + .addConfig("key1", "value1") + .build(); + + String expectedJson = + String.format( + """ + { + "metadata-location": "s3://bucket/test/metadata/v1.metadata.json", + "metadata": { + "index-uuid": "%s", + "table-uuid": "test-table-uuid", + "format-version": 1, + "index-type": "btree", + "index-column-ids": [1, 2], + "optimized-column-ids": [1], + "location": "s3://bucket/test/location", + "current-version-id": 1, + "versions": [ + { + "version-id": 1, + "timestamp-ms": 1234567890 + } + ], + "version-log": [ + { + "timestamp-ms": 1234567890, + "version-id": 1 + } + ], + "snapshots": [] + }, + "config": { + "key1": "value1" + } + } + """ + .replaceAll("\\s+", ""), + metadata.uuid()); + + String actualJson = LoadIndexResponseParser.toJson(response); + 
assertThat(actualJson).isEqualTo(expectedJson); + + // Also verify round-trip + LoadIndexResponse parsed = LoadIndexResponseParser.fromJson(actualJson); + assertThat(parsed.metadata().uuid()).isEqualTo(metadata.uuid()); + assertThat(parsed.metadataLocation()).isEqualTo(TEST_METADATA_LOCATION); + assertThat(parsed.config()).containsEntry("key1", "value1"); + } +} diff --git a/open-api/rest-catalog-open-api.py b/open-api/rest-catalog-open-api.py index 32cf975cf5b6..e90d28ee3083 100644 --- a/open-api/rest-catalog-open-api.py +++ b/open-api/rest-catalog-open-api.py @@ -336,6 +336,331 @@ class ViewVersion(BaseModel): default_namespace: Namespace = Field(..., alias='default-namespace') +class IndexIdentifier(BaseModel): + """ + Identifies an index within a catalog, scoped to a table + """ + + namespace: Namespace + table: str = Field(..., description='The table name') + name: str = Field(..., description='The index name') + + +class IndexType(BaseModel): + __root__: Literal['bloom', 'btree', 'term', 'ivf'] = Field( + ..., description='The type of index algorithm', example='btree' + ) + + +class IndexSummary(BaseModel): + """ + A lightweight summary of an index for discovery and optimization + """ + + identifier: IndexIdentifier + type: IndexType + index_column_ids: list[int] = Field( + ..., + alias='index-column-ids', + description='IDs of columns stored losslessly in the index', + example=[1, 2], + ) + optimized_column_ids: list[int] = Field( + ..., + alias='optimized-column-ids', + description='IDs of columns the index is optimized for retrieval', + example=[1], + ) + available_table_snapshots: list[int] = Field( + ..., + alias='available-table-snapshots', + description='Table snapshot IDs for which this index has valid snapshots', + example=[1234567890123, 1234567890456], + ) + + +class IndexSnapshot(BaseModel): + """ + Represents a point-in-time snapshot of an index + """ + + index_snapshot_id: int = Field( + ..., + alias='index-snapshot-id', + description='Unique 
identifier for this index snapshot', + ) + table_snapshot_id: int = Field( + ..., + alias='table-snapshot-id', + description='The table snapshot ID this index snapshot corresponds to', + ) + timestamp_ms: int = Field( + ..., + alias='timestamp-ms', + description='Timestamp when this snapshot was created (ms since epoch)', + ) + manifest_list: str = Field( + ..., + alias='manifest-list', + description='Location of the manifest list for this snapshot', + ) + snapshot_properties: dict[str, str] | None = Field( + None, + alias='snapshot-properties', + description='Properties for this index snapshot typically set by the index maintenance process', + ) + + +class IndexVersion(BaseModel): + """ + Represents a version of the index schema/configuration + """ + + version_id: int = Field( + ..., alias='version-id', description='Unique identifier for this version' + ) + timestamp_ms: int = Field( + ..., + alias='timestamp-ms', + description='Timestamp when this version was created (ms since epoch)', + ) + snapshot_id: int | None = Field( + None, + alias='snapshot-id', + description='The index snapshot ID associated with this version', + ) + user_properties: dict[str, str] | None = Field( + None, + alias='user-properties', + description='User provided properties for this version', + ) + + +class IndexHistoryEntry(BaseModel): + """ + An entry in the index version history + """ + + timestamp_ms: int = Field( + ..., + alias='timestamp-ms', + description='Timestamp of the history entry (ms since epoch)', + ) + version_id: int = Field( + ..., alias='version-id', description='The version ID at this point in history' + ) + + +class IndexMetadata(BaseModel): + """ + Full metadata for an index + """ + + format_version: int = Field( + ..., + alias='format-version', + description='Format version of the index metadata', + example=1, + ) + index_uuid: UUID = Field( + ..., alias='index-uuid', description='Unique UUID for this index' + ) + name: str = Field(..., description='The name of the 
index') + type: IndexType + location: str = Field(..., description='Base location for index files') + table_uuid: UUID = Field( + ..., + alias='table-uuid', + description='Unique UUID for the table this index is associated with', + ) + index_column_ids: list[int] = Field( + ..., + alias='index-column-ids', + description='IDs of columns stored losslessly in the index', + ) + optimized_column_ids: list[int] = Field( + ..., + alias='optimized-column-ids', + description='IDs of columns the index is optimized for retrieval', + ) + current_version_id: int = Field( + ..., alias='current-version-id', description='ID of the current index version' + ) + versions: list[IndexVersion] | None = Field( + None, description='List of known index versions' + ) + version_history: list[IndexHistoryEntry] | None = Field( + None, alias='version-history', description='History of version changes' + ) + snapshots: list[IndexSnapshot] | None = Field( + None, description='List of index snapshots' + ) + + +class IndexRequirement(BaseModel): + """ + A requirement that must be met for an index commit to succeed + """ + + type: Literal['assert-index-uuid', 'assert-create'] = Field( + ..., description='The type of requirement' + ) + uuid: UUID | None = Field(None, description='Required for assert-index-uuid') + + +class AssertIndexUUID(IndexRequirement): + uuid: UUID = Field(..., description='Required for assert-index-uuid') + + +class IndexUpdate(BaseModel): + """ + An update to apply to an index + """ + + action: Literal[ + 'upgrade-format-version', + 'add-snapshot', + 'remove-snapshots', + 'add-version', + 'set-current-version', + 'set-location', + ] = Field(..., description='The type of update') + + +class UpgradeIndexFormatVersionUpdate(IndexUpdate): + action: Literal['upgrade-format-version'] = Field( + 'upgrade-format-version', const=True, description='The type of update' + ) + format_version: int = Field(..., 
alias='format-version') + + +class AddIndexSnapshotUpdate(IndexUpdate): + snapshot: IndexSnapshot + + +class RemoveIndexSnapshotsUpdate(IndexUpdate): + snapshot_ids: list[int] = Field(..., alias='snapshot-ids') + + +class AddIndexVersionUpdate(IndexUpdate): + version: IndexVersion + + +class SetCurrentIndexVersionUpdate(IndexUpdate): + action: Literal['set-current-version'] = Field( + 'set-current-version', const=True + ) + version_id: int = Field( + ..., + alias='version-id', + description='The index version id to set as current, or -1 to set last added index version id', + ) + + +class SetIndexLocationUpdate(IndexUpdate): + location: str + + +class LoadIndexResult(BaseModel): + """ + Result for loading an index + """ + + metadata_location: str | None = Field( + None, + alias='metadata-location', + description='The location of the index metadata file', + ) + metadata: IndexMetadata + config: dict[str, str] | None = None + + +class CreateIndexRequest(BaseModel): + """ + Request to create a new index + """ + + name: str = Field( + ..., description='The name for the new index', example='customer_id_btree_idx' + ) + type: IndexType + table_uuid: UUID = Field( + ..., alias='table-uuid', description='The UUID of the table to index' + ) + index_column_ids: list[int] = Field( + ..., + alias='index-column-ids', + description='IDs of columns to store losslessly in the index', + example=[1, 2], + ) + optimized_column_ids: list[int] | None = Field( + None, + alias='optimized-column-ids', + description='IDs of columns the index is optimized for retrieval', + example=[1], + ) + location: str | None = Field( + None, description='Optional base location for index files' + ) + properties: dict[str, str] | None = Field( + None, description='Index-specific properties' + ) + index_snapshot_id: int | None = Field( + None, + alias='index-snapshot-id', + description='Unique identifier for this index snapshot', + ) + table_snapshot_id: int | None = Field( + None, + 
alias='table-snapshot-id', + description='The table snapshot ID this index snapshot corresponds to', + ) + snapshot_properties: dict[str, str] | None = Field( + None, + alias='snapshot-properties', + description='Properties for this index snapshot typically set by the index maintenance process', + ) + + +class RegisterIndexRequest(BaseModel): + """ + Request to register an existing index by metadata file location + """ + + name: str = Field(..., description='The name to register the index under') + metadata_location: str = Field( + ..., + alias='metadata-location', + description='Location of the index metadata file', + ) + + +class CommitIndexRequest(BaseModel): + """ + Request to commit updates to an index + """ + + requirements: list[AssertIndexUUID] | None = Field( + None, description='Requirements that must be met before applying updates' + ) + updates: ( + list[ + UpgradeIndexFormatVersionUpdate + | AddIndexSnapshotUpdate + | RemoveIndexSnapshotsUpdate + | AddIndexVersionUpdate + | SetCurrentIndexVersionUpdate + | SetIndexLocationUpdate + ] + | None + ) = Field(None, description='Updates to apply to the index') + + class BaseUpdate(BaseModel): action: str diff --git a/open-api/rest-catalog-open-api.yaml b/open-api/rest-catalog-open-api.yaml index ee0097042534..3f7ee51ded50 100644 --- a/open-api/rest-catalog-open-api.yaml +++ b/open-api/rest-catalog-open-api.yaml @@ -1897,6 +1897,345 @@ paths: 5XX: $ref: '#/components/responses/ServerErrorResponse' + /v1/{prefix}/namespaces/{namespace}/tables/{table}/indexes: + parameters: + - $ref: '#/components/parameters/prefix' + - $ref: '#/components/parameters/namespace' + - $ref: '#/components/parameters/table' + + get: + tags: + - Index Catalog API + summary: List all indexes for a table filtered by the types if requested + description: | + Return a list of index summaries for the specified table. + The list can be filtered by index type(s) using the `types` query parameter. 
+ This enables query optimizers to discover indexes available for a given table. + operationId: listIndexes + parameters: + - name: types + in: query + description: | + Optional comma-separated list of index types to filter by. + If not provided, all indexes are returned. + required: false + schema: + type: array + items: + $ref: '#/components/schemas/IndexType' + style: form + explode: false + example: "bloom,btree" + - $ref: '#/components/parameters/page-token' + - $ref: '#/components/parameters/page-size' + responses: + 200: + $ref: '#/components/responses/ListIndexesResponse' + 400: + $ref: '#/components/responses/BadRequestErrorResponse' + 401: + $ref: '#/components/responses/UnauthorizedResponse' + 403: + $ref: '#/components/responses/ForbiddenResponse' + 404: + description: Not Found - The table or namespace does not exist + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + examples: + NoSuchTableError: + $ref: '#/components/examples/NoSuchTableError' + NoSuchNamespaceError: + $ref: '#/components/examples/NoSuchNamespaceError' + 419: + $ref: '#/components/responses/AuthenticationTimeoutResponse' + 503: + $ref: '#/components/responses/ServiceUnavailableResponse' + 5XX: + $ref: '#/components/responses/ServerErrorResponse' + + post: + tags: + - Index Catalog API + summary: Create an index on a table + description: | + Create a new index on the specified table. + The index type, column IDs, and other configuration must be provided in the request body. 
+ operationId: createIndex + parameters: + - $ref: '#/components/parameters/idempotency-key' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateIndexRequest' + responses: + 200: + $ref: '#/components/responses/LoadIndexResponse' + 400: + $ref: '#/components/responses/BadRequestErrorResponse' + 401: + $ref: '#/components/responses/UnauthorizedResponse' + 403: + $ref: '#/components/responses/ForbiddenResponse' + 404: + description: Not Found - The table or namespace does not exist + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + examples: + NoSuchTableError: + $ref: '#/components/examples/NoSuchTableError' + NoSuchNamespaceError: + $ref: '#/components/examples/NoSuchNamespaceError' + 409: + description: Conflict - An index with the same name already exists + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + examples: + IndexAlreadyExists: + $ref: '#/components/examples/IndexAlreadyExistsError' + 419: + $ref: '#/components/responses/AuthenticationTimeoutResponse' + 503: + $ref: '#/components/responses/ServiceUnavailableResponse' + 5XX: + $ref: '#/components/responses/ServerErrorResponse' + + /v1/{prefix}/namespaces/{namespace}/tables/{table}/indexes/{index}: + parameters: + - $ref: '#/components/parameters/prefix' + - $ref: '#/components/parameters/namespace' + - $ref: '#/components/parameters/table' + - $ref: '#/components/parameters/index' + + get: + tags: + - Index Catalog API + summary: Load an index from the catalog + description: | + Load the full metadata for an index. + The response contains the complete index definition including versions, snapshots, and history. 
+ operationId: loadIndex + responses: + 200: + $ref: '#/components/responses/LoadIndexResponse' + 400: + $ref: '#/components/responses/BadRequestErrorResponse' + 401: + $ref: '#/components/responses/UnauthorizedResponse' + 403: + $ref: '#/components/responses/ForbiddenResponse' + 404: + description: Not Found - The index, table, or namespace does not exist + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + examples: + NoSuchIndexError: + $ref: '#/components/examples/NoSuchIndexError' + NoSuchTableError: + $ref: '#/components/examples/NoSuchTableError' + NoSuchNamespaceError: + $ref: '#/components/examples/NoSuchNamespaceError' + 419: + $ref: '#/components/responses/AuthenticationTimeoutResponse' + 503: + $ref: '#/components/responses/ServiceUnavailableResponse' + 5XX: + $ref: '#/components/responses/ServerErrorResponse' + + post: + tags: + - Index Catalog API + summary: Commit updates to an index + description: | + Commit updates to an index. + This can be used to add new versions, snapshots, or update index metadata. + The request includes requirements that must be met and updates to apply. 
+ operationId: updateIndex + parameters: + - $ref: '#/components/parameters/idempotency-key' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CommitIndexRequest' + responses: + 200: + $ref: '#/components/responses/LoadIndexResponse' + 400: + $ref: '#/components/responses/BadRequestErrorResponse' + 401: + $ref: '#/components/responses/UnauthorizedResponse' + 403: + $ref: '#/components/responses/ForbiddenResponse' + 404: + description: Not Found - The index, table, or namespace does not exist + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + examples: + NoSuchIndexError: + $ref: '#/components/examples/NoSuchIndexError' + NoSuchTableError: + $ref: '#/components/examples/NoSuchTableError' + NoSuchNamespaceError: + $ref: '#/components/examples/NoSuchNamespaceError' + 409: + description: Conflict - CommitFailedException, one or more requirements failed + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + 419: + $ref: '#/components/responses/AuthenticationTimeoutResponse' + 500: + description: An unknown server-side problem occurred; the commit state is unknown. + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + example: + error: + message: "Internal Server Error" + type: "CommitStateUnknownException" + code: 500 + 503: + $ref: '#/components/responses/ServiceUnavailableResponse' + 5XX: + $ref: '#/components/responses/ServerErrorResponse' + + delete: + tags: + - Index Catalog API + summary: Drop an index from the catalog + description: Remove an index from the catalog. This will delete the index and all its data. 
+ operationId: dropIndex + parameters: + - $ref: '#/components/parameters/idempotency-key' + responses: + 204: + description: Success, no content + 400: + $ref: '#/components/responses/BadRequestErrorResponse' + 401: + $ref: '#/components/responses/UnauthorizedResponse' + 403: + $ref: '#/components/responses/ForbiddenResponse' + 404: + description: Not Found - The index does not exist + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + examples: + NoSuchIndexError: + $ref: '#/components/examples/NoSuchIndexError' + 419: + $ref: '#/components/responses/AuthenticationTimeoutResponse' + 503: + $ref: '#/components/responses/ServiceUnavailableResponse' + 5XX: + $ref: '#/components/responses/ServerErrorResponse' + + head: + tags: + - Index Catalog API + summary: Check if an index exists + description: Check if an index exists for a given table. The response does not contain a body. + operationId: indexExists + responses: + 204: + description: Success, no content - Index exists + 400: + $ref: '#/components/responses/BadRequestErrorResponse' + 401: + $ref: '#/components/responses/UnauthorizedResponse' + 403: + $ref: '#/components/responses/ForbiddenResponse' + 404: + description: Not Found - Index not found + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + examples: + NoSuchIndexError: + $ref: '#/components/examples/NoSuchIndexError' + 419: + $ref: '#/components/responses/AuthenticationTimeoutResponse' + 503: + $ref: '#/components/responses/ServiceUnavailableResponse' + 5XX: + $ref: '#/components/responses/ServerErrorResponse' + + /v1/{prefix}/namespaces/{namespace}/tables/{table}/register-index: + parameters: + - $ref: '#/components/parameters/prefix' + - $ref: '#/components/parameters/namespace' + - $ref: '#/components/parameters/table' + + post: + tags: + - Index Catalog API + summary: Register an index using a metadata file location + description: | + Register an index in the 
catalog using a given metadata file location. + This allows importing an existing index from its metadata file. + operationId: registerIndex + parameters: + - $ref: '#/components/parameters/idempotency-key' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterIndexRequest' + responses: + 200: + $ref: '#/components/responses/LoadIndexResponse' + 400: + $ref: '#/components/responses/BadRequestErrorResponse' + 401: + $ref: '#/components/responses/UnauthorizedResponse' + 403: + $ref: '#/components/responses/ForbiddenResponse' + 404: + description: Not Found - The table or namespace does not exist + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + examples: + NoSuchTableError: + $ref: '#/components/examples/NoSuchTableError' + NoSuchNamespaceError: + $ref: '#/components/examples/NoSuchNamespaceError' + 409: + description: Conflict - An index with the same identifier already exists + content: + application/json: + schema: + $ref: '#/components/schemas/IcebergErrorResponse' + examples: + IndexAlreadyExists: + $ref: '#/components/examples/IndexAlreadyExistsError' + 419: + $ref: '#/components/responses/AuthenticationTimeoutResponse' + 503: + $ref: '#/components/responses/ServiceUnavailableResponse' + 5XX: + $ref: '#/components/responses/ServerErrorResponse' + components: ####################################################### # Common Parameter Definitions Used In Several Routes # @@ -1954,6 +2293,15 @@ components: type: string example: "sales" + index: + name: index + in: path + description: An index name + required: true + schema: + type: string + example: "customer_id_btree_idx" + data-access: name: X-Iceberg-Access-Delegation in: header @@ -2679,187 +3027,608 @@ components: type: integer format: int64 - TableMetadata: + TableMetadata: + type: object + required: + - format-version + - table-uuid + properties: + format-version: + type: integer + minimum: 1 + maximum: 3 + 
table-uuid: + type: string + location: + type: string + last-updated-ms: + type: integer + format: int64 + next-row-id: + type: integer + format: int64 + description: A long higher than all assigned row IDs; the next snapshot's first-row-id. + properties: + type: object + additionalProperties: + type: string + # schema tracking + schemas: + type: array + items: + $ref: '#/components/schemas/Schema' + current-schema-id: + type: integer + last-column-id: + type: integer + # partition spec tracking + partition-specs: + type: array + items: + $ref: '#/components/schemas/PartitionSpec' + default-spec-id: + type: integer + last-partition-id: + type: integer + # sort order tracking + sort-orders: + type: array + items: + $ref: '#/components/schemas/SortOrder' + default-sort-order-id: + type: integer + # encryption + encryption-keys: + type: array + items: + $ref: '#/components/schemas/EncryptedKey' + # snapshot tracking + snapshots: + type: array + items: + $ref: '#/components/schemas/Snapshot' + refs: + $ref: '#/components/schemas/SnapshotReferences' + current-snapshot-id: + type: integer + format: int64 + last-sequence-number: + type: integer + format: int64 + # logs + snapshot-log: + $ref: '#/components/schemas/SnapshotLog' + metadata-log: + $ref: '#/components/schemas/MetadataLog' + # statistics + statistics: + type: array + items: + $ref: '#/components/schemas/StatisticsFile' + partition-statistics: + type: array + items: + $ref: '#/components/schemas/PartitionStatisticsFile' + + SQLViewRepresentation: + type: object + required: + - type + - sql + - dialect + properties: + type: + type: string + sql: + type: string + dialect: + type: string + + ViewRepresentation: + oneOf: + - $ref: '#/components/schemas/SQLViewRepresentation' + + ViewHistoryEntry: + type: object + required: + - version-id + - timestamp-ms + properties: + version-id: + type: integer + timestamp-ms: + type: integer + format: int64 + + ViewVersion: + type: object + required: + - version-id + - 
timestamp-ms + - schema-id + - summary + - representations + - default-namespace + properties: + version-id: + type: integer + timestamp-ms: + type: integer + format: int64 + schema-id: + type: integer + description: Schema ID to set as current, or -1 to set last added schema + summary: + type: object + additionalProperties: + type: string + representations: + type: array + items: + $ref: '#/components/schemas/ViewRepresentation' + default-catalog: + type: string + default-namespace: + $ref: '#/components/schemas/Namespace' + + ViewMetadata: + type: object + required: + - view-uuid + - format-version + - location + - current-version-id + - versions + - version-log + - schemas + properties: + view-uuid: + type: string + format-version: + type: integer + minimum: 1 + maximum: 1 + location: + type: string + current-version-id: + type: integer + versions: + type: array + items: + $ref: '#/components/schemas/ViewVersion' + version-log: + type: array + items: + $ref: '#/components/schemas/ViewHistoryEntry' + schemas: + type: array + items: + $ref: '#/components/schemas/Schema' + properties: + type: object + additionalProperties: + type: string + + IndexIdentifier: + type: object + description: Identifies an index within a catalog, scoped to a table + required: + - namespace + - table + - name + properties: + namespace: + $ref: '#/components/schemas/Namespace' + table: + type: string + description: The table name + name: + type: string + description: The index name + example: + namespace: ["accounting", "tax"] + table: "sales" + name: "customer_id_btree_idx" + + IndexType: + type: string + description: The type of index algorithm + enum: + - bloom + - btree + - term + - ivf + example: "btree" + + IndexSummary: + type: object + description: A lightweight summary of an index for discovery and optimization + required: + - identifier + - type + - index-column-ids + - optimized-column-ids + - available-table-snapshots + properties: + identifier: + $ref: 
'#/components/schemas/IndexIdentifier' + type: + $ref: '#/components/schemas/IndexType' + index-column-ids: + type: array + description: IDs of columns stored losslessly in the index + items: + type: integer + example: [1, 2] + optimized-column-ids: + type: array + description: IDs of columns the index is optimized for retrieval + items: + type: integer + example: [1] + available-table-snapshots: + type: array + description: Table snapshot IDs for which this index has valid snapshots + items: + type: integer + format: int64 + example: [1234567890123, 1234567890456] + + IndexSnapshot: + type: object + description: Represents a point-in-time snapshot of an index + required: + - index-snapshot-id + - table-snapshot-id + - timestamp-ms + - manifest-list + properties: + index-snapshot-id: + type: integer + format: int64 + description: Unique identifier for this index snapshot + table-snapshot-id: + type: integer + format: int64 + description: The table snapshot ID this index snapshot corresponds to + timestamp-ms: + type: integer + format: int64 + description: Timestamp when this snapshot was created (ms since epoch) + manifest-list: + type: string + description: Location of the manifest list for this snapshot + snapshot-properties: + type: object + additionalProperties: + type: string + description: Properties for this index snapshot typically set by the index maintenance process + + IndexVersion: + type: object + description: Represents a version of the index schema/configuration + required: + - version-id + - timestamp-ms + properties: + version-id: + type: integer + description: Unique identifier for this version + timestamp-ms: + type: integer + format: int64 + description: Timestamp when this version was created (ms since epoch) + snapshot-id: + type: integer + format: int64 + description: The index snapshot ID associated with this version + user-properties: + type: object + additionalProperties: + type: string + description: User provided properties for this 
version + + IndexHistoryEntry: + type: object + description: An entry in the index version history + required: + - timestamp-ms + - version-id + properties: + timestamp-ms: + type: integer + format: int64 + description: Timestamp of the history entry (ms since epoch) + version-id: + type: integer + description: The version ID at this point in history + + IndexMetadata: type: object + description: Full metadata for an index required: - format-version + - index-uuid + - name + - type + - location - table-uuid + - index-column-ids + - optimized-column-ids + - current-version-id properties: format-version: type: integer - minimum: 1 - maximum: 3 - table-uuid: + description: Format version of the index metadata + example: 1 + index-uuid: + type: string + format: uuid + description: Unique UUID for this index + name: type: string + description: The name of the index + type: + $ref: '#/components/schemas/IndexType' location: type: string - last-updated-ms: - type: integer - format: int64 - next-row-id: - type: integer - format: int64 - description: A long higher than all assigned row IDs; the next snapshot's first-row-id. 
- properties: - type: object - additionalProperties: - type: string - # schema tracking - schemas: + description: Base location for index files + table-uuid: + type: string + format: uuid + description: Unique UUID for the table this index is associated with + index-column-ids: type: array + description: IDs of columns stored losslessly in the index items: - $ref: '#/components/schemas/Schema' - current-schema-id: - type: integer - last-column-id: - type: integer - # partition spec tracking - partition-specs: + type: integer + optimized-column-ids: type: array + description: IDs of columns the index is optimized for retrieval items: - $ref: '#/components/schemas/PartitionSpec' - default-spec-id: - type: integer - last-partition-id: + type: integer + current-version-id: type: integer - # sort order tracking - sort-orders: + description: ID of the current index version + versions: type: array + description: List of known index versions items: - $ref: '#/components/schemas/SortOrder' - default-sort-order-id: - type: integer - # encryption - encryption-keys: + $ref: '#/components/schemas/IndexVersion' + version-history: type: array + description: History of version changes items: - $ref: '#/components/schemas/EncryptedKey' - # snapshot tracking + $ref: '#/components/schemas/IndexHistoryEntry' snapshots: type: array + description: List of index snapshots items: - $ref: '#/components/schemas/Snapshot' - refs: - $ref: '#/components/schemas/SnapshotReferences' - current-snapshot-id: - type: integer - format: int64 - last-sequence-number: - type: integer - format: int64 - # logs - snapshot-log: - $ref: '#/components/schemas/SnapshotLog' - metadata-log: - $ref: '#/components/schemas/MetadataLog' - # statistics - statistics: - type: array - items: - $ref: '#/components/schemas/StatisticsFile' - partition-statistics: - type: array - items: - $ref: '#/components/schemas/PartitionStatisticsFile' + $ref: '#/components/schemas/IndexSnapshot' - SQLViewRepresentation: + 
IndexRequirement: type: object + description: A requirement that must be met for an index commit to succeed required: - type - - sql - - dialect properties: type: type: string - sql: - type: string - dialect: + enum: + - assert-index-uuid + description: The type of requirement + uuid: type: string + format: uuid + description: Required for assert-index-uuid + discriminator: + propertyName: type + mapping: + assert-index-uuid: '#/components/schemas/AssertIndexUUID' - ViewRepresentation: - oneOf: - - $ref: '#/components/schemas/SQLViewRepresentation' + AssertIndexUUID: + allOf: + - $ref: '#/components/schemas/IndexRequirement' + - type: object + required: + - uuid + properties: + uuid: + type: string + format: uuid - ViewHistoryEntry: + IndexUpdate: type: object + description: An update to apply to an index + required: + - action + properties: + action: + type: string + enum: + - upgrade-format-version + - add-snapshot + - remove-snapshots + - add-version + - set-current-version + - set-location + description: The type of update + discriminator: + propertyName: action + mapping: + upgrade-format-version: '#/components/schemas/UpgradeIndexFormatVersionUpdate' + add-snapshot: '#/components/schemas/AddIndexSnapshotUpdate' + remove-snapshots: '#/components/schemas/RemoveIndexSnapshotsUpdate' + add-version: '#/components/schemas/AddIndexVersionUpdate' + set-current-version: '#/components/schemas/SetCurrentIndexVersionUpdate' + set-location: '#/components/schemas/SetIndexLocationUpdate' + + UpgradeIndexFormatVersionUpdate: + allOf: + - $ref: '#/components/schemas/IndexUpdate' + - type: object + required: + - format-version + properties: + action: + type: string + const: "upgrade-format-version" + format-version: + type: integer + + AddIndexSnapshotUpdate: + allOf: + - $ref: '#/components/schemas/IndexUpdate' + - type: object + required: + - snapshot + properties: + snapshot: + $ref: '#/components/schemas/IndexSnapshot' + + RemoveIndexSnapshotsUpdate: + allOf: + - $ref: 
'#/components/schemas/IndexUpdate' + - type: object + required: + - snapshot-ids + properties: + snapshot-ids: + type: array + items: + type: integer + format: int64 + + AddIndexVersionUpdate: + allOf: + - $ref: '#/components/schemas/IndexUpdate' + - type: object + required: + - version + properties: + version: + $ref: '#/components/schemas/IndexVersion' + + SetCurrentIndexVersionUpdate: + allOf: + - $ref: '#/components/schemas/IndexUpdate' required: - version-id - - timestamp-ms properties: + action: + type: string + const: "set-current-version" version-id: type: integer - timestamp-ms: - type: integer - format: int64 + description: The index version id to set as current, or -1 to set last added index version id - ViewVersion: + SetIndexLocationUpdate: + allOf: + - $ref: '#/components/schemas/IndexUpdate' + - type: object required: - - version-id - - timestamp-ms - - schema-id - - summary - - representations - - default-namespace + - location properties: - version-id: - type: integer - timestamp-ms: - type: integer - format: int64 - schema-id: - type: integer - description: Schema ID to set as current, or -1 to set last added schema - summary: + location: + type: string + + LoadIndexResult: type: object + description: Result for loading an index required: - - metadata properties: + metadata-location: + type: string + description: The location of the index metadata file + metadata: + $ref: '#/components/schemas/IndexMetadata' + config: type: object additionalProperties: type: string - representations: - type: array - items: - $ref: '#/components/schemas/ViewRepresentation' - default-catalog: - type: string - default-namespace: - $ref: '#/components/schemas/Namespace' - ViewMetadata: + CreateIndexRequest: type: object + description: Request to create a new index required: - - view-uuid - - format-version - - location - - current-version-id - - versions - - version-log - - schemas + - name + - type + - table-uuid + - index-column-ids properties: - 
view-uuid: + name: type: string - format-version: - type: integer - minimum: 1 - maximum: 1 - location: + description: The name for the new index + example: "customer_id_btree_idx" + type: + $ref: '#/components/schemas/IndexType' + table-uuid: type: string - current-version-id: - type: integer - versions: - type: array - items: - $ref: '#/components/schemas/ViewVersion' - version-log: + format: uuid + description: The UUID of the table to index + index-column-ids: type: array + description: IDs of columns to store losslessly in the index items: - $ref: '#/components/schemas/ViewHistoryEntry' - schemas: + type: integer + example: [1, 2] + optimized-column-ids: type: array + description: IDs of columns the index is optimized for retrieval items: - $ref: '#/components/schemas/Schema' + type: integer + example: [1] + location: + type: string + description: Optional base location for index files properties: type: object additionalProperties: type: string + description: Index-specific properties + index-snapshot-id: + type: integer + format: int64 + description: Unique identifier for this index snapshot + table-snapshot-id: + type: integer + format: int64 + description: The table snapshot ID this index snapshot corresponds to + snapshot-properties: + type: object + additionalProperties: + type: string + description: Properties for this index snapshot typically set by the index maintenance process + + RegisterIndexRequest: + type: object + description: Request to register an existing index by metadata file location + required: + - name + - metadata-location + properties: + name: + type: string + description: The name to register the index under + metadata-location: + type: string + description: Location of the index metadata file + + CommitIndexRequest: + type: object + description: Request to commit updates to an index + properties: + requirements: + type: array + description: Requirements that must be met before applying updates + items: + $ref: 
'#/components/schemas/IndexRequirement' + updates: + type: array + description: Updates to apply to the index + items: + $ref: '#/components/schemas/IndexUpdate' BaseUpdate: discriminator: @@ -4977,6 +5746,29 @@ components: schema: $ref: '#/components/schemas/LoadCredentialsResponse' + ListIndexesResponse: + description: A list of index summaries + content: + application/json: + schema: + type: object + required: + - indexes + properties: + next-page-token: + $ref: '#/components/schemas/PageToken' + indexes: + type: array + items: + $ref: '#/components/schemas/IndexSummary' + + LoadIndexResponse: + description: The loaded index metadata + content: + application/json: + schema: + $ref: '#/components/schemas/LoadIndexResult' + ####################################### # Common examples of different values # ####################################### @@ -5144,6 +5936,26 @@ components: "updates": { "owner": "Raoul" } } + NoSuchIndexError: + summary: The requested index does not exist + value: { + "error": { + "message": "Index does not exist: customer_id_btree_idx in accounting.sales", + "type": "NoSuchIndexException", + "code": 404 + } + } + + IndexAlreadyExistsError: + summary: An index with the same name already exists + value: { + "error": { + "message": "Index already exists: customer_id_btree_idx in accounting.sales", + "type": "AlreadyExistsException", + "code": 409 + } + } + securitySchemes: OAuth2: type: oauth2 diff --git a/site/docs/secondary-indexes.md b/site/docs/secondary-indexes.md new file mode 100644 index 000000000000..9bfa0dd18c38 --- /dev/null +++ b/site/docs/secondary-indexes.md @@ -0,0 +1,415 @@ + +# Iceberg secondary indexes + +# Motivation +Well established database systems like Oracle, MsSQL, Postgres, MySql provide a possibility for the users to create indexes above existing tables. Database indexes are powerful because they significantly reduce query latency by allowing engines to locate data without scanning entire datasets. 
They enable efficient filtering, point lookups, and range queries, which is critical for large-scale analytics and mixed workloads. For Apache Iceberg, integrating index support would unlock similar benefits: faster query execution, reduced I/O, and shared indexes across engines. Features like inverted indexes for deletes, auxiliary versions for selective access, and vector indexes for similarity search can make Iceberg tables more suitable for real-time analytics and AI workloads, while preserving its strong governance and snapshot-based consistency model. + +# Goals +* Define the generic metadata structure for indexes +* For a few specific index types + * Define data and metadata structure for these indexes + * Defining the maintenance processes for these indexes + * Defining supported usage patterns for indexes + +# Non-Goals +* The mentioned index types are just examples, and far from exhaustive +* The concrete implementation of the indexes could be discussed later in more details + +# Definitions +## Index +A **database index** is a specialized data structure that improves the speed of data retrieval operations on a database table. The index can be computed either synchronously and committed along the DDL/DML changes or asynchronously and updated by an index maintenance process. This proposal focuses on indexes that are maintained asynchronously. +## Index type +Index data structures and algorithms are constantly evolving, so it’s important to define a flexible metadata framework that supports adding new index types as the Iceberg specification evolves. For reference, here is a [collection](https://docs.google.com/spreadsheets/d/14cBdwsOw89ivolHtAw342YNoGmb1-Kri1E80hwWymL0) of structures referred to as indexes by various engines. + +An **index type** defines the algorithm and the underlying data structure that governs the behavior of the index. 
Together with its configuration parameters, this specification enables both query engines and users to evaluate whether employing the index will provide performance benefits for specific workloads. + +An index type specifies: +* The column types it can optimize for +* The column types it can include +* The available user-defined properties for the index +* The data layout used by the index +## Index instance +An **index instance** is a concrete, parameterized realization of an index type applied to a specific table. It represents an actual index built with defined properties, configuration settings, and scoped for the designated table. + +Users can create index instances by specifying: +* The source table +* The columns to optimize +* The columns to include +* The user-defined properties for the index +Users can create multiple instances of the same index type, each configured with different properties. +## Index snapshot +An **index snapshot** is a version of the index data that corresponds to a specific snapshot of the table. When table data changes, the index maintenance process periodically generates new index snapshots, ensuring that queries against the latest table snapshot can leverage the updated index. + +The maintenance process may also optimize the index layout and produce additional properties for each snapshot to enhance the index’s effectiveness. + +# Layout +We evaluated the pros and cons of storing index metadata in dedicated index metadata files versus embedding it in the table metadata file. + +Advantages of separate index metadata files: +* Prevents the table metadata file from growing in size. +* Aligns with common engine practices, where indexes are maintained in separate structures. +* Reduces coupling between indexes and tables, allowing for greater flexibility. 
+ +Advantages of storing base index metadata in the table metadata file: +* During query planning, the engine needs quick access to: + * Available indexes + * Index freshness +* If this metadata is embedded in the table metadata, only the indexes actually used need to be read. Otherwise, multiple additional file reads would be required to retrieve index metadata during planning. + +To stay aligned with current metadata layout practices, we chose to store index metadata in dedicated metadata files. In some cases, retrieving this metadata may be too costly. When that happens, the catalog can use caching to serve index metadata more efficiently, or we may later decide to duplicate selected portions of it into the table metadata file to speed up access for query optimizers. + +# Usage +## SQL +### Listing +It should be possible to list the available indexes for a table through SQL using metadata tables like: + +```sql +SELECT * FROM persons.indexes; +``` + +### Manipulation +We need to provide an API to create, drop, and update indexes through SQL: +* We could prepare for SQL commands for creation like this: +```sql +CREATE INDEX nat_index ON persons USING BTREE([nationality], [first_name, last_name]); +``` + or in Spark +```sql +CALL create_index( + table => "persons", + name => "nat_index", + type => BTREE, + optimized-columns => ARRAY(nationality), + index-columns => ARRAY(first_name, last_name)); +``` +* We could prepare for SQL commands for dropping an index like this: +```sql +DROP INDEX persons.nat_index; +``` + or in Spark +```sql +CALL drop_index( + table => "persons", + name => "nat_index"); +``` +* We could prepare for SQL commands for altering an index like this: +```sql +ALTER INDEX persons.ivf_index SET quantizer = new_quantizer; +``` + or in Spark +```sql +CALL alter_index( + table => "persons", + name => "nat_index", + properties => MAP("quantizer", "new_quantizer")); +``` + +## Catalog +The indexes will be stored and accessible through the Catalog. 
+ +An index instance is uniquely identified by its **IndexIdentifier**, which is constructed by combining the **TableIdentifier** with the index name. This ensures that index names are scoped to their respective tables. + +**Example**: + +For a table *persons* in the *company* database with an index named *nationality\_index*, the resulting **IndexIdentifier** would be: + +*company.persons.nationality\_index* + +This format guarantees uniqueness across tables and databases. +### Java API +We need to provide a *listIndexes* functionality which enables query optimizers to discover the indexes available for a given table. The returned list must already be filtered to include only index types supported by the engine. Each returned *BaseIndex* entry must provide all information required for the optimizer to decide whether the index is applicable to a query or should be skipped. + +The **BaseIndex** fields are: + +| Type | Name | Requirement | Description | +| :---- | :---- | :---- | :---- | +| IndexIdentifier | id | required | The unique identifier for the index instance. | +| IndexType | type | required | The type of the index instance. | +| int\[\] | indexColumnIds | required | The columns which are stored losslessly in the index instance. | +| int\[\] | optimizedColumnIds | required | The index is optimized for retrieval based on these columns. | +| long\[\] | availableTableSnapshots | required | The index has valid snapshots corresponding to these source table snapshots. | +We also require methods to load, create, update, and delete indexes. Each of these methods must return the complete set of index metadata, encapsulated in a DetailedIndex object. + +The **DetailedIndex** is an extension of the BaseIndex and the extra fields are: + +| Type | Name | Requirement | Description | +| :---- | :---- | :---- | :---- | +| String | location | required | Index’s base location; used to create index file locations. 
| Long | currentVersionId | required | ID of the current version of the index (version-id). | +| List\<Version\> | versions | required | A list of known versions of the index; the number of versions retained is implementation-specific. current-version-id must be present in this list. | +| List\<VersionLogEntry\> | versionLog | required | A list of version log entries with the timestamp and version-id for every change to current-version-id. The number of entries retained is implementation-specific. | +| List\<IndexSnapshot\> | snapshots | required | During the index maintenance a new index snapshot is generated for the specific Table snapshot and it is added to the snapshots list. | +All-in-all, we need the following new Catalog methods: + +| Return type | Name | Parameters | Description | +| :---- | :---- | :---- | :---- | +| List\<BaseIndex\> | listIndexes | TableIdentifier, IndexType\[\] | Returns a list of index instances for the specified table, filtered to include only those whose type matches one of the provided types. | +| DetailedIndex | createIndex | TableIdentifier, String name, String location, IndexType, int\[\] indexColumnIds, int\[\] optimizedColumnIds, Map\<String, String\> userProperties | Creates an index instance with the given parameters. | +| DetailedIndex | loadIndex | IndexIdentifier | Loads the details of a specific index instance. | +| DetailedIndex | updateIndex | IndexIdentifier, IndexUpdateData | Updates a given index instance. The IndexUpdateData is either VersionUpdateData (used when the user updates the index properties; contains a single userProperties Map\<String, String\>) or SnapshotUpdateData (used by the index maintenance process; contains a snapshotProperties Map\<String, String\>, a tableSnapshotId and an indexSnapshotId). | +| void | dropIndex | IndexIdentifier | Drops a specific index instance | +| boolean | indexExists | IndexIdentifier | Checks if an index instance exists with the given id. | + +## Data +The index type must explicitly define all available properties \- both user-defined and maintenance-service-defined \- as well as the contents of the index location. 
If certain parameters cannot be represented within the properties map, the index may specify custom Puffin file layouts to store these values. In such cases, the properties map should include references to the corresponding data, such as file names and byte offsets within the Puffin file. + +## In query +There are 2 main use-cases for indexes depending on the content of the index: +* **Covering index**: When all of the queried data is contained in the index then it is enough to query the index to return the query results +* **Skipping index**: The skipping index could be used to collect the rowIds which need to be read, and then the engine needs to read the original table to read the actual rows not contained in the index itself. + +# Index Metadata +Every available index should be listed in the catalog. The list of the indexes could be used by the engines and the query planner to decide if an index could be useful during the query execution/scan. + +The following metadata should be stored per index in the index metadata json: + +| Field name | Field Type | Requirement | Description | +|:-------------------|:-----------------------|:------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| index-uuid | String | required | A UUID that identifies the index, generated when the index is created. Implementations must throw an exception if an index's UUID does not match the expected UUID after refreshing metadata | +| format-version | Integer | required | An integer version number for the index metadata format; format-version is 1 for current version of spec. | +| type | String | required | One of the supported index-types. For example: BTREE, TERM, IVF. Must be supplied during the creation of an index and must not be changed. 
| +| index-columns | List of Integer | required | The ids of the columns contained by the index. | +| optimized-columns | List of Integer | required | The ids of the columns that the index is designed to optimize for retrieval. | +| location | String | required | Index’s base location; used to create index file locations. | +| current-version-id | Integer | required | ID of the current version of the index (version-id). | +| versions | List of \<version\> | required | A list of known versions of the index, the number of versions retained is implementation-specific. current-version-id must be present in this list. | +| version-log | List of \<version-log\> | optional | A list of version log entries with the timestamp and version-id for every change to current-version-id. The number of entries retained is implementation-specific. current-version-id may or may not be present in this list. | +| snapshots | List of \<snapshot\> | optional | During the index maintenance a new index snapshot is generated for the specific Table snapshot and it is added to the snapshots list. | +## Version +Each Version in versions is a struct with the following fields: + +| Field name | Field type | Requirement | Description | +| :---- | :---- | :---- | :---- | +| version-id | Integer | required | ID for the version | +| timestamp-ms | Long | required | Timestamp when the version was created (ms since epoch) | +| properties | Map\<String, String\> | optional | A map of index properties, represented as string-to-string pairs, supplied by the user. | +## Version log +The version log tracks changes to the index's current version. This is the index's history and allows reconstructing what version of the index would have been used at some point in time. + +Note that this is not the version's creation time, which is stored in each version's metadata. A version can appear multiple times in the version log, indicating that the index definition was rolled back. 
+ +Each entry in version-log is a struct with the following fields: + +| Field name | Type | Requirement | Description | +| :---- | :---- | :---- | :---- | +| timestamp-ms | Long | required | Timestamp when the index’s current-version-id was updated (ms from epoch) | +| version-id | Integer | required | ID that current-version-id was set to | +## Snapshot +Index data is versioned using snapshots, similar to table data. Each index snapshot is derived from a specific table snapshot, ensuring consistency. When an engine queries a table snapshot, it must determine whether a corresponding index snapshot exists and, if so, determine which properties should be applied for index evaluation. + +This relationship is maintained in the table’s metadata file through an index-snapshot list. This list is updated whenever an index maintenance process creates a new snapshot for the index and links it to the corresponding base table snapshot, or when the index maintenance process decides to expire index snapshots. + +| Field name | Type | Requirement | Description | +| :---- | :---- | :---- | :---- | +| table-snapshot-id | Long | required | The table snapshot id which is the base of the index version | +| index-snapshot-id | Long | required | The index snapshot id | +| version-id | Long | required | The index version id when the snapshot was created. | +| properties | Map\<String, String\> | optional | A map of index properties, represented as string-to-string pairs, supplied by the Index Maintenance process. | +# Proposed implementation +To reduce complexity, we propose focusing on a few examples: +* **B-Tree index**: For tables stored in blobstores, the current sorted and partitioned tables are a prime example for B-Tree indexes. +* **Full text index:** A full-text index can be implemented as a B-Tree, where terms serve as keys and the associated values are lists of pointers to the target rows. 
Considering how Parquet handles lists, an alternative approach is to store term–*\_file*–*\_pos* triplets directly. +* **IVF-PQ index:** IVF-PQ is essentially a Product-Quantized (PQ) value table which is partitioned based on the nearest centroid. + +These examples illustrate that several key index types can be implemented as standard [Iceberg Materialized Views](https://docs.google.com/document/d/1UnhldHhe3Grz8JBngwXPA6ZZord1xMedY5ukEhZYF-A), enabling engines to reuse existing components for reading, writing, and maintaining indexes. In such cases, the index’s data content is simply a storage table of a materialized view. The column identifiers in the materialized view schema should match those in the original table schema to allow query engines to seamlessly substitute the original table’s data files with the materialized view’s data files and execute queries against the view immediately. If they do not match, a name‑mapping process must be applied. +## Example +If we have a table defined as (pseudo code): +```sql +CREATE TABLE persons ( + person_id int, + salary int, + last_name string, + first_name string, + resume string, + nationality string +) ORDER BY person_id ASC; +``` +This table is optimized to return persons when searched by person\_id, but would be very inefficient to find persons based on nationality. 
+### B-Tree index +When the user creates a B-Tree index, like: +```sql +CREATE INDEX nat_index + ON persons + USING BTREE([nationality], [person_id, last_name, first_name]); +``` +We could create a B-Tree index by creating a materialized view above the original table for enum columns like: +```sql +CREATE MATERIALIZED VIEW nat_index AS + SELECT person_id, last_name, first_name, nationality + FROM persons + PARTITIONED BY bucket(nationality, bucket_num) + ORDER BY nationality ASC; +``` +We could create a B-Tree index by creating a materialized view above the original table for columns with high cardinality that contain orderable data, such as numeric or date fields: +```sql +CREATE MATERIALIZED VIEW salary_index AS + SELECT person_id, last_name, first_name, salary + FROM persons + PARTITIONED BY truncate(salary, truncate_width) + ORDER BY salary ASC; +``` +The resulting view should include all optimized columns and index columns. The index creation and maintenance logic can apply partitioning and ordering to produce an optimal layout, ensuring the view returns efficient results when queried by the optimized columns. 
+### Full text index +When the user creates a TERM index, like: +```sql +CREATE INDEX term_index ON persons USING TERM(resume, [_file, _pos]); +``` +If a UDF named *get\_terms* is available to extract terms from resumes and returns *row\_id–term* pairs, we can create a Term index by defining a materialized view on top of the original table as follows: +```sql +CREATE MATERIALIZED VIEW term_index AS + SELECT terms.term, persons._file, persons._pos + FROM persons, TABLE(get_terms(persons.row_id, persons.resume)) terms + WHERE persons.row_id = terms.row_id + PARTITIONED BY truncate(term, truncate_size) + ORDER BY term ASC; +``` +```sql +CREATE MATERIALIZED VIEW term_index AS + SELECT terms.term, persons._file, persons._pos + FROM persons + LATERAL VIEW get_terms(persons.resume) AS terms + PARTITIONED BY truncate(term, truncate_size) + ORDER BY term ASC; +``` +The resulting view should include the terms, along with the *\_file* and *\_pos* columns for rows containing those terms. The index creation and maintenance logic can apply partitioning and ordering to produce an optimal layout, ensuring the view efficiently returns positions of records where a person's résumé contains a given term. +### IVF-PQ index +When the user creates an IVF index, like: +```sql +CREATE INDEX ivf_index ON persons USING IVF(resume, embedding, quantizer, [_file, _pos]); +``` +If we have: +* A UDF for getting the embedding for the resume called *embedding*, and +* A UDF for quantizing the vector called *quantizer*, and +* A UDF for getting the nearest centroids for a vector called *centroid*, +then we could create a Vector index by creating a materialized view above the original table like: +```sql +CREATE MATERIALIZED VIEW ivf_index AS + SELECT quantizer(embedding(resume)), _file, _pos + FROM persons + PARTITIONED BY centroid(embedding(resume)); +``` +The resulting view should include the quantized vector for the résumé along with the *\_file* and *\_pos* columns for each row. 
The index creation and maintenance logic can apply partitioning and ordering to produce an optimal layout, ensuring the view efficiently returns the positions of records where a person's résumé is most similar to the one being searched. +## Usage +### Index properties +Certain indexes require properties provided by the user during index creation or generated by table maintenance operations. These properties are defined by the index type and stored in the index version or index snapshot metadata—for example, quantizer settings for vector indexes. For example, the quantizer properties are kept in a separate Puffin file and referenced in the snapshot properties map. +### Covering index +When an index contains all the columns referenced in a query, the Iceberg planner can substitute the original table with the corresponding materialized view during scan planning. +If there is an index that: +* Has a matching materialized view snapshot for the table snapshot referenced by the query, +* Includes all required columns, +* And has its ordering columns aligned with those referenced in the filter expression and/or the order by columns, +then the materialized view should serve as the base for the query instead of the original table. +### Skipping index +For skipping indexes, the engine may need to split the query into two phases: +* Scan the relevant index to retrieve row identifiers (filename and pos, or rowId, or primary key columns). +* Use these results to query the original table and fetch the actual rows. 
+The optimizer should choose this approach when: +* The query is expected to return only a small number of rows, and +* There is an index that: + * Has a matching materialized view snapshot for the table snapshot referenced by the query, + * Includes the most selective filtering columns, + * Contains columns which identify a single row + * filename and position columns, or + * rowId column, or + * the primary key columns for the table + * Has ordering columns aligned with those used in the filter expression. +In such cases, the engine should first query the index, then use the returned filenames and positions to retrieve the final results from the original table. +### Example queries +#### Example table +```sql +CREATE TABLE persons ( + person_id int, + salary int, + last_name string, + first_name string, + resume string, + nationality string +) ORDER BY person_id ASC; +``` +#### Initial index definitions +```sql +CREATE INDEX nat_index + ON persons + USING BTREE([nationality], [person_id, last_name, first_name]); + +CREATE INDEX salary_index + ON persons + USING BTREE([salary], [last_name, first_name]); + +CREATE INDEX term_index ON persons USING TERM(resume, [_file, _pos]); + +CREATE INDEX ivf_index ON persons USING IVF(resume, embedding, quantizer, [_file, _pos]); +``` +#### Query behavior +Covering query: +* Query: `SELECT first_name, last_name WHERE nationality='HU'` +* Index Usage: *nat\_index* can act as a **covering index**, allowing the engine to query only the materialized view. + +Range query: +* Query: `SELECT first_name, last_name WHERE salary > 100000` +* Index Usage: *salary\_index* can act as a **covering index**, allowing the engine to query only the materialized view. + +Skipping query: +* Query: `SELECT first_name, last_name, resume WHERE nationality='HU'` +* Index Usage: *nat\_index* works as a **skipping index**. 
The engine retrieves the relevant identifier list (*person\_id* in this case) from the index, then performs an additional step to fetch rows from the base table. + +Term query: +* Query: `SELECT first_name, last_name WHERE MATCH(resume, 'Iceberg')` +* Index Usage: *term\_index* works as a **skipping index**, returning *\_file* and *\_pos* references, followed by a lookup in the original table. + +IVF query: +* Query: `SELECT first_name, last_name WHERE IVF_SEARCH(resume, embedding('Ryan Blue''s resume'), TOP_K => 10)` +* Index Usage: *ivf\_index* works as a **skipping index**, returning *\_file* and *\_pos* references, followed by a lookup in the original table. + +#### Improving Performance +If users want faster results for two-stage queries and accept the extra storage and maintenance cost, they can create specialized indexes that store additional columns: +```sql +CREATE INDEX nat_index + ON persons + USING BTREE([nationality], [last_name, first_name, resume]); + +CREATE INDEX term_index ON persons USING TERM(resume, [last_name, first_name]); + +CREATE INDEX ivf_index + ON persons + USING IVF(resume, embedding, quantizer, [last_name, first_name]); +``` +With these enhanced indexes, all the queries above can be resolved by querying only the indexes, eliminating the need for additional lookups. +## Index Maintenance +### View Maintenance +Index maintenance would happen asynchronously, and the Materialized view maintenance procedures could be used. Index metadata contains information about the freshness of the index, and the engines/planners could decide if the index could be used, or the query should be issued against the original table. +### Index optimization +Index internal parameters could be optimized by the maintenance process for better performance, like partitioning logic, or quantizer parameters. +### Snapshot expiration +Unused index snapshots should be removed. 
This process can be triggered when the number of snapshots for an index exceeds the configured limit, or when the corresponding table snapshot has been deleted and the index snapshot is no longer in use. +## Missing building blocks +Many of the building blocks for the proposed solution are already available, or the specification is in progress: + +| Block | Component | Status | | +| :---- | :---- | :---- | :---- | +| **Spec changes for indexes** | Specification | New | [https://docs.google.com/document/d/1N6a2IOzC6Qsqv7NBqHKesees4N6WF49YUSIX2FrF7S0](https://docs.google.com/document/d/1N6a2IOzC6Qsqv7NBqHKesees4N6WF49YUSIX2FrF7S0) | +| **Catalog changes and example JDBC implementation** | Catalog implementation | New | | +| **REST Catalog changes** | Catalog implementation | New | | +| **Materialized views** | Index implementation | ⚠️- in progress | [https://docs.google.com/document/d/1UnhldHhe3Grz8JBngwXPA6ZZord1xMedY5ukEhZYF-A](https://docs.google.com/document/d/1UnhldHhe3Grz8JBngwXPA6ZZord1xMedY5ukEhZYF-A) | +| **View switching for covering indexes** | Index implementation | New | | +| **UDF specification** | Term index | ⚠️- in progress | [https://docs.google.com/document/d/1BDvOfhrH0ZQiQv9eLBqeAu8k8Vjfmeql9VzIiW1F0vc](https://docs.google.com/document/d/1BDvOfhrH0ZQiQv9eLBqeAu8k8Vjfmeql9VzIiW1F0vc) | +| **Rust/Java UDF** | IVF index \- Quantizer | New \- as a workaround, we can register the UDF in the engine using the provided JARs and invoke it from SQL. | | +| **UDF partitioning** | IVF index \- Centroid | New \- as a workaround, it could be an extra column | | +| **Query optimizer changes** | Skipping index implementation | New | | +| **Index maintenance** | Index maintenance | ⚠️- same as for materialized views | | +| **Index optimization** | Index maintenance | New | |