|
33 | 33 | import io.trino.plugin.hive.HiveCompressionCodec;
|
34 | 34 | import io.trino.plugin.hive.NodeVersion;
|
35 | 35 | import io.trino.plugin.hive.orc.OrcWriterConfig;
|
| 36 | +import io.trino.plugin.iceberg.delete.DeletionVectorWriter; |
36 | 37 | import io.trino.plugin.iceberg.fileio.ForwardingOutputFile;
|
37 | 38 | import io.trino.spi.TrinoException;
|
38 | 39 | import io.trino.spi.connector.ConnectorSession;
|
39 | 40 | import io.trino.spi.type.Type;
|
40 | 41 | import io.trino.spi.type.TypeManager;
|
| 42 | +import org.apache.iceberg.FileFormat; |
41 | 43 | import org.apache.iceberg.MetricsConfig;
|
| 44 | +import org.apache.iceberg.PartitionSpec; |
42 | 45 | import org.apache.iceberg.Schema;
|
| 46 | +import org.apache.iceberg.deletes.PositionDeleteIndex; |
43 | 47 | import org.apache.iceberg.types.Types;
|
| 48 | +import org.apache.iceberg.util.DeleteFileSet; |
44 | 49 | import org.weakref.jmx.Managed;
|
45 | 50 |
|
46 | 51 | import java.io.Closeable;
|
47 | 52 | import java.io.IOException;
|
48 | 53 | import java.util.List;
|
49 | 54 | import java.util.Map;
|
50 | 55 | import java.util.Optional;
|
| 56 | +import java.util.function.Function; |
51 | 57 | import java.util.function.Supplier;
|
52 | 58 | import java.util.stream.IntStream;
|
53 | 59 |
|
54 | 60 | import static com.google.common.base.Preconditions.checkArgument;
|
| 61 | +import static com.google.common.base.Preconditions.checkState; |
55 | 62 | import static com.google.common.collect.ImmutableList.toImmutableList;
|
56 | 63 | import static io.trino.plugin.hive.HiveCompressionCodecs.toCompressionCodec;
|
57 | 64 | import static io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME;
|
|
82 | 89 | import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED;
|
83 | 90 | import static java.lang.String.format;
|
84 | 91 | import static java.util.Objects.requireNonNull;
|
| 92 | +import static org.apache.iceberg.MetadataColumns.DELETE_FILE_POS; |
85 | 93 | import static org.apache.iceberg.TableProperties.DEFAULT_WRITE_METRICS_MODE;
|
86 | 94 | import static org.apache.iceberg.io.DeleteSchemaUtil.pathPosSchema;
|
87 | 95 | import static org.apache.iceberg.parquet.ParquetSchemaUtil.convert;
|
@@ -139,16 +147,37 @@ public IcebergFileWriter createPositionDeleteWriter(
|
139 | 147 | TrinoFileSystem fileSystem,
|
140 | 148 | Location outputPath,
|
141 | 149 | ConnectorSession session,
|
142 |
| - IcebergFileFormat fileFormat, |
143 |
| - Map<String, String> storageProperties) |
| 150 | + String dataFilePath, |
| 151 | + FileFormat fileFormat, |
| 152 | + PartitionSpec partitionSpec, |
| 153 | + Optional<PartitionData> partition, |
| 154 | + Map<String, String> storageProperties, |
| 155 | + Map<String, DeleteFileSet> previousDeleteFiles) |
144 | 156 | {
|
145 | 157 | return switch (fileFormat) {
|
| 158 | + case PUFFIN -> createDeletionVectorWriter(nodeVersion, fileSystem, outputPath, dataFilePath, partitionSpec, partition, previousDeleteFiles); |
146 | 159 | case PARQUET -> createParquetWriter(FULL_METRICS_CONFIG, fileSystem, outputPath, POSITION_DELETE_SCHEMA, session, storageProperties);
|
147 | 160 | case ORC -> createOrcWriter(FULL_METRICS_CONFIG, fileSystem, outputPath, POSITION_DELETE_SCHEMA, session, storageProperties, DataSize.ofBytes(Integer.MAX_VALUE));
|
148 | 161 | case AVRO -> createAvroWriter(fileSystem, outputPath, POSITION_DELETE_SCHEMA, session);
|
| 162 | + case METADATA -> throw new IllegalArgumentException("Unexpected METADATA file format"); |
149 | 163 | };
|
150 | 164 | }
|
151 | 165 |
|
| 166 | + private static DeletionVectorWriter createDeletionVectorWriter( |
| 167 | + NodeVersion nodeVersion, |
| 168 | + TrinoFileSystem fileSystem, |
| 169 | + Location outputPath, |
| 170 | + String dataFilePath, |
| 171 | + PartitionSpec partitionSpec, |
| 172 | + Optional<PartitionData> partition, |
| 173 | + Map<String, DeleteFileSet> previousDeleteFiles) |
| 174 | + { |
| 175 | + Function<CharSequence, PositionDeleteIndex> previousDeleteLoader = DeletionVectorWriter.create(fileSystem, previousDeleteFiles); |
| 176 | + int positionChannel = POSITION_DELETE_SCHEMA.columns().indexOf(DELETE_FILE_POS); |
| 177 | + checkState(positionChannel != -1, "positionChannel not found"); |
| 178 | + return new DeletionVectorWriter(nodeVersion, fileSystem, outputPath, dataFilePath, partitionSpec, partition, previousDeleteLoader::apply, positionChannel); |
| 179 | + } |
| 180 | + |
152 | 181 | private IcebergFileWriter createParquetWriter(
|
153 | 182 | MetricsConfig metricsConfig,
|
154 | 183 | TrinoFileSystem fileSystem,
|
@@ -234,6 +263,7 @@ private IcebergFileWriter createOrcWriter(
|
234 | 263 | }
|
235 | 264 |
|
236 | 265 | return new IcebergOrcFileWriter(
|
| 266 | + outputPath, |
237 | 267 | metricsConfig,
|
238 | 268 | icebergSchema,
|
239 | 269 | orcDataSink,
|
|
0 commit comments