Skip to content

Commit 69f00c5

Browse files
Maintain partition schema even after altering table schema
1 parent 9c26e5f commit 69f00c5

File tree

7 files changed

+61
-15
lines changed

7 files changed

+61
-15
lines changed

presto-hive/src/main/java/io/prestosql/plugin/hive/HivePageSource.java

+6-1
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,12 @@ public HivePageSource(
126126
}
127127

128128
if (columnMapping.getKind() == PREFILLED) {
129-
prefilledValues[columnIndex] = typedPartitionKey(columnMapping.getPrefilledValue(), type, name);
129+
if (columnMapping.getPrefilledValue() == null) {
130+
prefilledValues[columnIndex] = null;
131+
}
132+
else {
133+
prefilledValues[columnIndex] = typedPartitionKey(columnMapping.getPrefilledValue(), type, name);
134+
}
130135
}
131136
}
132137
this.coercers = coercers.build();

presto-hive/src/main/java/io/prestosql/plugin/hive/HivePageSourceProvider.java

+38-8
Original file line number | Diff line number | Diff line change
@@ -50,12 +50,15 @@
5050
import io.prestosql.spi.type.TypeManager;
5151
import org.apache.hadoop.conf.Configuration;
5252
import org.apache.hadoop.fs.Path;
53+
import org.apache.hadoop.hive.serde.serdeConstants;
54+
import org.apache.hadoop.hive.serde2.SerDeUtils;
5355
import org.eclipse.jetty.util.URIUtil;
5456

5557
import javax.inject.Inject;
5658

5759
import java.net.URI;
5860
import java.util.ArrayList;
61+
import java.util.Arrays;
5962
import java.util.HashSet;
6063
import java.util.List;
6164
import java.util.Map;
@@ -75,8 +78,10 @@
7578
import static io.prestosql.plugin.hive.HivePageSourceProvider.ColumnMapping.toColumnHandles;
7679
import static io.prestosql.plugin.hive.HiveUtil.isPartitionFiltered;
7780
import static io.prestosql.plugin.hive.coercions.HiveCoercer.createCoercer;
81+
import static io.prestosql.plugin.hive.metastore.MetastoreUtil.META_PARTITION_COLUMNS;
7882
import static java.util.Objects.requireNonNull;
7983
import static java.util.stream.Collectors.toList;
84+
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS;
8085

8186
public class HivePageSourceProvider
8287
implements ConnectorPageSourceProvider
@@ -170,6 +175,24 @@ private ConnectorPageSource createPageSourceInternal(ConnectorSession session,
170175
Configuration configuration = hdfsEnvironment.getConfiguration(
171176
new HdfsEnvironment.HdfsContext(session, hiveSplit.getDatabase(), hiveSplit.getTable()), path);
172177

178+
Properties schema = hiveSplit.getSchema();
179+
String columnNameDelimiter = schema.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? schema
180+
.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
181+
List<String> partitionColumnNames;
182+
if (schema.containsKey(META_PARTITION_COLUMNS)) {
183+
partitionColumnNames = Arrays.asList(schema.getProperty(META_PARTITION_COLUMNS).split(columnNameDelimiter));
184+
}
185+
else if (schema.containsKey(META_TABLE_COLUMNS)) {
186+
partitionColumnNames = Arrays.asList(schema.getProperty(META_TABLE_COLUMNS).split(columnNameDelimiter));
187+
}
188+
else {
189+
partitionColumnNames = new ArrayList<>();
190+
}
191+
192+
List<String> tableColumns = hiveColumns.stream().map(cols -> cols.getName()).collect(toList());
193+
194+
List<String> missingColumns = tableColumns.stream().filter(cols -> !partitionColumnNames.contains(cols)).collect(toList());
195+
173196
List<IndexMetadata> indexes = new ArrayList<>();
174197
if (indexCache != null && session.isHeuristicIndexFilterEnabled()) {
175198
indexes.addAll(this.indexCache.getIndices(session
@@ -221,7 +244,7 @@ session, hiveSplit, assignUniqueIndicesToPartitionColumns(hiveColumns), typeMana
221244
hiveTable.getDisjunctCompactEffectivePredicate(),
222245
hiveSplit.getBucketConversion(),
223246
hiveSplit.getBucketNumber(),
224-
hiveSplit.getLastModifiedTime());
247+
hiveSplit.getLastModifiedTime(), missingColumns);
225248
}
226249

227250
Optional<ConnectorPageSource> pageSource = createHivePageSource(
@@ -249,7 +272,7 @@ session, hiveSplit, assignUniqueIndicesToPartitionColumns(hiveColumns), typeMana
249272
splitMetadata,
250273
hiveSplit.isCacheable(),
251274
hiveSplit.getLastModifiedTime(),
252-
hiveSplit.getCustomSplitInfo());
275+
hiveSplit.getCustomSplitInfo(), missingColumns);
253276
if (pageSource.isPresent()) {
254277
return pageSource.get();
255278
}
@@ -291,6 +314,7 @@ private static List<HiveColumnHandle> assignUniqueIndicesToPartitionColumns(List
291314
* @param predicateColumns Map of all columns handles being part of predicate
292315
* @param additionPredicates Predicates related to OR clause.
293316
* Remaining columns are same as for createHivePageSource.
317+
* @param missingColumns
294318
* @return
295319
*/
296320
private static ConnectorPageSource createSelectivePageSource(
@@ -310,7 +334,7 @@ private static ConnectorPageSource createSelectivePageSource(
310334
Optional<List<TupleDomain<HiveColumnHandle>>> additionPredicates,
311335
Optional<HiveSplit.BucketConversion> bucketConversion,
312336
OptionalInt bucketNumber,
313-
long dataSourceLastModifiedTime)
337+
long dataSourceLastModifiedTime, List<String> missingColumns)
314338
{
315339
Set<HiveColumnHandle> interimColumns = ImmutableSet.<HiveColumnHandle>builder()
316340
.addAll(predicateColumns.values())
@@ -325,7 +349,7 @@ private static ConnectorPageSource createSelectivePageSource(
325349
split.getColumnCoercions(),
326350
path,
327351
bucketNumber,
328-
true);
352+
true, missingColumns);
329353

330354
List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(
331355
columnMappings);
@@ -411,7 +435,7 @@ public static Optional<ConnectorPageSource> createHivePageSource(
411435
SplitMetadata splitMetadata,
412436
boolean splitCacheable,
413437
long dataSourceLastModifiedTime,
414-
Map<String, String> customSplitInfo)
438+
Map<String, String> customSplitInfo, List<String> missingColumns)
415439
{
416440
List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(
417441
partitionKeys,
@@ -420,7 +444,7 @@ public static Optional<ConnectorPageSource> createHivePageSource(
420444
columnCoercions,
421445
path,
422446
bucketNumber,
423-
true);
447+
true, missingColumns);
424448
List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(
425449
columnMappings);
426450

@@ -603,7 +627,7 @@ public ColumnMappingKind getKind()
603627
public String getPrefilledValue()
604628
{
605629
checkState(kind == ColumnMappingKind.PREFILLED);
606-
return prefilledValue.get();
630+
return prefilledValue.isPresent() ? prefilledValue.get() : HIVE_DEFAULT_PARTITION_VALUE;
607631
}
608632

609633
public HiveColumnHandle getHiveColumnHandle()
@@ -628,6 +652,7 @@ public Optional<HiveType> getCoercionFrom()
628652
* @param requiredInterimColumns columns that are needed for processing, but shouldn't be returned to engine (may overlaps with columns)
629653
* @param columnCoercions map from hive column index to hive type
630654
* @param bucketNumber empty if table is not bucketed, a number within [0, # bucket in table) otherwise
655+
* @param missingColumns
631656
*/
632657
public static List<ColumnMapping> buildColumnMappings(
633658
List<HivePartitionKey> partitionKeys,
@@ -636,7 +661,7 @@ public static List<ColumnMapping> buildColumnMappings(
636661
Map<Integer, HiveType> columnCoercions,
637662
Path path,
638663
OptionalInt bucketNumber,
639-
boolean filterPushDown)
664+
boolean filterPushDown, List<String> missingColumns)
640665
{
641666
Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::getName);
642667
int regularIndex = 0;
@@ -645,6 +670,11 @@ public static List<ColumnMapping> buildColumnMappings(
645670
for (HiveColumnHandle column : columns) {
646671
Optional<HiveType> coercionFrom = Optional.ofNullable(columnCoercions.get(column.getHiveColumnIndex()));
647672
if (column.getColumnType() == REGULAR) {
673+
if (missingColumns.contains(column.getColumnName())) {
674+
columnMappings.add(new ColumnMapping(ColumnMappingKind.PREFILLED, column, Optional.empty(),
675+
OptionalInt.empty(), coercionFrom));
676+
continue;
677+
}
648678
checkArgument(regularColumnIndices.add(column.getHiveColumnIndex()), "duplicate hiveColumnIndex in columns list");
649679

650680
columnMappings.add(regular(column, regularIndex, coercionFrom));

presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/MetastoreUtil.java

+9-1
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import java.util.Objects;
3030
import java.util.Optional;
3131
import java.util.Properties;
32+
import java.util.stream.Collectors;
3233

3334
import static com.google.common.base.Preconditions.checkArgument;
3435
import static com.google.common.base.Strings.isNullOrEmpty;
@@ -53,6 +54,9 @@
5354

5455
public class MetastoreUtil
5556
{
57+
public static final String META_PARTITION_COLUMNS = "partition_metadata";
58+
public static final String COLUMN_COMMENTS = "columns.comments";
59+
5660
private MetastoreUtil()
5761
{
5862
}
@@ -132,10 +136,14 @@ private static Properties getHiveSchema(
132136
first = false;
133137
}
134138
String columnNames = columnNameBuilder.toString();
139+
String partitionColumnNames = dataColumns.stream()
140+
.map(Column::getName)
141+
.collect(Collectors.joining(","));
135142
String columnTypes = columnTypeBuilder.toString();
136143
schema.setProperty(META_TABLE_COLUMNS, columnNames);
144+
schema.setProperty(META_PARTITION_COLUMNS, partitionColumnNames);
137145
schema.setProperty(META_TABLE_COLUMN_TYPES, columnTypes);
138-
schema.setProperty("columns.comments", columnCommentBuilder.toString());
146+
schema.setProperty(COLUMN_COMMENTS, columnCommentBuilder.toString());
139147

140148
schema.setProperty(SERIALIZATION_DDL, toThriftDdl(tableName, dataColumns));
141149

presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveFileFormats.java

+2-2
Original file line number | Diff line number | Diff line change
@@ -605,7 +605,7 @@ private void testCursorProvider(HiveRecordCursorProvider cursorProvider,
605605
null,
606606
false,
607607
-1L,
608-
ImmutableMap.of());
608+
ImmutableMap.of(), ImmutableList.of());
609609

610610
RecordCursor cursor = ((RecordPageSource) pageSource.get()).getCursor();
611611

@@ -658,7 +658,7 @@ private void testPageSourceFactory(HivePageSourceFactory sourceFactory,
658658
null,
659659
false,
660660
-1L,
661-
ImmutableMap.of());
661+
ImmutableMap.of(), ImmutableList.of());
662662

663663
assertTrue(pageSource.isPresent());
664664

presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveIntegrationSmokeTest.java

+2-2
Original file line number | Diff line number | Diff line change
@@ -3150,10 +3150,10 @@ public void testRenameColumn()
31503150

31513151
assertUpdate(createTable, "SELECT count(*) FROM orders");
31523152
assertUpdate("ALTER TABLE test_rename_column RENAME COLUMN orderkey TO new_orderkey");
3153-
assertQuery("SELECT new_orderkey, orderstatus FROM test_rename_column", "SELECT orderkey, orderstatus FROM orders");
3153+
assertQuery("SELECT new_orderkey, orderstatus FROM test_rename_column", "SELECT NULL, orderstatus FROM orders where orderstatus != 'dfd'");
31543154
assertQueryFails("ALTER TABLE test_rename_column RENAME COLUMN \"$path\" TO test", ".* Cannot rename hidden column");
31553155
assertQueryFails("ALTER TABLE test_rename_column RENAME COLUMN orderstatus TO new_orderstatus", "Renaming partition columns is not supported");
3156-
assertQuery("SELECT new_orderkey, orderstatus FROM test_rename_column", "SELECT orderkey, orderstatus FROM orders");
3156+
assertQuery("SELECT new_orderkey, orderstatus FROM test_rename_column", "SELECT NULL, orderstatus FROM orders");
31573157
assertUpdate("DROP TABLE test_rename_column");
31583158
}
31593159

presto-hive/src/test/java/io/prestosql/plugin/hive/TestOrcPageSourceMemoryTracking.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -517,7 +517,7 @@ public ConnectorPageSource newPageSource(FileFormatDataSourceStats stats, Connec
517517
null,
518518
false,
519519
-1L,
520-
ImmutableMap.of())
520+
ImmutableMap.of(), ImmutableList.of())
521521
.get();
522522
}
523523

presto-hive/src/test/java/io/prestosql/plugin/hive/metastore/TestMetastoreUtil.java

+3
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import java.util.List;
3030
import java.util.Properties;
3131

32+
import static io.prestosql.plugin.hive.metastore.MetastoreUtil.META_PARTITION_COLUMNS;
3233
import static org.apache.hadoop.hive.serde.serdeConstants.COLUMN_NAME_DELIMITER;
3334
import static org.testng.Assert.assertEquals;
3435

@@ -142,6 +143,7 @@ public void testHiveSchemaTable()
142143
Properties expected = MetaStoreUtils.getTableMetadata(TEST_TABLE_WITH_UNSUPPORTED_FIELDS);
143144
expected.remove(COLUMN_NAME_DELIMITER);
144145
Properties actual = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, TEST_SCHEMA));
146+
actual.remove(META_PARTITION_COLUMNS);
145147
assertEquals(actual, expected);
146148
}
147149

@@ -151,6 +153,7 @@ public void testHiveSchemaPartition()
151153
Properties expected = MetaStoreUtils.getPartitionMetadata(TEST_PARTITION_WITH_UNSUPPORTED_FIELDS, TEST_TABLE_WITH_UNSUPPORTED_FIELDS);
152154
expected.remove(COLUMN_NAME_DELIMITER);
153155
Properties actual = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiPartition(TEST_PARTITION_WITH_UNSUPPORTED_FIELDS), ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, TEST_SCHEMA));
156+
actual.remove(META_PARTITION_COLUMNS);
154157
assertEquals(actual, expected);
155158
}
156159

0 commit comments

Comments (0)