Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core: Apply correct metric configs in GenericAppenderFactory #12366

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Map;
import java.util.function.Supplier;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.MetricsConfig;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.Table;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.data.avro.DataWriter;
import org.apache.iceberg.data.orc.GenericOrcWriter;
Expand All @@ -44,41 +46,85 @@

/** Factory to create a new {@link FileAppender} to write {@link Record}s. */
public class GenericAppenderFactory implements FileAppenderFactory<Record> {

// Table backing this factory; null when built through the schema-only constructors,
// which delegate with a null table.
private final Table table;
// Write schema; taken from the table when a table is given and no schema is passed.
private final Schema schema;
// Partition spec; taken from the table when a table is given and no spec is passed.
private final PartitionSpec spec;
private final int[] equalityFieldIds;
private final Schema eqDeleteRowSchema;
private final Schema posDeleteRowSchema;
// NOTE(review): the two `config` declarations below look like the removed and the added
// line of a diff shown together; only one can exist in compilable code.
private final Map<String, String> config = Maps.newHashMap();
private final Map<String, String> config;

// Property keys starting with this prefix are rejected by set/setAll when a table is present.
private static final String WRITE_METRICS_PREFIX = "write.metadata.metrics.";

/**
 * Creates a factory for the given schema with an unpartitioned spec and no table.
 *
 * <p>NOTE(review): marked deprecated without a Javadoc {@code @deprecated} note; presumably the
 * table-based constructors are the intended replacement - confirm and document.
 *
 * <p>NOTE(review): the two {@code this(...)} lines below look like the removed and the added
 * line of a diff shown together; only one delegation can exist in compilable code.
 *
 * @param schema schema of the records to write
 */
@Deprecated
public GenericAppenderFactory(Schema schema) {
this(schema, PartitionSpec.unpartitioned(), null, null, null);
this(schema, PartitionSpec.unpartitioned());
}

/**
 * Creates a factory for the given schema and partition spec, with no equality field ids and no
 * delete row schemas.
 *
 * <p>NOTE(review): marked deprecated without a Javadoc {@code @deprecated} note; presumably the
 * table-based constructors are the intended replacement - confirm and document.
 *
 * @param schema schema of the records to write
 * @param spec partition spec to use
 */
@Deprecated
public GenericAppenderFactory(Schema schema, PartitionSpec spec) {
// Delegates to the five-argument constructor with all delete-related arguments unset.
this(schema, spec, null, null, null);
}

/**
 * Creates a factory without a table, from an explicit schema, spec and delete configuration.
 *
 * <p>NOTE(review): marked deprecated without a Javadoc {@code @deprecated} note; presumably the
 * table-based constructors are the intended replacement - confirm and document.
 *
 * <p>NOTE(review): the two field assignments followed by an explicit {@code this(...)} call look
 * like removed and added diff lines shown together; an explicit constructor call must be the
 * first statement, so only the delegation can remain in compilable code.
 *
 * @param schema schema of the records to write
 * @param spec partition spec to use
 * @param equalityFieldIds field ids for equality deletes; may be null
 * @param eqDeleteRowSchema row schema for equality deletes; may be null
 * @param posDeleteRowSchema row schema for position deletes; may be null
 */
@Deprecated
public GenericAppenderFactory(
Schema schema,
PartitionSpec spec,
int[] equalityFieldIds,
Schema eqDeleteRowSchema,
Schema posDeleteRowSchema) {
this.schema = schema;
this.spec = spec;
// Null table and null config: the delegated constructor then uses a fresh, empty config map.
this(null, schema, spec, null, equalityFieldIds, eqDeleteRowSchema, posDeleteRowSchema);
}

/**
 * Creates a factory bound to a table, passing no explicit schema, spec, config or delete
 * configuration.
 *
 * <p>The delegated constructor falls back to {@code table.schema()} and {@code table.spec()}
 * when the table is non-null and the corresponding argument is null.
 *
 * @param table table to write to
 */
public GenericAppenderFactory(Table table) {
this(table, null, null, null, null, null, null);
}

/**
 * Creates a factory from a table and/or explicit write settings.
 *
 * <p>When {@code table} is non-null, a null {@code schema} or {@code spec} is replaced by the
 * table's own schema or spec. When {@code table} is null, the given values are used as-is.
 *
 * <p>The given {@code config} is copied, so later calls to {@link #set} or {@link #setAll} never
 * modify the caller's map and work even when the caller passes an immutable map.
 *
 * @param table table to write to; may be null
 * @param schema schema of the records to write; if null, the table's schema is used when a table
 *     is given
 * @param spec partition spec; if null, the table's spec is used when a table is given
 * @param config initial writer properties; may be null, which is treated as empty
 * @param equalityFieldIds field ids for equality deletes; may be null
 * @param eqDeleteRowSchema row schema for equality deletes; may be null
 * @param posDeleteRowSchema row schema for position deletes; may be null
 */
public GenericAppenderFactory(
    Table table,
    Schema schema,
    PartitionSpec spec,
    Map<String, String> config,
    int[] equalityFieldIds,
    Schema eqDeleteRowSchema,
    Schema posDeleteRowSchema) {
  this.table = table;
  this.schema = (table != null && schema == null) ? table.schema() : schema;
  this.spec = (table != null && spec == null) ? table.spec() : spec;

  // Defensive copy: this factory mutates its config in set/setAll, so it must own the map
  // instead of aliasing (and possibly failing on) whatever the caller handed in.
  this.config = config == null ? Maps.newHashMap() : Maps.newHashMap(config);
  this.equalityFieldIds = equalityFieldIds;
  this.eqDeleteRowSchema = eqDeleteRowSchema;
  this.posDeleteRowSchema = posDeleteRowSchema;
}

/**
 * Sets a single writer property.
 *
 * @param property property key
 * @param value property value
 * @return this factory, for chaining
 * @throws IllegalArgumentException if the key starts with the write-metrics prefix and this
 *     factory was created with a table
 */
public GenericAppenderFactory set(String property, String value) {
  boolean metricsProperty = property.startsWith(WRITE_METRICS_PREFIX);
  if (!metricsProperty || table == null) {
    config.put(property, value);
    return this;
  }

  // With a table present, metrics settings must come from the table's own properties.
  throw new IllegalArgumentException(
      String.format(
          "Cannot set metrics property: %s directly. Use table properties instead.", property));
}

/**
 * Sets several writer properties at once.
 *
 * <p>Nothing is stored if the call is rejected: the check runs before any property is added.
 *
 * @param properties properties to add
 * @return this factory, for chaining
 * @throws IllegalArgumentException if any key starts with the write-metrics prefix and this
 *     factory was created with a table
 */
public GenericAppenderFactory setAll(Map<String, String> properties) {
  boolean containsMetricsProperty = false;
  for (String key : properties.keySet()) {
    if (key.startsWith(WRITE_METRICS_PREFIX)) {
      containsMetricsProperty = true;
      break;
    }
  }

  if (containsMetricsProperty && table != null) {
    throw new IllegalArgumentException(
        "Cannot set metrics properties directly. Use table properties instead.");
  }

  config.putAll(properties);
  return this;
}
Expand All @@ -91,7 +137,8 @@ public FileAppender<Record> newAppender(OutputFile outputFile, FileFormat fileFo
@Override
public FileAppender<Record> newAppender(
EncryptedOutputFile encryptedOutputFile, FileFormat fileFormat) {
MetricsConfig metricsConfig = MetricsConfig.fromProperties(config);
MetricsConfig metricsConfig = applyMetricsConfig(() -> MetricsConfig.forTable(table));

try {
switch (fileFormat) {
case AVRO:
Expand Down Expand Up @@ -151,8 +198,7 @@ public EqualityDeleteWriter<Record> newEqDeleteWriter(
Preconditions.checkNotNull(
eqDeleteRowSchema,
"Equality delete row schema shouldn't be null when creating equality-delete writer");

MetricsConfig metricsConfig = MetricsConfig.fromProperties(config);
MetricsConfig metricsConfig = applyMetricsConfig(() -> MetricsConfig.forTable(table));

try {
switch (format) {
Expand Down Expand Up @@ -206,7 +252,7 @@ public EqualityDeleteWriter<Record> newEqDeleteWriter(
@Override
public PositionDeleteWriter<Record> newPosDeleteWriter(
EncryptedOutputFile file, FileFormat format, StructLike partition) {
MetricsConfig metricsConfig = MetricsConfig.fromProperties(config);
MetricsConfig metricsConfig = applyMetricsConfig(() -> MetricsConfig.forPositionDelete(table));

try {
switch (format) {
Expand Down Expand Up @@ -252,4 +298,15 @@ public PositionDeleteWriter<Record> newPosDeleteWriter(
throw new UncheckedIOException(e);
}
}

/**
 * Resolves the metrics config for a writer.
 *
 * @param metricsConfigSupplier produces the metrics config to use when a table is present; it
 *     is only invoked in that case
 * @return the supplier's result when this factory has a table, otherwise the config built from
 *     this factory's properties
 */
private MetricsConfig applyMetricsConfig(Supplier<MetricsConfig> metricsConfigSupplier) {
  if (table != null) {
    return metricsConfigSupplier.get();
  }

  return MetricsConfig.fromProperties(config);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,21 @@
*/
package org.apache.iceberg;

import static org.assertj.core.api.Assertions.assertThatNoException;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.util.List;
import java.util.Map;
import org.apache.iceberg.data.GenericAppenderFactory;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.io.FileAppenderFactory;
import org.apache.iceberg.io.TestAppenderFactory;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.util.ArrayUtil;
import org.apache.iceberg.util.StructLikeSet;
import org.junit.jupiter.api.TestTemplate;

public class TestGenericAppenderFactory extends TestAppenderFactory<Record> {

Expand All @@ -36,8 +42,10 @@ public class TestGenericAppenderFactory extends TestAppenderFactory<Record> {
protected FileAppenderFactory<Record> createAppenderFactory(
List<Integer> equalityFieldIds, Schema eqDeleteSchema, Schema posDeleteRowSchema) {
return new GenericAppenderFactory(
table,
table.schema(),
table.spec(),
Maps.newHashMap(),
ArrayUtil.toIntArray(equalityFieldIds),
eqDeleteSchema,
posDeleteRowSchema);
Expand All @@ -54,4 +62,60 @@ protected StructLikeSet expectedRowSet(Iterable<Record> records) {
records.forEach(set::add);
return set;
}

/** Setting a single metrics property must be rejected when the factory was built with a table. */
@TestTemplate
void illegalSetConfig() {
  FileAppenderFactory<Record> created = createAppenderFactory(null, null, null);
  GenericAppenderFactory factory = (GenericAppenderFactory) created;
  String metricsProperty = TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS;

  assertThatThrownBy(() -> factory.set(metricsProperty, MetricsModes.None.get().toString()))
      .as("Should not allow setting metrics property if the table was provided")
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessageContaining("Cannot set metrics property: " + metricsProperty);
}

/** Bulk-setting metrics properties must be rejected when the factory was built with a table. */
@TestTemplate
void illegalSetAllConfigs() {
  Map<String, String> metricsProperties =
      ImmutableMap.of(
          TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS,
          "10",
          TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "id",
          MetricsModes.Full.get().toString());

  FileAppenderFactory<Record> created = createAppenderFactory(null, null, null);
  GenericAppenderFactory factory = (GenericAppenderFactory) created;

  assertThatThrownBy(() -> factory.setAll(metricsProperties))
      .as("Should not allow setting metrics property if the table was provided")
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessageContaining("Cannot set metrics properties directly");
}

/** Non-metrics properties may still be set on a table-backed factory. */
@TestTemplate
void setConfigExcludeMetrics() {
  FileAppenderFactory<Record> created = createAppenderFactory(null, null, null);
  GenericAppenderFactory factory = (GenericAppenderFactory) created;
  Map<String, String> otherProperties = ImmutableMap.of("key2", "value2");

  assertThatNoException().isThrownBy(() -> factory.set("key1", "value1"));
  assertThatNoException().isThrownBy(() -> factory.setAll(otherProperties));
}

/** Without a table, metrics properties may be set directly on the factory. */
@TestTemplate
void setConfigWithoutTable() {
  GenericAppenderFactory factory = new GenericAppenderFactory(SCHEMA);
  Map<String, String> defaultModeProperties =
      ImmutableMap.of(TableProperties.DEFAULT_WRITE_METRICS_MODE, "full");

  assertThatNoException()
      .isThrownBy(() -> factory.set(TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS, "10"));
  assertThatNoException().isThrownBy(() -> factory.setAll(defaultModeProperties));
}
}