diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/TransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/TransformFunction.java index 95ee40f7279a..b50a0ad8a708 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/TransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/TransformFunction.java @@ -26,6 +26,7 @@ import org.apache.pinot.core.operator.blocks.ValueBlock; import org.apache.pinot.core.operator.transform.TransformResultMetadata; import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.apache.pinot.spi.data.FieldSpec; import org.roaringbitmap.RoaringBitmap; @@ -171,4 +172,55 @@ default void init(List arguments, Map */ @Nullable RoaringBitmap getNullBitmap(ValueBlock block); + + /** + * Validates transform function configuration during table creation. + */ + default void validateIngestionConfig(String transformFunctionExpression, org.apache.pinot.spi.data.Schema schema) { + // Default: no validation + } + + /** + * Returns whether this function supports ingestion-time transformation. + */ + default boolean supportsIngestionTransform() { + return true; + } + + /** + * Infers output data type based on input arguments. + */ + default org.apache.pinot.spi.data.FieldSpec.DataType inferOutputDataType(List inputArguments, + org.apache.pinot.spi.data.Schema schema) { + return FieldSpec.DataType.STRING; + } + + /** + * Returns expected input data types for validation. + */ + default FieldSpec.DataType[] getExpectedInputDataTypes() { + return new org.apache.pinot.spi.data.FieldSpec.DataType[0]; + } + + /** + * Returns minimum number of arguments required. + */ + default int getMinArgumentCount() { + return 0; + } + + /** + * Returns maximum number of arguments allowed (-1 for unlimited). + */ + default int getMaxArgumentCount() { + return -1; + } + + /** + * Validates input data type compatibility. + */ + default boolean isInputDataTypeSupported(FieldSpec.DataType inputType, + int argumentIndex) { + return true; + } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/IngestionConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/IngestionConfig.java index 2c3fc664b879..31e2b15b97c6 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/IngestionConfig.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/IngestionConfig.java @@ -76,6 +76,9 @@ public void setSchemaConformingTransformerV2Config( @JsonPropertyDescription("Configs related to check time value for segment") private boolean _segmentTimeValueCheck = true; + @JsonPropertyDescription("Default validation mode for transform functions: STRICT, LENIENT, or LEGACY") + private TransformConfig.ValidationMode _defaultTransformValidationMode; + @Deprecated public IngestionConfig(@Nullable BatchIngestionConfig batchIngestionConfig, @Nullable StreamIngestionConfig streamIngestionConfig, @Nullable FilterConfig filterConfig, @@ -200,4 +203,14 @@ public void setRowTimeValueCheck(boolean rowTimeValueCheck) { public void setSegmentTimeValueCheck(boolean segmentTimeValueCheck) { _segmentTimeValueCheck = segmentTimeValueCheck; } + + @Nullable + public TransformConfig.ValidationMode getDefaultTransformValidationMode() { + return _defaultTransformValidationMode; + } + + public void setDefaultTransformValidationMode( + @Nullable TransformConfig.ValidationMode defaultTransformValidationMode) { + _defaultTransformValidationMode = defaultTransformValidationMode; + } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/TransformConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/TransformConfig.java index 468308d196ce..024ae3e0774c 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/TransformConfig.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/TransformConfig.java @@ -29,17 +29,33 @@ */ public class TransformConfig extends BaseJsonConfig { + public enum ValidationMode { + STRICT, // No automatic type conversions allowed + LENIENT, // Allow safe type conversions (recommended) + LEGACY // Allow all existing conversions including STRING->numeric (default) + } + @JsonPropertyDescription("Column name") private final String _columnName; @JsonPropertyDescription("Transformation function string") private final String _transformFunction; + @JsonPropertyDescription("Validation mode for type checking: STRICT, LENIENT, or LEGACY") + private final ValidationMode _validationMode; + @JsonCreator public TransformConfig(@JsonProperty("columnName") String columnName, - @JsonProperty("transformFunction") String transformFunction) { + @JsonProperty("transformFunction") String transformFunction, + @JsonProperty("validationMode") ValidationMode validationMode) { _columnName = columnName; _transformFunction = transformFunction; + _validationMode = validationMode != null ? validationMode : ValidationMode.LEGACY; + } + + // Backward compatibility constructor + public TransformConfig(String columnName, String transformFunction) { + this(columnName, transformFunction, ValidationMode.LEGACY); } public String getColumnName() { @@ -49,4 +65,8 @@ public String getColumnName() { public String getTransformFunction() { return _transformFunction; } + + public ValidationMode getValidationMode() { + return _validationMode; + } }