/**
 * Copyright © 2017 Jeremy Custenborder ([email protected])
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.jcustenborder.kafka.connect.transform.common;

import com.github.jcustenborder.kafka.connect.utils.config.Description;
import com.github.jcustenborder.kafka.connect.utils.config.DocumentationNote;
import com.github.jcustenborder.kafka.connect.utils.config.Title;
import com.github.jcustenborder.kafka.connect.utils.transformation.BaseKeyValueTransformation;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.ConnectRecord;
import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.math.BigDecimal;
import java.math.MathContext;
import java.math.RoundingMode;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

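/**
 * Example connector configuration (a sketch only; {@code transforms} and
 * {@code transforms.*.type} are standard Kafka Connect properties, but the
 * per-transform property names below are illustrative assumptions; see
 * {@link AdjustPrecisionAndScaleConfig} for the actual keys):
 *
 * <pre>
 * transforms=adjustDecimal
 * transforms.adjustDecimal.type=com.github.jcustenborder.kafka.connect.transform.common.AdjustPrecisionAndScale$Value
 * # hypothetical keys for the target precision and scale
 * transforms.adjustDecimal.precision.value=38
 * transforms.adjustDecimal.scale.value=10
 * </pre>
 */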
@Title("AdjustPrecisionAndScale")
@Description("This transformation is used to ensure that all decimal fields in a struct fall within " +
    "the desired range. It can enforce a maximum precision and maximum scale, as well as require a positive scale.")
@DocumentationNote("The Confluent AvroConverter uses a default precision of 64, which can be too large " +
    "for some database systems.")
public class AdjustPrecisionAndScale<R extends ConnectRecord<R>> extends BaseKeyValueTransformation<R> {
  private static final Logger log = LoggerFactory.getLogger(AdjustPrecisionAndScale.class);

  public AdjustPrecisionAndScale(boolean isKey) {
    super(isKey);
  }

  @Override
  public ConfigDef config() {
    return AdjustPrecisionAndScaleConfig.config();
  }

  @Override
  public void close() {
  }

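  // Transformation settings; populated in configure(Map) before any records are processed.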
  AdjustPrecisionAndScaleConfig config;

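  // Schema parameter names: the Confluent AvroConverter records the source precision under
  // 'connect.decimal.precision', and Connect's Decimal logical type records the scale under 'scale'.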
  static final String CONNECT_AVRO_DECIMAL_PRECISION_PROP = "connect.decimal.precision";
  static final String CONNECT_AVRO_DECIMAL_SCALE_PROP = "scale";

  // Streams/lambdas don't add much value here; they make the logic harder to reason about without
  // making it any cleaner. Plain for loops are also faster.
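  // Pass 1 below rebuilds the schema with adjusted Decimal field parameters; pass 2 copies the values
  // into a new Struct, re-rounding/re-scaling the BigDecimal values of any modified fields.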
  @Override
  protected SchemaAndValue processStruct(R record, Schema inputSchema, Struct inputStruct) {
    Set<String> modifiedFields = new HashSet<>();

    Schema outputSchema;
    Struct outputStruct;

    SchemaBuilder builder = SchemaBuilder.struct()
        .name(inputSchema.name())
        .doc(inputSchema.doc())
        .version(inputSchema.version());
    if (null != inputSchema.parameters() && !inputSchema.parameters().isEmpty()) {
      builder.parameters(inputSchema.parameters());
    }

    // Iterate over all fields to generate new schemas
    // Only perform logic on 'org.apache.kafka.connect.data.Decimal' fields; otherwise, directly copy field schema to new schema
    for (Field field : inputSchema.fields()) {
      if (Decimal.LOGICAL_NAME.equals(field.schema().name())) {
        log.trace("Looking at {}", field.name());

        // If we're coming from a Connector that doesn't have precision defined, default to 'undefined precision' (which is 0)
        int precision = Integer.parseInt(field.schema().parameters().getOrDefault(CONNECT_AVRO_DECIMAL_PRECISION_PROP, "0"));
        int scale = Integer.parseInt(field.schema().parameters().getOrDefault(CONNECT_AVRO_DECIMAL_SCALE_PROP, "0"));

        boolean undefinedPrecision = precision == config.undefinedPrecisionValue;
        boolean exceededPrecision = precision > config.precision;

        // If precision is undefined, we assume scale is undefined as well
        boolean undefinedScale = undefinedPrecision || scale == config.undefinedScaleValue;
        boolean zeroScale = scale == 0;
        boolean exceededScale = scale > config.scale;
        boolean negativeScale = scale < 0;

        // If in undefined mode, set precision to provided value if precision is undefined
        // If in max mode, set precision to provided value if precision is undefined or exceeds provided value
        boolean setPrecision = (config.precisionMode.equals(AdjustPrecisionAndScaleConfig.PRECISION_MODE_UNDEFINED) && undefinedPrecision) ||
            (config.precisionMode.equals(AdjustPrecisionAndScaleConfig.PRECISION_MODE_MAX) && (undefinedPrecision || exceededPrecision));

        // Set scale to provided scale value if any of the following are true:
        //   scale mode is 'undefined' and scale is undefined
        //   scale mode is 'max' and scale is undefined or over value
        //   scale zero mode is 'value' and scale is zero
        //   scale negative mode is 'value' and scale is negative
        boolean setScaleValue = (config.scaleMode.equals(AdjustPrecisionAndScaleConfig.SCALE_MODE_UNDEFINED) && undefinedScale) ||
            (config.scaleMode.equals(AdjustPrecisionAndScaleConfig.SCALE_MODE_MAX) && (undefinedScale || exceededScale)) ||
            (config.scaleZeroMode.equals(AdjustPrecisionAndScaleConfig.SCALE_ZERO_MODE_VALUE) && zeroScale) ||
            (config.scaleNegativeMode.equals(AdjustPrecisionAndScaleConfig.SCALE_NEGATIVE_MODE_VALUE) && negativeScale);

        // Set scale to zero if any of the following are true:
        //   scale negative mode is 'zero' and scale is negative
        boolean setScaleZero = (config.scaleNegativeMode.equals(AdjustPrecisionAndScaleConfig.SCALE_NEGATIVE_MODE_ZERO) && negativeScale);

        // Do nothing to scale in these situations:
        //   scale mode is none OR (scale is positive and does not exceed provided value)
        //   AND scale negative mode is none
        //   AND scale zero mode is none OR scale is nonzero

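        // Start from the original field parameters so any converter-specific metadata is preserved;
        // the precision and scale entries are overwritten below.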
        Map<String, String> parameters = new LinkedHashMap<>();
        if (null != field.schema().parameters() && !field.schema().parameters().isEmpty()) {
          parameters.putAll(field.schema().parameters());
        }

        // Set precision to provided value or pull from struct schema
        if (setPrecision) {
          parameters.put(CONNECT_AVRO_DECIMAL_PRECISION_PROP, Integer.toString(config.precision));
        } else {
          parameters.put(CONNECT_AVRO_DECIMAL_PRECISION_PROP, Integer.toString(precision));
        }

        // Set scale to provided value, to zero, or pull from struct schema
        if (setScaleValue) {
          parameters.put(CONNECT_AVRO_DECIMAL_SCALE_PROP, Integer.toString(config.scale));
          scale = config.scale;
        } else if (setScaleZero) {
          parameters.put(CONNECT_AVRO_DECIMAL_SCALE_PROP, Integer.toString(0));
          scale = 0;
        } else {
          parameters.put(CONNECT_AVRO_DECIMAL_SCALE_PROP, Integer.toString(scale));
        }

        if (setPrecision || setScaleValue || setScaleZero) {
          modifiedFields.add(field.name());
        }

        SchemaBuilder fieldBuilder = Decimal.builder(scale)
            .parameters(parameters)
            .doc(field.schema().doc())
            .version(field.schema().version());

        if (field.schema().isOptional()) {
          fieldBuilder.optional();
        }
        Schema fieldSchema = fieldBuilder.build();
        builder.field(field.name(), fieldSchema);
      } else {
        // Not a Decimal
        log.trace("processStruct() - copying field '{}' to new schema.", field.name());
        builder.field(field.name(), field.schema());
      }
    }

    outputSchema = builder.build();
    outputStruct = new Struct(outputSchema);

    // Hydrate Struct by iterating over fields again
    for (Field field : outputSchema.fields()) {
      if (modifiedFields.contains(field.name())) {
        BigDecimal originalBigDecimal = (BigDecimal) inputStruct.get(field.name());
        if (null != originalBigDecimal) {
          int precision = Integer.parseInt(field.schema().parameters().get(CONNECT_AVRO_DECIMAL_PRECISION_PROP));
          int scale = Integer.parseInt(field.schema().parameters().get(CONNECT_AVRO_DECIMAL_SCALE_PROP));

          // RoundingMode _shouldn't_ matter here because the source data presumably has the same precision and scale;
          // it was just 'lost' (not picked up) by the Connector (prior to the SMT)
          // Precision of the BigDecimal will be total scale + total number of digits to left of decimal
          // For example: 12345.67890 with a scale of 5 will have precision of 10, regardless of desired precision,
          // but the schema will reflect both desired precision and scale
          // Order of scale vs. round doesn't seem to matter here
          MathContext mc = new MathContext(precision);
          BigDecimal newBigDecimal = originalBigDecimal.round(mc).setScale(scale, RoundingMode.FLOOR);
          outputStruct.put(field.name(), newBigDecimal);
        } else {
          outputStruct.put(field.name(), null);
        }
      } else {
        log.trace("processStruct() - copying field '{}' to new struct.", field.name());
        outputStruct.put(field.name(), inputStruct.get(field.name()));
      }
    }

    return new SchemaAndValue(outputSchema, outputStruct);
  }

  @Override
  public void configure(Map<String, ?> settings) {
    this.config = new AdjustPrecisionAndScaleConfig(settings);
  }

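  // Applies the precision/scale adjustment to the record key.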
  public static class Key<R extends ConnectRecord<R>> extends AdjustPrecisionAndScale<R> {
    public Key() {
      super(true);
    }
  }

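  // Applies the precision/scale adjustment to the record value.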
  public static class Value<R extends ConnectRecord<R>> extends AdjustPrecisionAndScale<R> {
    public Value() {
      super(false);
    }
  }
}