Commit 9126c92

feat(SMT): Create SMT to adjust precision and scale (#75)
* initial draft
* second draft; pre-refactor
* Version with all comments
* baseline done
* reduce logs to trace
* refactor to include precision from connector
* Remove comments
1 parent 4a88f67 commit 9126c92
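
For context, wiring this SMT into a connector would look roughly like the sketch below. The transform class name comes from this commit; the property keys are guesses at what AdjustPrecisionAndScaleConfig defines, since that file's key names aren't shown in this diff.

transforms=adjustDecimals
transforms.adjustDecimals.type=com.github.jcustenborder.kafka.connect.transform.common.AdjustPrecisionAndScale$Value
# Hypothetical key names -- the real ones live in AdjustPrecisionAndScaleConfig:
transforms.adjustDecimals.precision.mode=max
transforms.adjustDecimals.precision.value=38
transforms.adjustDecimals.scale.mode=max
transforms.adjustDecimals.scale.value=10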

File tree

3 files changed: +527 -0 lines changed
@@ -0,0 +1,222 @@
/**
 * Copyright © 2017 Jeremy Custenborder ([email protected])
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.jcustenborder.kafka.connect.transform.common;

import com.github.jcustenborder.kafka.connect.utils.config.Description;
import com.github.jcustenborder.kafka.connect.utils.config.DocumentationNote;
import com.github.jcustenborder.kafka.connect.utils.config.Title;
import com.github.jcustenborder.kafka.connect.utils.transformation.BaseKeyValueTransformation;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.ConnectRecord;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Decimal;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.math.BigDecimal;
import java.math.MathContext;
import java.math.RoundingMode;
import java.util.Set;
import java.util.Map;
import java.util.HashSet;
import java.util.LinkedHashMap;

@Title("AdjustPrecisionAndScale")
@Description("This transformation is used to ensure that all decimal fields in a struct fall within " +
    "the desired range. Can set a max precision and max scale, as well as require a positive scale.")
@DocumentationNote("The Confluent AvroConverter uses a default precision of 64 which can be too large " +
    "for some database systems.")
public class AdjustPrecisionAndScale<R extends ConnectRecord<R>> extends BaseKeyValueTransformation<R> {
  private static final Logger log = LoggerFactory.getLogger(AdjustPrecisionAndScale.class);

  public AdjustPrecisionAndScale(boolean isKey) {
    super(isKey);
  }

  @Override
  public ConfigDef config() {
    return AdjustPrecisionAndScaleConfig.config();
  }

  @Override
  public void close() {
  }

  AdjustPrecisionAndScaleConfig config;

  static final String CONNECT_AVRO_DECIMAL_PRECISION_PROP = "connect.decimal.precision";
  static final String CONNECT_AVRO_DECIMAL_SCALE_PROP = "scale";

  // Streams/lambdas don't add much value here; the logic becomes hard to reason about without getting any cleaner.
  // Also, for loops are faster.
  @Override
  protected SchemaAndValue processStruct(R record, Schema inputSchema, Struct inputStruct) {
    Set<String> modifiedFields = new HashSet<>();

    Schema outputSchema;
    Struct outputStruct;

    SchemaBuilder builder = SchemaBuilder.struct()
        .name(inputSchema.name())
        .doc(inputSchema.doc())
        .version(inputSchema.version());
    if (null != inputSchema.parameters() && !inputSchema.parameters().isEmpty()) {
      builder.parameters(inputSchema.parameters());
    }

    // Iterate over all fields to generate new schemas.
    // Only perform logic on 'org.apache.kafka.connect.data.Decimal' fields; otherwise, directly copy the field schema to the new schema.
    for (Field field : inputSchema.fields()) {
      if (Decimal.LOGICAL_NAME.equals(field.schema().name())) {
        log.trace("Looking at {}", field.name());

        int scale, precision;
        // If we're coming from a connector that doesn't have precision defined, default to 'undefined precision' (which is 0).
        precision = Integer.parseInt(field.schema().parameters().getOrDefault(CONNECT_AVRO_DECIMAL_PRECISION_PROP, "0"));
        scale = Integer.parseInt(field.schema().parameters().getOrDefault(CONNECT_AVRO_DECIMAL_SCALE_PROP, "0"));

        boolean undefinedPrecision = precision == config.undefinedPrecisionValue;
        boolean exceededPrecision = precision > config.precision;

        // If precision is undefined, we assume scale is undefined as well.
        boolean undefinedScale = undefinedPrecision || scale == config.undefinedScaleValue;
        boolean zeroScale = scale == 0;
        boolean exceededScale = scale > config.scale;
        boolean negativeScale = scale < 0;

        // If in undefined mode, set precision to the provided value if precision is undefined.
        // If in max mode, set precision to the provided value if precision is undefined or exceeds the provided value.
        boolean setPrecision = (config.precisionMode.equals(AdjustPrecisionAndScaleConfig.PRECISION_MODE_UNDEFINED) && undefinedPrecision) ||
            (config.precisionMode.equals(AdjustPrecisionAndScaleConfig.PRECISION_MODE_MAX) && (undefinedPrecision || exceededPrecision));

        // Set scale to the provided scale value if any of the following are true:
        //   scale mode is 'undefined' and scale is undefined
        //   scale mode is 'max' and scale is undefined or over the value
        //   scale zero mode is 'value' and scale is zero
        //   scale negative mode is 'value' and scale is negative
        boolean setScaleValue = (config.scaleMode.equals(AdjustPrecisionAndScaleConfig.SCALE_MODE_UNDEFINED) && undefinedScale) ||
            (config.scaleMode.equals(AdjustPrecisionAndScaleConfig.SCALE_MODE_MAX) && (undefinedScale || exceededScale)) ||
            (config.scaleZeroMode.equals(AdjustPrecisionAndScaleConfig.SCALE_ZERO_MODE_VALUE) && zeroScale) ||
            (config.scaleNegativeMode.equals(AdjustPrecisionAndScaleConfig.SCALE_NEGATIVE_MODE_VALUE) && negativeScale);

        // Set scale to zero if any of the following are true:
        //   scale negative mode is 'zero' and scale is negative
        boolean setScaleZero = (config.scaleNegativeMode.equals(AdjustPrecisionAndScaleConfig.SCALE_NEGATIVE_MODE_ZERO) && negativeScale);
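
        // Worked example (illustrative values, not from this commit): with
        // precisionMode 'max', precision 38, scaleMode 'max', scale 10, an incoming
        // Decimal of precision 64 and scale 12 gives setPrecision (64 > 38) and
        // setScaleValue (12 > 10), so the rewritten schema carries precision 38, scale 10.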

        // Do nothing to scale in these situations:
        //   scale mode is none OR (scale is positive and does not exceed the provided value)
        //   AND scale negative mode is none
        //   AND scale zero mode is none OR scale is nonzero

        Map<String, String> parameters = new LinkedHashMap<>();
        if (null != field.schema().parameters() && !field.schema().parameters().isEmpty()) {
          parameters.putAll(field.schema().parameters());
        }

        // Set precision to the provided value or pull it from the struct schema.
        if (setPrecision) {
          parameters.put(CONNECT_AVRO_DECIMAL_PRECISION_PROP, Integer.toString(config.precision));
        } else {
          parameters.put(CONNECT_AVRO_DECIMAL_PRECISION_PROP, Integer.toString(precision));
        }

        // Set scale to the provided value, to zero, or pull it from the struct schema.
        if (setScaleValue) {
          parameters.put(CONNECT_AVRO_DECIMAL_SCALE_PROP, Integer.toString(config.scale));
          scale = config.scale;
        } else if (setScaleZero) {
          parameters.put(CONNECT_AVRO_DECIMAL_SCALE_PROP, Integer.toString(0));
          scale = 0;
        } else {
          parameters.put(CONNECT_AVRO_DECIMAL_SCALE_PROP, Integer.toString(scale));
        }

        if (setPrecision || setScaleValue || setScaleZero) {
          modifiedFields.add(field.name());
        }

        SchemaBuilder fieldBuilder = Decimal.builder(scale)
            .parameters(parameters)
            .doc(field.schema().doc())
            .version(field.schema().version());

        if (field.schema().isOptional()) {
          fieldBuilder.optional();
        }
        Schema fieldSchema = fieldBuilder.build();
        builder.field(field.name(), fieldSchema);
      } else {
        // Not a Decimal
        log.trace("processStruct() - copying field '{}' to new schema.", field.name());
        builder.field(field.name(), field.schema());
      }
    }

    outputSchema = builder.build();
    outputStruct = new Struct(outputSchema);

    // Hydrate the Struct by iterating over the fields again.
    for (Field field : outputSchema.fields()) {
      if (modifiedFields.contains(field.name())) {
        BigDecimal originalBigDecimal = (BigDecimal) inputStruct.get(field.name());
        if (null != originalBigDecimal) {
          int precision = Integer.parseInt(field.schema().parameters().get(CONNECT_AVRO_DECIMAL_PRECISION_PROP));
          int scale = Integer.parseInt(field.schema().parameters().get(CONNECT_AVRO_DECIMAL_SCALE_PROP));

          // RoundingMode _shouldn't_ matter here because the source data presumably has the same precision and scale;
          // it was just 'lost' (not picked up) by the connector prior to this SMT.
          // Precision of the BigDecimal will be the total scale plus the total number of digits to the left of the decimal point.
          // For example: 12345.67890 with a scale of 5 will have a precision of 10, regardless of the desired precision,
          // but the schema will reflect both the desired precision and scale.
          // Order of scale vs. round doesn't seem to matter here.
          MathContext mc = new MathContext(precision);
          BigDecimal newBigDecimal = originalBigDecimal.round(mc).setScale(scale, RoundingMode.FLOOR);
          outputStruct.put(field.name(), newBigDecimal);
        } else {
          outputStruct.put(field.name(), null);
        }
      } else {
        log.trace("processStruct() - copying field '{}' to new struct.", field.name());
        outputStruct.put(field.name(), inputStruct.get(field.name()));
      }
    }

    return new SchemaAndValue(outputSchema, outputStruct);
  }

  @Override
  public void configure(Map<String, ?> settings) {
    this.config = new AdjustPrecisionAndScaleConfig(settings);
  }

  public static class Key<R extends ConnectRecord<R>> extends AdjustPrecisionAndScale<R> {
    public Key() {
      super(true);
    }
  }

  public static class Value<R extends ConnectRecord<R>> extends AdjustPrecisionAndScale<R> {
    public Value() {
      super(false);
    }
  }
}
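
For reference, a minimal sketch of the rounding behavior described in the processStruct() comments: rounding through a MathContext and then calling setScale leaves the digits intact when the source data already fits, so the adjusted BigDecimal keeps the precision implied by its digits while only the rewritten schema carries the configured precision. The class name and target values below are illustrative, not part of the commit.

import java.math.BigDecimal;
import java.math.MathContext;
import java.math.RoundingMode;

public class DecimalAdjustDemo {
  public static void main(String[] args) {
    // Source value from the comment above: 10 digits, scale 5.
    BigDecimal original = new BigDecimal("12345.67890");

    // Suppose the SMT was configured with precision 38 and scale 5 (hypothetical targets).
    MathContext mc = new MathContext(38);
    BigDecimal adjusted = original.round(mc).setScale(5, RoundingMode.FLOOR);

    System.out.println(adjusted);             // 12345.67890 -- value unchanged
    System.out.println(adjusted.precision()); // 10, not 38; only the schema reflects 38
  }
}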
