Commit a2b19cd

Refactored BaseTransformation to make schema-based conversions easier. Added BytesToString conversion. (#13)
Fixes #12.
1 parent 0f46bdb commit a2b19cd

9 files changed: +450 −53 lines

9 files changed

+450
-53
lines changed

pom.xml

+1-1
@@ -27,7 +27,7 @@
   </parent>
   <artifactId>kafka-connect-transform-common</artifactId>
   <version>0.1.0-SNAPSHOT</version>
-  <name>afka-connect-transform-common</name>
+  <name>kafka-connect-transform-common</name>
   <url>https://github.com/jcustenborder/kafka-connect-transform-common</url>
   <inceptionYear>2017</inceptionYear>
   <licenses>

src/main/java/com/github/jcustenborder/kafka/connect/transform/common/BaseTransformation.java

+140-8
@@ -15,28 +15,160 @@
  */
 package com.github.jcustenborder.kafka.connect.transform.common;
 
+import com.github.jcustenborder.kafka.connect.utils.data.SchemaHelper;
 import org.apache.kafka.connect.connector.ConnectRecord;
+import org.apache.kafka.connect.data.Decimal;
+import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.SchemaAndValue;
 import org.apache.kafka.connect.data.Struct;
+import org.apache.kafka.connect.data.Time;
+import org.apache.kafka.connect.data.Timestamp;
 import org.apache.kafka.connect.transforms.Transformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
+import java.math.BigDecimal;
+import java.util.Date;
+import java.util.List;
 import java.util.Map;
 
 public abstract class BaseTransformation<R extends ConnectRecord<R>> implements Transformation<R> {
+  private static final Logger log = LoggerFactory.getLogger(BaseTransformation.class);
 
-  protected abstract SchemaAndValue processStruct(R record, SchemaAndValue schemaAndValue);
+  protected SchemaAndValue processMap(R record, Map<String, Object> input) {
+    throw new UnsupportedOperationException("MAP is not a supported type.");
+  }
+
+  protected SchemaAndValue processStruct(R record, Schema inputSchema, Struct input) {
+    throw new UnsupportedOperationException("STRUCT is not a supported type.");
+  }
+
+  protected SchemaAndValue processString(R record, Schema inputSchema, String input) {
+    throw new UnsupportedOperationException("STRING is not a supported type.");
+  }
+
+  protected SchemaAndValue processBytes(R record, Schema inputSchema, byte[] input) {
+    throw new UnsupportedOperationException("BYTES is not a supported type.");
+  }
+
+  protected SchemaAndValue processInt8(R record, Schema inputSchema, byte input) {
+    throw new UnsupportedOperationException("INT8 is not a supported type.");
+  }
+
+  protected SchemaAndValue processInt16(R record, Schema inputSchema, short input) {
+    throw new UnsupportedOperationException("INT16 is not a supported type.");
+  }
 
-  protected abstract SchemaAndValue processMap(R record, SchemaAndValue schemaAndValue);
+  protected SchemaAndValue processInt32(R record, Schema inputSchema, int input) {
+    throw new UnsupportedOperationException("INT32 is not a supported type.");
+  }
+
+  protected SchemaAndValue processInt64(R record, Schema inputSchema, long input) {
+    throw new UnsupportedOperationException("INT64 is not a supported type.");
+  }
+
+  protected SchemaAndValue processBoolean(R record, Schema inputSchema, boolean input) {
+    throw new UnsupportedOperationException("BOOLEAN is not a supported type.");
+  }
+
+  protected SchemaAndValue processTimestamp(R record, Schema inputSchema, Date input) {
+    throw new UnsupportedOperationException("Timestamp is not a supported type.");
+  }
+
+  protected SchemaAndValue processDate(R record, Schema inputSchema, Date input) {
+    throw new UnsupportedOperationException("Date is not a supported type.");
+  }
+
+  protected SchemaAndValue processTime(R record, Schema inputSchema, Date input) {
+    throw new UnsupportedOperationException("Time is not a supported type.");
+  }
 
-  protected SchemaAndValue process(R record, SchemaAndValue schemaAndValue) {
+  protected SchemaAndValue processDecimal(R record, Schema inputSchema, BigDecimal input) {
+    throw new UnsupportedOperationException("Decimal is not a supported type.");
+  }
+
+  protected SchemaAndValue processFloat64(R record, Schema inputSchema, double input) {
+    throw new UnsupportedOperationException("FLOAT64 is not a supported type.");
+  }
+
+  protected SchemaAndValue processFloat32(R record, Schema inputSchema, float input) {
+    throw new UnsupportedOperationException("FLOAT32 is not a supported type.");
+  }
+
+  protected SchemaAndValue processArray(R record, Schema inputSchema, List<Object> input) {
+    throw new UnsupportedOperationException("ARRAY is not a supported type.");
+  }
+
+  protected SchemaAndValue processMap(R record, Schema inputSchema, Map<Object, Object> input) {
+    throw new UnsupportedOperationException("MAP is not a supported type.");
+  }
+
+  private static final Schema OPTIONAL_TIMESTAMP = Timestamp.builder().optional().build();
+
+  protected SchemaAndValue process(R record, Schema inputSchema, Object input) {
     final SchemaAndValue result;
-    if (schemaAndValue.value() instanceof Struct) {
-      result = processStruct(record, schemaAndValue);
-    } else if (schemaAndValue.value() instanceof Map) {
-      result = processMap(record, schemaAndValue);
+
+    if (null == inputSchema && null == input) {
+      return new SchemaAndValue(
+          null,
+          null
+      );
+    }
+
+    if (input instanceof Map) {
+      log.trace("process() - Processing as map");
+      result = processMap(record, (Map<String, Object>) input);
+      return result;
+    }
+
+    if (null == inputSchema) {
+      log.trace("process() - Determining schema");
+      inputSchema = SchemaHelper.schema(input);
+    }
+
+    log.trace("process() - Input has as schema. schema = {}", inputSchema);
+    if (Schema.Type.STRUCT == inputSchema.type()) {
+      result = processStruct(record, inputSchema, (Struct) input);
+    } else if (Timestamp.LOGICAL_NAME.equals(inputSchema.name())) {
+      result = processTimestamp(record, inputSchema, (Date) input);
+    } else if (org.apache.kafka.connect.data.Date.LOGICAL_NAME.equals(inputSchema.name())) {
+      result = processDate(record, inputSchema, (Date) input);
+    } else if (Time.LOGICAL_NAME.equals(inputSchema.name())) {
+      result = processTime(record, inputSchema, (Date) input);
+    } else if (Decimal.LOGICAL_NAME.equals(inputSchema.name())) {
+      result = processDecimal(record, inputSchema, (BigDecimal) input);
+    } else if (Schema.Type.STRING == inputSchema.type()) {
+      result = processString(record, inputSchema, (String) input);
+    } else if (Schema.Type.BYTES == inputSchema.type()) {
+      result = processBytes(record, inputSchema, (byte[]) input);
+    } else if (Schema.Type.INT8 == inputSchema.type()) {
+      result = processInt8(record, inputSchema, (byte) input);
+    } else if (Schema.Type.INT16 == inputSchema.type()) {
+      result = processInt16(record, inputSchema, (short) input);
+    } else if (Schema.Type.INT32 == inputSchema.type()) {
+      result = processInt32(record, inputSchema, (int) input);
+    } else if (Schema.Type.INT64 == inputSchema.type()) {
+      result = processInt64(record, inputSchema, (long) input);
+    } else if (Schema.Type.FLOAT32 == inputSchema.type()) {
+      result = processFloat32(record, inputSchema, (float) input);
+    } else if (Schema.Type.FLOAT64 == inputSchema.type()) {
+      result = processFloat64(record, inputSchema, (double) input);
+    } else if (Schema.Type.ARRAY == inputSchema.type()) {
+      result = processArray(record, inputSchema, (List<Object>) input);
+    } else if (Schema.Type.MAP == inputSchema.type()) {
+      result = processMap(record, inputSchema, (Map<Object, Object>) input);
     } else {
-      throw new UnsupportedOperationException();
+      throw new UnsupportedOperationException(
+          String.format(
+              "Schema is not supported. type='%s' name='%s'",
+              inputSchema.type(),
+              inputSchema.name()
+          )
+      );
     }
 
     return result;
   }
+
+
 }
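
After this refactor, process() inspects the record's schema (or infers one via SchemaHelper when it is absent) and dispatches to a per-type process* hook, each of which throws UnsupportedOperationException by default. A subclass therefore overrides only the hooks for the types it actually supports. A minimal sketch of such a subclass — the ToLowerCase class below is hypothetical and not part of this commit:

package com.github.jcustenborder.kafka.connect.transform.common;

import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.ConnectRecord;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;

import java.util.Map;

public class ToLowerCase<R extends ConnectRecord<R>> extends BaseTransformation<R> {

  // Only STRING is handled; every other type falls through to the
  // UnsupportedOperationException defaults inherited from BaseTransformation.
  @Override
  protected SchemaAndValue processString(R record, Schema inputSchema, String input) {
    return new SchemaAndValue(inputSchema, input.toLowerCase());
  }

  @Override
  public R apply(R r) {
    // process() routes a STRING-schema value to processString() above.
    final SchemaAndValue transformed = process(r, r.valueSchema(), r.value());
    return r.newRecord(
        r.topic(),
        r.kafkaPartition(),
        r.keySchema(),
        r.key(),
        transformed.schema(),
        transformed.value(),
        r.timestamp()
    );
  }

  @Override
  public ConfigDef config() {
    return new ConfigDef(); // no settings for this sketch
  }

  @Override
  public void configure(Map<String, ?> settings) {
  }

  @Override
  public void close() {
  }
}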

src/main/java/com/github/jcustenborder/kafka/connect/transform/common/BytesToString.java

+141-0

@@ -0,0 +1,141 @@
+/**
+ * Copyright © 2017 Jeremy Custenborder ([email protected])
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.github.jcustenborder.kafka.connect.transform.common;
+
+import com.github.jcustenborder.kafka.connect.utils.config.Description;
+import com.github.jcustenborder.kafka.connect.utils.config.DocumentationTip;
+import com.github.jcustenborder.kafka.connect.utils.config.Title;
+import com.google.common.base.Strings;
+import org.apache.kafka.common.config.ConfigDef;
+import org.apache.kafka.connect.connector.ConnectRecord;
+import org.apache.kafka.connect.data.Field;
+import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.data.SchemaAndValue;
+import org.apache.kafka.connect.data.SchemaBuilder;
+import org.apache.kafka.connect.data.Struct;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public abstract class BytesToString<R extends ConnectRecord<R>> extends BaseTransformation<R> {
+  private static final Logger log = LoggerFactory.getLogger(BytesToString.class);
+
+  @Override
+  public ConfigDef config() {
+    return BytesToStringConfig.config();
+  }
+
+  BytesToStringConfig config;
+
+  @Override
+  public void configure(Map<String, ?> settings) {
+    this.config = new BytesToStringConfig(settings);
+  }
+
+  @Override
+  public void close() {
+
+  }
+
+  @Override
+  protected SchemaAndValue processBytes(R record, Schema inputSchema, byte[] input) {
+    final Schema outputSchema = inputSchema.isOptional() ? Schema.OPTIONAL_STRING_SCHEMA : Schema.STRING_SCHEMA;
+    final String output = new String(input, this.config.charset);
+    return new SchemaAndValue(outputSchema, output);
+  }
+
+  Map<Schema, Schema> schemaCache = new HashMap<>();
+
+  @Override
+  protected SchemaAndValue processStruct(R record, Schema inputSchema, Struct input) {
+    final Schema schema = this.schemaCache.computeIfAbsent(inputSchema, s -> {
+      final SchemaBuilder builder = SchemaBuilder.struct();
+      if (!Strings.isNullOrEmpty(inputSchema.name())) {
+        builder.name(inputSchema.name());
+      }
+      if (inputSchema.isOptional()) {
+        builder.optional();
+      }
+
+      for (Field field : inputSchema.fields()) {
+        log.trace("processStruct() - processing '{}'", field.name());
+        final Schema fieldSchema;
+        if (this.config.fields.contains(field.name())) {
+          fieldSchema = field.schema().isOptional() ?
+              Schema.OPTIONAL_STRING_SCHEMA :
+              Schema.STRING_SCHEMA;
+        } else {
+          fieldSchema = field.schema();
+        }
+        builder.field(field.name(), fieldSchema);
+      }
+      return builder.build();
+    });
+
+    Struct struct = new Struct(schema);
+    for (Field field : schema.fields()) {
+      if (this.config.fields.contains(field.name())) {
+        byte[] buffer = input.getBytes(field.name());
+        struct.put(field.name(), new String(buffer, this.config.charset));
+      } else {
+        struct.put(field.name(), input.get(field.name()));
+      }
+    }
+    return new SchemaAndValue(schema, struct);
+  }
+
+  @Title("BytesToString(Key)")
+  @Description("This transformation is used to convert a byte array to a string.")
+  @DocumentationTip("This transformation is used to manipulate fields in the Key of the record.")
+  public static class Key<R extends ConnectRecord<R>> extends BytesToString<R> {
+
+    @Override
+    public R apply(R r) {
+      final SchemaAndValue transformed = process(r, r.keySchema(), r.key());
+
+      return r.newRecord(
+          r.topic(),
+          r.kafkaPartition(),
+          transformed.schema(),
+          transformed.value(),
+          r.valueSchema(),
+          r.value(),
+          r.timestamp()
+      );
+    }
+  }
+
+  @Title("BytesToString(Value)")
+  @Description("This transformation is used to convert a byte array to a string.")
+  public static class Value<R extends ConnectRecord<R>> extends BytesToString<R> {
+    @Override
+    public R apply(R r) {
+      final SchemaAndValue transformed = process(r, r.valueSchema(), r.value());
+
+      return r.newRecord(
+          r.topic(),
+          r.kafkaPartition(),
+          r.keySchema(),
+          r.key(),
+          transformed.schema(),
+          transformed.value(),
+          r.timestamp()
+      );
+    }
+  }
+}
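
As a usage sketch, the Value variant converts a BYTES value to a STRING and rebuilds the record. The driver class below is hypothetical; BytesToString.Value, SinkRecord, and the charset setting come from this commit and the Connect API:

import com.github.jcustenborder.kafka.connect.transform.common.BytesToString;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

public class BytesToStringExample {
  public static void main(String[] args) {
    BytesToString.Value<SinkRecord> transform = new BytesToString.Value<>();

    // "charset" defaults to UTF-8; it is set explicitly here for clarity.
    Map<String, String> settings = new HashMap<>();
    settings.put("charset", "UTF-8");
    transform.configure(settings);

    // SinkRecord(topic, partition, keySchema, key, valueSchema, value, offset)
    SinkRecord input = new SinkRecord(
        "topic", 0,
        null, null,
        Schema.BYTES_SCHEMA, "hello".getBytes(StandardCharsets.UTF_8),
        1234L
    );

    // processBytes() swaps the schema from BYTES to STRING (preserving
    // optionality) and decodes the payload with the configured charset.
    SinkRecord output = transform.apply(input);
    System.out.println(output.valueSchema().type()); // STRING
    System.out.println(output.value());              // hello

    transform.close();
  }
}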

src/main/java/com/github/jcustenborder/kafka/connect/transform/common/BytesToStringConfig.java

+65-0

@@ -0,0 +1,65 @@
+/**
+ * Copyright © 2017 Jeremy Custenborder ([email protected])
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.github.jcustenborder.kafka.connect.transform.common;
+
+import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder;
+import org.apache.kafka.common.config.AbstractConfig;
+import org.apache.kafka.common.config.ConfigDef;
+
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class BytesToStringConfig extends AbstractConfig {
+  public final Charset charset;
+  public final Set<String> fields;
+
+  public static final String CHARSET_CONFIG = "charset";
+  public static final String CHARSET_DOC = "The charset to use when creating the output string.";
+
+  public static final String FIELD_CONFIG = "fields";
+  public static final String FIELD_DOC = "The fields to transform.";
+
+
+  public BytesToStringConfig(Map<String, ?> settings) {
+    super(config(), settings);
+    String charset = getString(CHARSET_CONFIG);
+    this.charset = Charset.forName(charset);
+    List<String> fields = getList(FIELD_CONFIG);
+    this.fields = new HashSet<>(fields);
+  }
+
+  public static ConfigDef config() {
+    return new ConfigDef()
+        .define(
+            ConfigKeyBuilder.of(CHARSET_CONFIG, ConfigDef.Type.STRING)
+                .documentation(CHARSET_DOC)
+                .defaultValue("UTF-8")
+                .importance(ConfigDef.Importance.HIGH)
+                .build()
+        ).define(
+            ConfigKeyBuilder.of(FIELD_CONFIG, ConfigDef.Type.LIST)
+                .documentation(FIELD_DOC)
+                .defaultValue(Collections.emptyList())
+                .importance(ConfigDef.Importance.HIGH)
+                .build()
+        );
+  }
+
+}
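
With these settings, wiring the SMT into a connector would look roughly like the following connector properties. The alias bytesToString and the field name data are hypothetical; the transforms.* key pattern is standard Kafka Connect SMT configuration:

transforms=bytesToString
transforms.bytesToString.type=com.github.jcustenborder.kafka.connect.transform.common.BytesToString$Value
# "fields" is only consulted for STRUCT values; a plain BYTES key or value is converted whole.
transforms.bytesToString.fields=data
# Optional; defaults to UTF-8.
transforms.bytesToString.charset=UTF-8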
