Skip to content

Commit 74049bb

Browse files
author
Mike Skells
committed
Add additional timestamp sources:
from a header value, from a data field, or via a custom extractor. Remove a few simple classes, add a DataExtractor to read things from the `sinkRecord`, and make a few tidy-ups
1 parent ec00122 commit 74049bb

File tree

15 files changed

+530
-61
lines changed

15 files changed

+530
-61
lines changed

commons/src/main/java/io/aiven/kafka/connect/common/config/AivenCommonConfig.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,10 @@ public final ZoneId getFilenameTimezone() {
202202
}
203203

204204
public final TimestampSource getFilenameTimestampSource() {
205-
return TimestampSource.of(getFilenameTimezone(),
206-
TimestampSource.Type.of(getString(FILE_NAME_TIMESTAMP_SOURCE)));
205+
return new TimestampSource.Builder()
206+
.configuration(getString(FILE_NAME_TIMESTAMP_SOURCE))
207+
.zoneId(getFilenameTimezone())
208+
.build();
207209
}
208210

209211
public final int getMaxRecordsPerFile() {

commons/src/main/java/io/aiven/kafka/connect/common/config/TimestampSource.java

Lines changed: 149 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -20,82 +20,191 @@
2020
import java.time.ZoneId;
2121
import java.time.ZoneOffset;
2222
import java.time.ZonedDateTime;
23+
import java.util.Locale;
24+
import java.util.Objects;
2325

26+
import io.aiven.kafka.connect.common.config.extractors.DataExtractor;
27+
import io.aiven.kafka.connect.common.config.extractors.HeaderValueExtractor;
28+
import io.aiven.kafka.connect.common.config.extractors.SimpleValuePath;
2429
import org.apache.kafka.connect.sink.SinkRecord;
2530

2631
public interface TimestampSource {
2732

2833
ZonedDateTime time(SinkRecord record);
2934

30-
@SuppressWarnings("PMD.ShortMethodName")
31-
static TimestampSource of(final Type extractorType) {
32-
return of(ZoneOffset.UTC, extractorType);
33-
}
34-
35-
@SuppressWarnings("PMD.ShortMethodName")
36-
static TimestampSource of(final ZoneId zoneId, final Type extractorType) {
37-
switch (extractorType) {
38-
case WALLCLOCK :
39-
return new WallclockTimestampSource(zoneId);
40-
case EVENT :
41-
return new EventTimestampSource(zoneId);
42-
default :
43-
throw new IllegalArgumentException(
44-
String.format("Unsupported timestamp extractor type: %s", extractorType));
45-
}
46-
}
4735

4836
Type type();
4937

5038
enum Type {
5139

52-
WALLCLOCK, EVENT;
40+
WALLCLOCK,
41+
EVENT,
42+
HEADER,
43+
SIMPLE_DATA,
44+
CUSTOM
45+
46+
}
47+
class Builder {
48+
private ZoneId zoneId = ZoneOffset.UTC;
49+
private Type type;
50+
private String additionalParameters;
51+
52+
/**
53+
* Sets the zoneId to be used. If this method isn't called, the default is UTC.
54+
* @return this
55+
* @throws NullPointerException if zoneId is null
56+
*/
57+
public Builder zoneId(final ZoneId zoneId) {
58+
Objects.requireNonNull(zoneId, "zoneId cannot be null");
59+
this.zoneId = zoneId;
60+
return this;
61+
}
5362

54-
@SuppressWarnings("PMD.ShortMethodName")
55-
public static Type of(final String name) {
56-
for (final Type t : Type.values()) {
57-
if (t.name().equalsIgnoreCase(name)) {
58-
return t;
59-
}
63+
/**
64+
* sets the type of the timestamp source and associated parameters (if needed)
65+
* The format of the configuration is <type>[:<data>]
66+
* i.e. the type name, optionally followed by data.
67+
* <br>
68+
* The data is type specific
69+
* <p>
70+
* For type WALLCLOCK or EVENT, no data is allowed
71+
* </p>
72+
* <p>
73+
* For type SIMPLE_DATA, data is required, and is a '.' separated series of
74+
* terms in the path
75+
* <br>If the '.' is something that should be included in the terms, and you
76+
* want to use a different separator, then you can specify a '.' as the first character, and the separator as the
77+
* second character, and then the path is the rest of the string
78+
* <br>For example "SIMPLE_DATA:a.b.c" would use a path with
79+
* terms "a", "b", "c"
80+
* <br>For example "SIMPLE_DATA:.:a.b:c" would use a path with terms "a.b", "c"
81+
* </p>
82+
* <p>For type HEADER, data is required, and is the name of the header to extract
83+
* <br>For example "HEADER:foo" would use the "foo" header (or null if it's not available in the SinkRecord)
84+
* </p>
85+
* <p>
86+
* For type CUSTOM, data is required, and is the name of the class to use, and any additional parameters for that custom time source.
87+
* The specified class must implement the TimestampSource interface and have a public constructor that takes a String and a ZoneId. For the meaning of the data, see the documentation of the custom class.
88+
* <br>For example "CUSTOM:my.custom.timesource:some more data" would be similar to calling new my.custom.timesource("some more data", zoneId)
89+
* </p>
90+
*
91+
92+
* @return this
93+
*/
94+
public Builder configuration(final String configuration) {
95+
final String[] parts = configuration.split(":", 2);
96+
final String typeName = parts[0];
97+
try {
98+
this.type = Type.valueOf(typeName.toUpperCase(Locale.ENGLISH));
99+
} catch (final IllegalArgumentException e) {
100+
throw new IllegalArgumentException("Unknown timestamp source: "+typeName);
101+
}
102+
103+
this.additionalParameters = parts.length > 1 ? parts[1] : null;
104+
return this;
105+
}
106+
107+
public TimestampSource build() {
108+
switch (type) {
109+
case WALLCLOCK:
110+
if (additionalParameters != null) {
111+
throw new IllegalArgumentException("Wallclock timestamp source does not support additionalParameters");
112+
}
113+
return new WallclockTimestampSource(zoneId);
114+
case EVENT:
115+
if (additionalParameters != null) {
116+
throw new IllegalArgumentException("Event timestamp source does not support additionalParameters");
117+
}
118+
return new EventTimestampSource(zoneId);
119+
case SIMPLE_DATA:
120+
if (additionalParameters == null) {
121+
throw new IllegalArgumentException("Data timestamp source requires additionalParameters");
122+
}
123+
return new SimpleTimestampSource(zoneId, Type.SIMPLE_DATA, SimpleValuePath.parse(additionalParameters));
124+
case HEADER:
125+
if (additionalParameters == null) {
126+
throw new IllegalArgumentException("Header timestamp source requires additionalParameters");
127+
}
128+
return new SimpleTimestampSource(zoneId, Type.HEADER, new HeaderValueExtractor(additionalParameters));
129+
case CUSTOM:
130+
if (additionalParameters == null) {
131+
throw new IllegalArgumentException("Header timestamp source requires additionalParameters");
132+
}
133+
final String[] parts = additionalParameters.split(":", 2);
134+
final String className = parts[0];
135+
final String params = parts.length > 1 ? parts[1] : null;
136+
try {
137+
final Class<?> clazz = Class.forName(className);
138+
return (TimestampSource) clazz.getConstructor(String.class, ZoneId.class).newInstance(params, zoneId);
139+
} catch (final Exception e) {
140+
throw new IllegalArgumentException("Failed to create custom timestamp source", e);
141+
}
142+
default:
143+
throw new IllegalArgumentException(
144+
String.format("Unsupported timestamp extractor type: %s", type));
60145
}
61-
throw new IllegalArgumentException(String.format("Unknown timestamp source: %s", name));
62146
}
63147

64148
}
65149

66-
final class WallclockTimestampSource implements TimestampSource {
67-
private final ZoneId zoneId;
150+
class SimpleTimestampSource implements TimestampSource {
151+
protected final ZoneId zoneId;
152+
private final Type type;
153+
private final DataExtractor dataExtractor;
68154

69-
protected WallclockTimestampSource(final ZoneId zoneId) {
155+
protected SimpleTimestampSource(final ZoneId zoneId, final Type type, DataExtractor dataExtractor) {
70156
this.zoneId = zoneId;
157+
this.type = type;
158+
this.dataExtractor = dataExtractor;
71159
}
72160

73161
@Override
74-
public ZonedDateTime time(final SinkRecord record) {
75-
return ZonedDateTime.now(zoneId);
162+
public Type type() {
163+
return type;
76164
}
77165

78166
@Override
79-
public Type type() {
80-
return Type.WALLCLOCK;
167+
public ZonedDateTime time(SinkRecord record) {
168+
return fromRawTime(dataExtractor.extractDataFrom(record));
81169
}
82-
}
83170

84-
final class EventTimestampSource implements TimestampSource {
85-
private final ZoneId zoneId;
171+
protected ZonedDateTime fromRawTime(final Object rawValue) {
172+
if (rawValue == null) {
173+
return null;
174+
} else if (rawValue instanceof Long) {
175+
return withZone((Long) rawValue);
176+
} else if (rawValue instanceof ZonedDateTime) {
177+
return (ZonedDateTime) rawValue;
178+
} else if (rawValue instanceof Instant) {
179+
return withZone(((Instant) rawValue).toEpochMilli());
180+
}
181+
return null;
182+
}
86183

87-
protected EventTimestampSource(final ZoneId zoneId) {
88-
this.zoneId = zoneId;
184+
protected ZonedDateTime withZone(final long timestamp) {
185+
return ZonedDateTime.ofInstant(Instant.ofEpochMilli(timestamp), zoneId);
186+
}
187+
}
188+
189+
final class WallclockTimestampSource extends SimpleTimestampSource {
190+
WallclockTimestampSource(final ZoneId zoneId) {
191+
super(zoneId, Type.WALLCLOCK, null);
89192
}
90193

91194
@Override
92195
public ZonedDateTime time(final SinkRecord record) {
93-
return ZonedDateTime.ofInstant(Instant.ofEpochMilli(record.timestamp()), zoneId);
196+
return ZonedDateTime.now(zoneId);
197+
}
198+
}
199+
200+
final class EventTimestampSource extends SimpleTimestampSource {
201+
EventTimestampSource(final ZoneId zoneId) {
202+
super(zoneId, Type.EVENT, null);
94203
}
95204

96205
@Override
97-
public Type type() {
98-
return Type.EVENT;
206+
public ZonedDateTime time(final SinkRecord record) {
207+
return withZone(record.timestamp());
99208
}
100209
}
101210
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package io.aiven.kafka.connect.common.config.extractors;
2+
3+
import org.apache.kafka.connect.sink.SinkRecord;
4+
5+
public interface DataExtractor {
6+
7+
Object extractDataFrom(final SinkRecord record);
8+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package io.aiven.kafka.connect.common.config.extractors;
2+
3+
import org.apache.kafka.connect.header.Header;
4+
import org.apache.kafka.connect.sink.SinkRecord;
5+
6+
public class HeaderValueExtractor implements DataExtractor {
7+
private final String headerKey;
8+
9+
public HeaderValueExtractor(final String headerKey) {
10+
this.headerKey = headerKey;
11+
}
12+
13+
public Object extractDataFrom(final SinkRecord record) {
14+
final Header header = record.headers().lastWithName(headerKey);
15+
return header == null ? null : header.value();
16+
}
17+
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright 2024 Aiven Oy
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.aiven.kafka.connect.common.config.extractors;
17+
18+
19+
import org.apache.kafka.connect.data.Field;
20+
import org.apache.kafka.connect.data.Schema;
21+
import org.apache.kafka.connect.data.Struct;
22+
import org.apache.kafka.connect.sink.SinkRecord;
23+
24+
import java.util.Arrays;
25+
import java.util.List;
26+
import java.util.Map;
27+
import java.util.regex.Pattern;
28+
29+
public final class SimpleValuePath implements DataExtractor {
30+
private final String[] terms;
31+
32+
private SimpleValuePath(final String[] terms) {
33+
this.terms = terms;
34+
}
35+
36+
/**
37+
* Parse a path definition string into a Path object. The path definition string is a '.' separated series of
38+
* strings, which are the terms in the path. If the '.' is something that should be included in the terms, and you
39+
* want to use a different separator, then you can specify a '.' as the first character, and the separator as the
40+
* second character, and then the path is the rest of the string. For example "a.b.c" would parse into a path with
41+
* terms "a", "b", "c". For example ".:a.b:c" would parse into a path with terms "a.b", "c".
42+
*
43+
* @return a SimpleValuePath that can access a value in a nested structure
44+
*/
45+
public static SimpleValuePath parse(final String pathDefinition) {
46+
final String pathDescription;
47+
final String pathSeparator;
48+
if (pathDefinition.length() > 1 && pathDefinition.charAt(0) == '.' ) {
49+
pathDescription = pathDefinition.substring(2);
50+
pathSeparator = pathDefinition.substring(1,2);
51+
} else {
52+
pathDescription = pathDefinition;
53+
pathSeparator = ".";
54+
}
55+
return new SimpleValuePath(Pattern.compile(pathSeparator, Pattern.LITERAL).split(pathDescription));
56+
}
57+
58+
public Object extractDataFrom(final SinkRecord record) {
59+
Object current = record.value();
60+
61+
for (final String term : terms) {
62+
if (current == null) {
63+
return null;
64+
}
65+
if (current instanceof Struct) {
66+
final Struct struct = (Struct) current;
67+
final Schema schema = struct.schema();
68+
final Field field = schema.field(term);
69+
if (field == null) {
70+
return null;
71+
}
72+
current = struct.get(field);
73+
} else if (current instanceof Map) {
74+
current = ((Map<?, ?>) current).get(term);
75+
} else if (current instanceof List) {
76+
try {
77+
current = ((List<?>) current).get(Integer.parseInt(term));
78+
} catch (NumberFormatException|IndexOutOfBoundsException e) {
79+
return null;
80+
}
81+
} else {
82+
return null;
83+
}
84+
}
85+
return current;
86+
}
87+
88+
@Override
89+
public String toString() {
90+
return "Path[terms=" + Arrays.toString( terms) +"]";
91+
}
92+
}

commons/src/main/java/io/aiven/kafka/connect/common/config/validators/TimestampSourceValidator.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ public class TimestampSourceValidator implements ConfigDef.Validator {
2626
@Override
2727
public void ensureValid(final String name, final Object value) {
2828
try {
29-
TimestampSource.Type.of(value.toString());
29+
new TimestampSource.Builder()
30+
.configuration(value.toString())
31+
.build();
3032
} catch (final Exception e) { // NOPMD AvoidCatchingGenericException
3133
throw new ConfigException(name, value, e.getMessage());
3234
}

0 commit comments

Comments
 (0)