Skip to content

Commit 437612c

Browse files
xxlaykxx and FiV0 authored
GH-87: [Vector] Add ExtensionWriter (#697)
Based on changes from apache/arrow#41731. ## What's Changed Added a new writer, ExtensionWriter, with 3 methods: - write method for writing values from extension holders; - writeExtension method for writing values (the argument is an Object because the exact type is not known); - addExtensionTypeWriterFactory method - because the exact vector and value types are unknown, the user should create their own extension type vector, a writer for it, and an ExtensionTypeWriterFactory, which maps the vector to its writer. Closes #87. Co-authored-by: Finn Völkel <[email protected]>
1 parent 222f30e commit 437612c

22 files changed

+728
-70
lines changed

vector/src/main/codegen/templates/AbstractFieldWriter.java

+22
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,16 @@ public void endEntry() {
107107
throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
108108
}
109109

110+
/**
 * Default handling for writing a value from an extension holder: the call is routed to
 * {@code fail("ExtensionType")}.
 *
 * <p>NOTE(review): {@code fail} is defined outside this hunk; presumably it throws to signal
 * that this writer type does not support the operation -- confirm.
 *
 * @param var1 the extension holder whose value was to be written (not used here)
 */
public void write(ExtensionHolder var1) {
111+
this.fail("ExtensionType");
112+
}
113+
/**
 * Default handling for writing an extension type value: the call is routed to
 * {@code fail("ExtensionType")}.
 *
 * <p>NOTE(review): {@code fail} is defined outside this hunk; presumably it throws to signal
 * that this writer type does not support the operation -- confirm.
 *
 * @param var1 the extension type value that was to be written (not used here)
 */
public void writeExtension(Object var1) {
114+
this.fail("ExtensionType");
115+
}
116+
/**
 * Default handling for registering an extension type writer factory: the call is routed to
 * {@code fail("ExtensionType")}.
 *
 * <p>NOTE(review): {@code fail} is defined outside this hunk; presumably it throws to signal
 * that this writer type does not support the operation -- confirm.
 *
 * @param var1 the factory that was to be registered (not used here)
 */
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
117+
this.fail("ExtensionType");
118+
}
119+
110120
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
111121
<#assign fields = minor.fields!type.fields />
112122
<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
@@ -241,6 +251,18 @@ public MapWriter map(String name, boolean keysSorted) {
241251
fail("Map");
242252
return null;
243253
}
254+
255+
/**
 * Default handling for requesting a named extension writer: calls {@code fail("Extension")}
 * and, should that call return, yields {@code null}.
 *
 * @param name the field name that was requested (not used here)
 * @param arrowType the Arrow type that was requested (not used here)
 * @return {@code null} if {@code fail} returns normally
 */
@Override
256+
public ExtensionWriter extension(String name, ArrowType arrowType) {
257+
fail("Extension");
258+
return null;
259+
}
260+
261+
/**
 * Default handling for requesting an unnamed extension writer: calls {@code fail("Extension")}
 * and, should that call return, yields {@code null}.
 *
 * @param arrowType the Arrow type that was requested (not used here)
 * @return {@code null} if {@code fail} returns normally
 */
@Override
262+
public ExtensionWriter extension(ArrowType arrowType) {
263+
fail("Extension");
264+
return null;
265+
}
244266
<#list vv.types as type><#list type.minor as minor>
245267
<#assign lowerName = minor.class?uncap_first />
246268
<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>

vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java

+10
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,11 @@ public MapWriter map(boolean keysSorted) {
293293
return getWriter(MinorType.MAP, new ArrowType.Map(keysSorted));
294294
}
295295
296+
/**
 * Obtains the writer registered for {@code MinorType.EXTENSIONTYPE} and asks it for an extension
 * writer of the given Arrow type.
 *
 * <p>NOTE(review): the {@code map(boolean)} method directly above passes its ArrowType to
 * {@code getWriter}, whereas this method passes only the minor type -- confirm that omitting
 * {@code arrowType} from the {@code getWriter} call is intended.
 *
 * @param arrowType the extension Arrow type to write
 * @return the extension writer returned by the delegate
 */
@Override
297+
public ExtensionWriter extension(ArrowType arrowType) {
298+
return getWriter(MinorType.EXTENSIONTYPE).extension(arrowType);
299+
}
300+
296301
@Override
297302
public StructWriter struct(String name) {
298303
return getWriter(MinorType.STRUCT).struct(name);
@@ -318,6 +323,11 @@ public MapWriter map(String name, boolean keysSorted) {
318323
return getWriter(MinorType.STRUCT).map(name, keysSorted);
319324
}
320325
326+
/**
 * Obtains the writer registered for {@code MinorType.EXTENSIONTYPE} and asks it for an extension
 * writer for the field {@code name} of the given Arrow type.
 *
 * <p>NOTE(review): the neighbouring {@code struct(String)} and {@code map(String, boolean)}
 * methods delegate through {@code getWriter(MinorType.STRUCT)}; this one goes through
 * {@code MinorType.EXTENSIONTYPE} -- confirm that is the intended delegate for a named child.
 *
 * @param name the field name
 * @param arrowType the extension Arrow type to write
 * @return the extension writer returned by the delegate
 */
@Override
327+
public ExtensionWriter extension(String name, ArrowType arrowType) {
328+
return getWriter(MinorType.EXTENSIONTYPE).extension(name, arrowType);
329+
}
330+
321331
<#list vv.types as type><#list type.minor as minor>
322332
<#assign lowerName = minor.class?uncap_first />
323333
<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>

vector/src/main/codegen/templates/BaseWriter.java

+31
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ public interface StructWriter extends BaseWriter {
6161
6262
void copyReaderToField(String name, FieldReader reader);
6363
StructWriter struct(String name);
64+
/** Returns an {@link ExtensionWriter} for the field {@code name} with the given Arrow type. */
ExtensionWriter extension(String name, ArrowType arrowType);
6465
ListWriter list(String name);
6566
ListWriter listView(String name);
6667
MapWriter map(String name);
@@ -79,6 +80,7 @@ public interface ListWriter extends BaseWriter {
7980
ListWriter listView();
8081
MapWriter map();
8182
MapWriter map(boolean keysSorted);
83+
/** Returns an {@link ExtensionWriter} for the given Arrow type. */
ExtensionWriter extension(ArrowType arrowType);
8284
void copyReader(FieldReader reader);
8385
8486
<#list vv.types as type><#list type.minor as minor>
@@ -101,6 +103,35 @@ public interface MapWriter extends ListWriter {
101103
MapWriter value();
102104
}
103105

106+
/**
 * Writer contract for extension-typed values. Besides writing nulls, implementations accept
 * values either wrapped in an {@code ExtensionHolder} or as a plain {@code Object}, and can be
 * configured with an {@code ExtensionTypeWriterFactory}.
 */
public interface ExtensionWriter extends BaseWriter {
107+
108+
/**
109+
* Writes a null value.
110+
*/
111+
void writeNull();
112+
113+
/**
114+
* Writes the value from the given extension holder.
115+
*
116+
* @param holder the extension holder to write
117+
*/
118+
void write(ExtensionHolder holder);
119+
120+
/**
121+
* Writes the given extension type value. The parameter is declared as {@code Object}, so the
122+
* signature itself places no restriction on the value's type.
123+
*
124+
* @param value the extension type value to write
125+
*/
126+
void writeExtension(Object value);
127+
128+
/**
129+
* Adds the given extension type writer factory. This factory allows configuring writer
130+
* implementations for a specific ExtensionTypeVector.
131+
*
132+
* @param factory the extension type writer factory to add
133+
*/
134+
void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory);
133+
}
134+
104135
/**
 * Aggregate interface that extends one generated writer interface per minor type listed in the
 * template's type table, plus {@code BaseWriter}. It declares no members of its own.
 */
public interface ScalarWriter extends
105136
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Writer, </#list></#list> BaseWriter {}
106137

vector/src/main/codegen/templates/PromotableWriter.java

+14
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,9 @@ protected void setWriter(ValueVector v) {
285285
case UNION:
286286
writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory);
287287
break;
288+
case EXTENSIONTYPE:
289+
writer = new UnionExtensionWriter((ExtensionTypeVector) vector);
290+
break;
288291
default:
289292
writer = type.getNewFieldWriter(vector);
290293
break;
@@ -316,6 +319,7 @@ protected boolean requiresArrowType(MinorType type) {
316319
|| type == MinorType.MAP
317320
|| type == MinorType.DURATION
318321
|| type == MinorType.FIXEDSIZEBINARY
322+
|| type == MinorType.EXTENSIONTYPE
319323
|| (type.name().startsWith("TIMESTAMP") && type.name().endsWith("TZ"));
320324
}
321325

@@ -536,6 +540,16 @@ public void writeLargeVarChar(String value) {
536540
getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value);
537541
}
538542

543+
/**
 * Forwards the extension value to the writer obtained for {@code MinorType.EXTENSIONTYPE}.
 *
 * @param value the extension type value to write
 */
@Override
544+
public void writeExtension(Object value) {
545+
getWriter(MinorType.EXTENSIONTYPE).writeExtension(value);
546+
}
547+
548+
/**
 * Forwards the factory to the writer obtained for {@code MinorType.EXTENSIONTYPE}.
 *
 * @param factory the extension type writer factory to register on the delegate
 */
@Override
549+
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) {
550+
getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory);
551+
}
552+
539553
@Override
540554
public void allocate() {
541555
getWriter().allocate();

vector/src/main/codegen/templates/StructWriters.java

+26
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ public class ${mode}StructWriter extends AbstractFieldWriter {
8383
fields.put(handleCase(child.getName()), writer);
8484
break;
8585
}
86+
case EXTENSIONTYPE:
87+
extension(child.getName(), child.getType());
88+
break;
8689
case UNION:
8790
FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null);
8891
UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory());
@@ -159,6 +162,29 @@ public StructWriter struct(String name) {
159162
return writer;
160163
}
161164

165+
/**
 * Returns the writer for the extension-typed child {@code name}, creating and caching it on first
 * use.
 *
 * <p>The cache key is {@code handleCase(name)}. When no writer is cached, a vector is added to
 * (or fetched from) {@code container}, wrapped in a {@code PromotableWriter}, positioned at
 * {@code idx()} and stored in {@code fields}. When a writer is already cached and is a
 * {@code PromotableWriter}, its extension writer is requested so that it is initialized.
 *
 * @param name the child field name
 * @param arrowType the extension Arrow type of the child
 * @return the cached or newly created writer, cast to {@code ExtensionWriter}
 */
@Override
166+
public ExtensionWriter extension(String name, ArrowType arrowType) {
167+
String finalName = handleCase(name);
168+
FieldWriter writer = fields.get(finalName);
169+
if(writer == null){
170+
// Remember the child count so we can tell whether addOrGet created a new vector.
int vectorCount=container.size();
171+
FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null);
172+
// Note: the vector is registered under the original name, the writer under finalName.
ExtensionTypeVector vector = container.addOrGet(name, fieldType, ExtensionTypeVector.class);
173+
writer = new PromotableWriter(vector, container, getNullableStructWriterFactory());
174+
// Only allocate when the container actually grew, i.e. the vector is new.
if(vectorCount != container.size()) {
175+
writer.allocate();
176+
}
177+
writer.setPosition(idx());
178+
fields.put(finalName, writer);
179+
} else {
180+
if (writer instanceof PromotableWriter) {
181+
// ensure writers are initialized
182+
((PromotableWriter)writer).getWriter(MinorType.EXTENSIONTYPE, arrowType);
183+
}
184+
}
185+
return (ExtensionWriter) writer;
186+
}
187+
162188
@Override
163189
public void close() throws Exception {
164190
clear();

vector/src/main/codegen/templates/UnionListWriter.java

+23
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,17 @@ public MapWriter map(String name, boolean keysSorted) {
201201
return mapWriter;
202202
}
203203
204+
/**
 * Requests an extension writer of the given Arrow type from the wrapped {@code writer} and then
 * returns the wrapped {@code writer} itself; the value returned by the delegate call is not used.
 *
 * @param arrowType the extension Arrow type to write
 * @return the wrapped {@code writer}
 */
@Override
205+
public ExtensionWriter extension(ArrowType arrowType) {
206+
writer.extension(arrowType);
207+
return writer;
208+
}
209+
/**
 * Returns the extension writer that the wrapped {@code writer} provides for the field
 * {@code name} and the given Arrow type.
 *
 * @param name the field name
 * @param arrowType the extension Arrow type to write
 * @return the extension writer returned by the wrapped {@code writer}
 */
@Override
210+
public ExtensionWriter extension(String name, ArrowType arrowType) {
211+
ExtensionWriter extensionWriter = writer.extension(name, arrowType);
212+
return extensionWriter;
213+
}
214+
204215
<#if listName == "LargeList">
205216
@Override
206217
public void startList() {
@@ -323,6 +334,18 @@ public void writeNull() {
323334
}
324335
}
325336
337+
/**
 * Forwards the extension value to the wrapped {@code writer}.
 *
 * <p>NOTE(review): this method only forwards the value; whether the inner writer position should
 * also be advanced afterwards cannot be determined from this hunk -- confirm against the other
 * write methods of this template.
 *
 * @param value the extension type value to write
 */
@Override
338+
public void writeExtension(Object value) {
339+
writer.writeExtension(value);
340+
}
341+
/**
 * Forwards the factory to the wrapped {@code writer}.
 *
 * @param var1 the extension type writer factory to register on the wrapped writer
 */
@Override
342+
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
343+
writer.addExtensionTypeWriterFactory(var1);
344+
}
345+
/**
 * Forwards the value carried by the given extension holder to the wrapped {@code writer}.
 *
 * <p>Marked {@code @Override} like the sibling {@code writeExtension} and
 * {@code addExtensionTypeWriterFactory} methods, so the compiler reports any future drift from
 * the inherited signature.
 *
 * @param var1 the extension holder to write
 */
@Override
public void write(ExtensionHolder var1) {
346+
writer.write(var1);
347+
}
348+
326349
<#list vv.types as type>
327350
<#list type.minor as minor>
328351
<#assign name = minor.class?cap_first />

vector/src/main/codegen/templates/UnionMapWriter.java

+12
Original file line numberDiff line numberDiff line change
@@ -231,4 +231,16 @@ public MapWriter map() {
231231
return super.map();
232232
}
233233
}
234+
235+
/**
 * Returns an extension writer for the part of the map entry selected by the current
 * {@code mode}.
 *
 * <p>In {@code KEY} mode the writer is requested from {@code entryWriter} under
 * {@code MapVector.KEY_NAME}, in {@code VALUE} mode under {@code MapVector.VALUE_NAME}; in any
 * other mode the call is passed to the superclass implementation.
 *
 * @param type the extension Arrow type to write
 * @return the extension writer for the key, the value, or the superclass result
 */
@Override
236+
public ExtensionWriter extension(ArrowType type) {
237+
switch (mode) {
238+
case KEY:
239+
return entryWriter.extension(MapVector.KEY_NAME, type);
240+
case VALUE:
241+
return entryWriter.extension(MapVector.VALUE_NAME, type);
242+
default:
243+
return super.extension(type);
244+
}
245+
}
234246
}

vector/src/main/codegen/templates/UnionWriter.java

+20
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,10 @@ public MapWriter asMap(ArrowType arrowType) {
213213
return getMapWriter(arrowType);
214214
}
215215

216+
/**
 * Placeholder for obtaining an extension writer inside a union: it unconditionally throws.
 *
 * @param arrowType the requested extension Arrow type (currently unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always, with the message
 *     "ExtensionTypes are not supported yet."
 */
private ExtensionWriter getExtensionWriter(ArrowType arrowType) {
217+
throw new UnsupportedOperationException("ExtensionTypes are not supported yet.");
218+
}
219+
216220
BaseWriter getWriter(MinorType minorType) {
217221
return getWriter(minorType, null);
218222
}
@@ -227,6 +231,8 @@ BaseWriter getWriter(MinorType minorType, ArrowType arrowType) {
227231
return getListViewWriter();
228232
case MAP:
229233
return getMapWriter(arrowType);
234+
case EXTENSIONTYPE:
235+
return getExtensionWriter(arrowType);
230236
<#list vv.types as type>
231237
<#list type.minor as minor>
232238
<#assign name = minor.class?cap_first />
@@ -460,6 +466,20 @@ public MapWriter map(String name, boolean keysSorted) {
460466
return getStructWriter().map(name, keysSorted);
461467
}
462468

469+
/**
 * Tags the current index as {@code MinorType.EXTENSIONTYPE}, moves the list writer to that index
 * and returns the list writer's extension writer for the given Arrow type.
 *
 * @param arrowType the extension Arrow type to write
 * @return the extension writer supplied by the list writer
 */
@Override
470+
public ExtensionWriter extension(ArrowType arrowType) {
471+
// Record the type of the slot at idx() before delegating.
data.setType(idx(), MinorType.EXTENSIONTYPE);
472+
getListWriter().setPosition(idx());
473+
return getListWriter().extension(arrowType);
474+
}
475+
476+
/**
 * Tags the current index as {@code MinorType.EXTENSIONTYPE}, moves the struct writer to that
 * index and returns the struct writer's extension writer for the field {@code name}.
 *
 * @param name the field name
 * @param arrowType the extension Arrow type to write
 * @return the extension writer supplied by the struct writer
 */
@Override
477+
public ExtensionWriter extension(String name, ArrowType arrowType) {
478+
// Record the type of the slot at idx() before delegating.
data.setType(idx(), MinorType.EXTENSIONTYPE);
479+
getStructWriter().setPosition(idx());
480+
return getStructWriter().extension(name, arrowType);
481+
}
482+
463483
<#list vv.types as type><#list type.minor as minor>
464484
<#assign lowerName = minor.class?uncap_first />
465485
<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.complex.impl;
18+
19+
import org.apache.arrow.vector.ExtensionTypeVector;
20+
import org.apache.arrow.vector.types.pojo.Field;
21+
22+
/**
23+
* Base {@link AbstractFieldWriter} class for an {@link
24+
* org.apache.arrow.vector.ExtensionTypeVector}.
25+
*
* <p>Field lookup, capacity, allocation, close, clear and null-writing are all forwarded to the
* wrapped vector; value-writing methods are not defined in this class.
*
26+
* @param <T> a specific {@link ExtensionTypeVector}.
27+
*/
28+
public class AbstractExtensionTypeWriter<T extends ExtensionTypeVector>
29+
extends AbstractFieldWriter {
30+
/** The vector this writer operates on. */
protected final T vector;
31+
32+
/**
 * Creates a writer over the given vector.
 *
 * @param vector the extension type vector to write to
 */
public AbstractExtensionTypeWriter(T vector) {
33+
this.vector = vector;
34+
}
35+
36+
/** Returns the field of the wrapped vector. */
@Override
37+
public Field getField() {
38+
return this.vector.getField();
39+
}
40+
41+
/** Returns the value capacity of the wrapped vector. */
@Override
42+
public int getValueCapacity() {
43+
return this.vector.getValueCapacity();
44+
}
45+
46+
/** Allocates the wrapped vector via {@code allocateNew()}. */
@Override
47+
public void allocate() {
48+
this.vector.allocateNew();
49+
}
50+
51+
/** Closes the wrapped vector. */
@Override
52+
public void close() {
53+
this.vector.close();
54+
}
55+
56+
/** Clears the wrapped vector. */
@Override
57+
public void clear() {
58+
this.vector.clear();
59+
}
60+
61+
/**
 * Marks the slot at the current position as null and sets the vector's value count to the
 * current position plus one.
 */
@Override
62+
public void writeNull() {
63+
this.vector.setNull(getPosition());
64+
this.vector.setValueCount(getPosition() + 1);
65+
}
66+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.complex.impl;
18+
19+
import org.apache.arrow.vector.ExtensionTypeVector;
20+
import org.apache.arrow.vector.complex.writer.FieldWriter;
21+
22+
/**
23+
* A factory interface for creating instances of {@link ExtensionTypeWriter}. This factory allows
24+
* configuring writer implementations for specific {@link ExtensionTypeVector}.
25+
*
* <p>NOTE(review): no type named ExtensionTypeWriter is visible alongside this interface; confirm
* that the link target above exists (AbstractExtensionTypeWriter may be what is meant).
*
26+
* @param <T> the type of writer implementation for a specific {@link ExtensionTypeVector}.
27+
*/
28+
public interface ExtensionTypeWriterFactory<T extends FieldWriter> {
29+
30+
/**
31+
* Returns an instance of the writer implementation for the given {@link ExtensionTypeVector}.
32+
*
33+
* @param vector the {@link ExtensionTypeVector} for which the writer implementation is to be
34+
* returned.
35+
* @return an instance of the writer implementation for the given {@link ExtensionTypeVector}.
36+
*/
37+
T getWriterImpl(ExtensionTypeVector vector);
38+
}

0 commit comments

Comments
 (0)