Skip to content

[Avro] Remove dependencies upon Jackson 1.X and Avro's JacksonUtils #195

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import java.util.*;

import org.codehaus.jackson.JsonNode;
import com.fasterxml.jackson.databind.JsonNode;

/**
* Factory class for various default providers
Expand All @@ -19,7 +19,7 @@ public static AvroFieldReader createDefaulter(String name,
case VALUE_NULL:
return new ScalarDefaults.NullDefaults(name);
case VALUE_NUMBER_FLOAT:
switch (defaultAsNode.getNumberType()) {
switch (defaultAsNode.numberType()) {
case FLOAT:
return new ScalarDefaults.FloatDefaults(name, (float) defaultAsNode.asDouble());
case DOUBLE:
Expand All @@ -28,7 +28,7 @@ public static AvroFieldReader createDefaulter(String name,
return new ScalarDefaults.DoubleDefaults(name, defaultAsNode.asDouble());
}
case VALUE_NUMBER_INT:
switch (defaultAsNode.getNumberType()) {
switch (defaultAsNode.numberType()) {
case INT:
return new ScalarDefaults.FloatDefaults(name, defaultAsNode.asInt());
case BIG_INTEGER: // TODO: maybe support separately?
Expand All @@ -40,7 +40,7 @@ public static AvroFieldReader createDefaulter(String name,
return new ScalarDefaults.StringDefaults(name, defaultAsNode.asText());
case START_OBJECT:
{
Iterator<Map.Entry<String,JsonNode>> it = defaultAsNode.getFields();
Iterator<Map.Entry<String,JsonNode>> it = defaultAsNode.fields();
List<AvroFieldReader> readers = new ArrayList<AvroFieldReader>();
while (it.hasNext()) {
Map.Entry<String,JsonNode> entry = it.next();
Expand All @@ -64,7 +64,7 @@ public static AvroFieldReader createDefaulter(String name,

// 23-Jul-2019, tatu: With Avro 1.9, likely changed to use "raw" JDK containers?
// Code would look more like this:
/*
/*
public static AvroFieldReader createDefaulter(String name,
Object defaultValue)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import java.util.*;

import org.apache.avro.Schema;
import org.apache.avro.util.internal.JacksonUtils;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.avro.deser.ScalarDecoder.*;
import com.fasterxml.jackson.dataformat.avro.schema.AvroSchemaHelper;

Expand All @@ -22,6 +22,7 @@ public abstract class AvroReaderFactory
protected final static ScalarDecoder READER_LONG = new LongReader();
protected final static ScalarDecoder READER_NULL = new NullReader();
protected final static ScalarDecoder READER_STRING = new StringReader();
private final static ObjectMapper DEFAULT_MAPPER = new ObjectMapper();

/**
* To resolve cyclic types, need to keep track of resolved named
Expand Down Expand Up @@ -56,24 +57,24 @@ public ScalarDecoder createScalarValueDecoder(Schema type)
switch (type.getType()) {
case BOOLEAN:
return READER_BOOLEAN;
case BYTES:
case BYTES:
return READER_BYTES;
case DOUBLE:
case DOUBLE:
return READER_DOUBLE;
case ENUM:
case ENUM:
return new EnumDecoder(AvroSchemaHelper.getFullName(type), type.getEnumSymbols());
case FIXED:
case FIXED:
return new FixedDecoder(type.getFixedSize(), AvroSchemaHelper.getFullName(type));
case FLOAT:
case FLOAT:
return READER_FLOAT;
case INT:
if (AvroSchemaHelper.getTypeId(type) != null) {
return new IntReader(AvroSchemaHelper.getTypeId(type));
}
return READER_INT;
case LONG:
case LONG:
return READER_LONG;
case NULL:
case NULL:
return READER_NULL;
case STRING:
if (AvroSchemaHelper.getTypeId(type) != null) {
Expand Down Expand Up @@ -127,7 +128,7 @@ public AvroStructureReader createReader(Schema schema)
switch (schema.getType()) {
case ARRAY:
return createArrayReader(schema);
case MAP:
case MAP:
return createMapReader(schema);
case RECORD:
return createRecordReader(schema);
Expand Down Expand Up @@ -252,7 +253,7 @@ public AvroStructureReader createReader(Schema writerSchema, Schema readerSchema
switch (writerSchema.getType()) {
case ARRAY:
return createArrayReader(writerSchema, readerSchema);
case MAP:
case MAP:
return createMapReader(writerSchema, readerSchema);
case RECORD:
return createRecordReader(writerSchema, readerSchema);
Expand Down Expand Up @@ -303,7 +304,7 @@ protected AvroStructureReader createRecordReader(Schema writerSchema, Schema rea
final List<Schema.Field> writerFields = writerSchema.getFields();

// but first: find fields that only exist in reader-side and need defaults,
// and remove those from
// and remove those from
Map<String,Schema.Field> readerFields = new HashMap<String,Schema.Field>();
List<Schema.Field> defaultFields = new ArrayList<Schema.Field>();
{
Expand All @@ -320,7 +321,7 @@ protected AvroStructureReader createRecordReader(Schema writerSchema, Schema rea
}
}
}

// note: despite changes, we will always have known number of field entities,
// ones from writer schema -- some may skip, but there's entry there
AvroFieldReader[] fieldReaders = new AvroFieldReader[writerFields.size()
Expand All @@ -339,12 +340,13 @@ protected AvroStructureReader createRecordReader(Schema writerSchema, Schema rea
: createFieldReader(readerField.name(),
writerField.schema(), readerField.schema());
}

// Any defaults to consider?
if (!defaultFields.isEmpty()) {
for (Schema.Field defaultField : defaultFields) {
AvroFieldReader fr =
AvroFieldDefaulters.createDefaulter(defaultField.name(), JacksonUtils.toJsonNode(defaultField.defaultVal()));
AvroFieldReader fr = AvroFieldDefaulters.createDefaulter(defaultField.name(),
AvroSchemaHelper.parseDefaultValue(defaultField.defaultVal())
);
if (fr == null) {
throw new IllegalArgumentException("Unsupported default type: "+defaultField.schema().getType());
}
Expand All @@ -359,9 +361,9 @@ protected AvroStructureReader createUnionReader(Schema writerSchema, Schema read
final List<Schema> types = writerSchema.getTypes();
AvroStructureReader[] typeReaders = new AvroStructureReader[types.size()];
int i = 0;

// !!! TODO: actual resolution !!!

for (Schema type : types) {
typeReaders[i++] = createReader(type);
}
Expand Down Expand Up @@ -414,7 +416,7 @@ private Schema _verifyMatchingStructure(Schema readerSchema, Schema writerSchema
// in case there are multiple alternatives of same structured type.
// But since that is quite non-trivial let's wait for a good example of actual
// usage before tackling that.

if (actualType == Schema.Type.UNION) {
for (Schema sch : readerSchema.getTypes()) {
if (sch.getType() == expectedType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,20 @@
import org.apache.avro.specific.SpecificData;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.BeanDescription;
import com.fasterxml.jackson.databind.JavaType;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.*;
import com.fasterxml.jackson.databind.introspect.AnnotatedClass;
import com.fasterxml.jackson.databind.introspect.AnnotatedConstructor;
import com.fasterxml.jackson.databind.jsonFormatVisitors.JsonFormatTypes;
import com.fasterxml.jackson.databind.util.ClassUtil;

public abstract class AvroSchemaHelper
{
/**
* Dedicated mapper for handling default values (String &lt;-&gt; JsonNode &lt;-&gt; Object)
*/
private static final ObjectMapper DEFAULT_VALUE_MAPPER = new ObjectMapper();

/**
* Constant used by native Avro Schemas for indicating more specific
* physical class of a value; referenced indirectly to reduce direct
Expand Down Expand Up @@ -317,6 +322,10 @@ public static String getTypeId(Schema schema) {
/**
* Returns the full name of a schema; This is similar to {@link Schema#getFullName()}, except that it properly handles namespaces for
* nested classes. (<code>package.name.ClassName$NestedClassName</code> instead of <code>package.name.ClassName$.NestedClassName</code>)
* <p>
* <b>WARNING!</b> This method has to probe for nested classes in order to resolve whether or not a schema references a top-level
* class or a nested class and return the corresponding name for each.
* </p>
*/
public static String getFullName(Schema schema) {
switch (schema.getType()) {
Expand All @@ -332,9 +341,44 @@ public static String getFullName(Schema schema) {
return namespace + name;
}
StringBuilder sb = new StringBuilder(1 + namespace.length() + name.length());
return sb.append(namespace).append('.').append(name).toString();
default:
// Check if this is a nested class
String nestedClassName = sb.append(namespace).append('$').append(name).toString();
try {
Class.forName(nestedClassName);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@baharclerode assuming most common case is NOT inner class, would it make sense to try loading "regular" class first, inverting logic? Only asking since there's quite a bit of overhead for exception handling and possibly other corner cases for trying to load non-existing class.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so; Either way, we're always doing at least 1 Class.forName(); Either we check for the non-nested class, and assume if that fails that it's nested, or we always check for the nested class, and if that fails, assume that it's not-nested.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alas, looks like this lead to a major performance issue for some usage (see #219 for attempts to work around the issue). I assume that case of finding class is relatively cheap, but that failure is enormously expensive (as that is the "rare" case being optimized against), depending on complexity of classpath for application.

return nestedClassName;
} catch (ClassNotFoundException e) {
// Could not find a nested class, must be a regular class
sb.setLength(0);
return sb.append(namespace).append('.').append(name).toString();
}
default:
return schema.getType().getName();
}
}

public static JsonNode parseDefaultValue(Object defaultValue) {
return DEFAULT_VALUE_MAPPER.convertValue(defaultValue, JsonNode.class);
}

public static Object convertDefaultValueToObject(JsonNode defaultJsonValue) {
return DEFAULT_VALUE_MAPPER.convertValue(defaultJsonValue, Object.class);
}

/**
* Converts a default value represented as a string (such as from a schema specification) into a JsonNode for further handling.
*
* @param defaultValue The default value to parse, in the form of a JSON string
* @return a parsed JSON representation of the default value
* @throws JsonMappingException If {@code defaultValue} is not valid JSON
*/
public static JsonNode parseDefaultValue(String defaultValue) throws JsonMappingException {
if (defaultValue == null) {
return null;
}
try {
return DEFAULT_VALUE_MAPPER.readTree(defaultValue);
} catch (JsonProcessingException e) {
throw new JsonMappingException(null, "Failed to parse default value", e);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.fasterxml.jackson.dataformat.avro.schema;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
Expand All @@ -9,9 +8,6 @@
import org.apache.avro.Schema.Type;
import org.apache.avro.reflect.AvroMeta;
import org.apache.avro.reflect.AvroSchema;
import org.apache.avro.util.internal.JacksonUtils;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;

import com.fasterxml.jackson.databind.*;
import com.fasterxml.jackson.databind.jsonFormatVisitors.JsonFormatVisitable;
Expand All @@ -36,9 +32,9 @@ public class RecordVisitor
protected final boolean _overridden;

protected Schema _avroSchema;

protected List<Schema.Field> _fields = new ArrayList<Schema.Field>();

public RecordVisitor(SerializerProvider p, JavaType type, DefinedSchemas schemas)
{
super(p);
Expand Down Expand Up @@ -75,7 +71,7 @@ public RecordVisitor(SerializerProvider p, JavaType type, DefinedSchemas schemas
}
schemas.addSchema(type, _avroSchema);
}

@Override
public Schema builtAvroSchema() {
if (!_overridden) {
Expand All @@ -90,7 +86,7 @@ public Schema builtAvroSchema() {
/* JsonObjectFormatVisitor implementation
/**********************************************************
*/

@Override
public void property(BeanProperty writer) throws JsonMappingException
{
Expand Down Expand Up @@ -142,7 +138,7 @@ public void optionalProperty(String name, JsonFormatVisitable handler,
/* Internal methods
/**********************************************************************
*/

protected Schema.Field schemaFieldForWriter(BeanProperty prop, boolean optional) throws JsonMappingException
{
Schema writerSchema;
Expand Down Expand Up @@ -187,10 +183,10 @@ protected Schema.Field schemaFieldForWriter(BeanProperty prop, boolean optional)
writerSchema = AvroSchemaHelper.unionWithNull(writerSchema);
}
}
JsonNode defaultValue = parseJson(prop.getMetadata().getDefaultValue());
JsonNode defaultValue = AvroSchemaHelper.parseDefaultValue(prop.getMetadata().getDefaultValue());
writerSchema = reorderUnionToMatchDefaultType(writerSchema, defaultValue);
Schema.Field field = new Schema.Field(prop.getName(), writerSchema, prop.getMetadata().getDescription(),
JacksonUtils.toObject(defaultValue));
AvroSchemaHelper.convertDefaultValueToObject(defaultValue));

AvroMeta meta = prop.getAnnotation(AvroMeta.class);
if (meta != null) {
Expand All @@ -206,29 +202,6 @@ protected Schema.Field schemaFieldForWriter(BeanProperty prop, boolean optional)
return field;
}

/**
* Parses a JSON-encoded string for use as the default value of a field
*
* @param defaultValue
* Default value as a JSON-encoded string
*
* @return Jackson V1 {@link JsonNode} for use as the default value in a {@link Schema.Field}
*
* @throws JsonMappingException
* if {@code defaultValue} is not valid JSON
*/
protected JsonNode parseJson(String defaultValue) throws JsonMappingException {
if (defaultValue == null) {
return null;
}
ObjectMapper mapper = new ObjectMapper();
try {
return mapper.readTree(defaultValue);
} catch (IOException e) {
throw JsonMappingException.from(getProvider(), "Unable to parse default value as JSON: " + defaultValue, e);
}
}

/**
* A union schema with a default value must always have the schema branch corresponding to the default value first, or Avro will print a
* warning complaining that the default value is not compatible. If {@code schema} is a {@link Schema.Type#UNION UNION} schema and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,15 @@ protected Schema createSchema(Type type, Map<String, Schema> names) {
/* 14-Jun-2018, tatu: This is 99.9% certainly wrong; IDE gives warnings and
* it just... doesn't fly. Either keys are NOT Strings or...
*/
/*
* 26-Jun-2019, baharclerode: The keys are NOT always strings. It's a horrible hack I used to enable support for generics
* in the apache implementation because otherwise it flat out doesn't support generic types correctly except for the handful
* that they hand-coded support for, resulting in oddities like Set<T> not working, or List<T> working but LinkedList<T> not
* working. This regression suite is a lot simpler when we don't have to disable half the tests because the reference
* implementation doesn't support them.
*
* Note the "((Map) names).put(...)" above this else/if case.
*/
if (names.containsKey(type)) {
return names.get(type);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ static class RecordWithDefaults {
@AvroDefault("1234")
public Integer intField;
@AvroDefault("true")
public Integer booleanField;
public Boolean booleanField;
}

@Test
Expand Down