Skip to content

Commit 713a6ab

Browse files
authored
Add WRITE_MINIMAL_DOUBLES feature to CBORGenerator (#356)
Enabling WRITE_MINIMAL_DOUBLES works similarly to WRITE_MINIMAL_INTS: the generator chooses a smaller data representation (a single-precision float) when the conversion results in no loss of data. This can provide substantial savings for data that contains many doubles that don't require full precision. Also cleaned up some of the writing code for floats and doubles to avoid code duplication and to reserve the proper amount of output space for both types.
1 parent e9abe20 commit 713a6ab

File tree

3 files changed

+142
-49
lines changed

3 files changed

+142
-49
lines changed

cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java

Lines changed: 63 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,19 @@ public enum Feature implements FormatFeature {
102102
* @since 2.15
103103
*/
104104
STRINGREF(false),
105+
106+
/**
107+
* Feature that determines whether generator should try to write doubles
108+
* as floats: if {@code true}, will write a {@code double} as a 4-byte float if no
109+
* precision loss will occur; if {@code false}, will always write a {@code double}
110+
* as an 8-byte double.
111+
* <p>
112+
* Default value is {@code false} meaning that doubles will always be written as
113+
* 8-byte values.
114+
*
115+
* @since 2.15
116+
*/
117+
WRITE_MINIMAL_DOUBLES(false),
105118
;
106119

107120
protected final boolean _defaultState;
@@ -177,6 +190,9 @@ public int getMask() {
177190

178191
protected boolean _cfgMinimalInts;
179192

193+
// @since 2.15
194+
protected boolean _cfgMinimalDoubles;
195+
180196
/*
181197
/**********************************************************
182198
/* Output state
@@ -275,6 +291,7 @@ public CBORGenerator(IOContext ctxt, int stdFeatures, int formatFeatures,
275291
_streamWriteContext = CBORWriteContext.createRootContext(dups);
276292
_formatFeatures = formatFeatures;
277293
_cfgMinimalInts = Feature.WRITE_MINIMAL_INTS.enabledIn(formatFeatures);
294+
_cfgMinimalDoubles = Feature.WRITE_MINIMAL_DOUBLES.enabledIn(formatFeatures);
278295
_ioContext = ctxt;
279296
_out = out;
280297
_bufferRecyclable = true;
@@ -311,6 +328,7 @@ public CBORGenerator(IOContext ctxt, int stdFeatures, int formatFeatures,
311328
_streamWriteContext = CBORWriteContext.createRootContext(dups);
312329
_formatFeatures = formatFeatures;
313330
_cfgMinimalInts = Feature.WRITE_MINIMAL_INTS.enabledIn(formatFeatures);
331+
_cfgMinimalDoubles = Feature.WRITE_MINIMAL_DOUBLES.enabledIn(formatFeatures);
314332
_ioContext = ctxt;
315333
_out = out;
316334
_bufferRecyclable = bufferRecyclable;
@@ -413,6 +431,7 @@ public JsonGenerator overrideFormatFeatures(int values, int mask) {
413431
if (oldState != newState) {
414432
_formatFeatures = newState;
415433
_cfgMinimalInts = Feature.WRITE_MINIMAL_INTS.enabledIn(newState);
434+
_cfgMinimalDoubles = Feature.WRITE_MINIMAL_DOUBLES.enabledIn(newState);
416435
}
417436
return this;
418437
}
@@ -458,6 +477,8 @@ public CBORGenerator enable(Feature f) {
458477
_formatFeatures |= f.getMask();
459478
if (f == Feature.WRITE_MINIMAL_INTS) {
460479
_cfgMinimalInts = true;
480+
} else if (f == Feature.WRITE_MINIMAL_DOUBLES) {
481+
_cfgMinimalDoubles = true;
461482
}
462483
return this;
463484
}
@@ -466,6 +487,8 @@ public CBORGenerator disable(Feature f) {
466487
_formatFeatures &= ~f.getMask();
467488
if (f == Feature.WRITE_MINIMAL_INTS) {
468489
_cfgMinimalInts = false;
490+
} else if (f == Feature.WRITE_MINIMAL_DOUBLES) {
491+
_cfgMinimalDoubles = false;
469492
}
470493
return this;
471494
}
@@ -691,8 +714,14 @@ public void writeArray(double[] array, int offset, int length) throws IOExceptio
691714
// short-cut, do not create child array context etc
692715
_verifyValueWrite("write int array");
693716
_writeLengthMarker(PREFIX_TYPE_ARRAY, length);
694-
for (int i = offset, end = offset+length; i < end; ++i) {
695-
_writeDoubleNoCheck(array[i]);
717+
if (_cfgMinimalDoubles) {
718+
for (int i = offset, end = offset+length; i < end; ++i) {
719+
_writeDoubleMinimal(array[i]);
720+
}
721+
} else {
722+
for (int i = offset, end = offset+length; i < end; ++i) {
723+
_writeDoubleNoCheck(array[i]);
724+
}
696725
}
697726
}
698727

@@ -786,8 +815,24 @@ private final void _writeLongNoCheck(long l) throws IOException
786815
_outputBuffer[_outputTail++] = (byte) i;
787816
}
788817

818+
private final void _writeFloatNoCheck(float f) throws IOException {
819+
_ensureRoomForOutput(5);
820+
/*
821+
* 17-Apr-2010, tatu: could also use 'floatToIntBits', but it seems more
822+
* accurate to use exact representation; and possibly faster. However,
823+
* if there are cases where collapsing of NaN was needed (for non-Java
824+
* clients), this can be changed
825+
*/
826+
int i = Float.floatToRawIntBits(f);
827+
_outputBuffer[_outputTail++] = BYTE_FLOAT32;
828+
_outputBuffer[_outputTail++] = (byte) (i >> 24);
829+
_outputBuffer[_outputTail++] = (byte) (i >> 16);
830+
_outputBuffer[_outputTail++] = (byte) (i >> 8);
831+
_outputBuffer[_outputTail++] = (byte) i;
832+
}
833+
789834
private final void _writeDoubleNoCheck(double d) throws IOException {
790-
_ensureRoomForOutput(11);
835+
_ensureRoomForOutput(9);
791836
// 17-Apr-2010, tatu: could also use 'doubleToIntBits', but it seems
792837
// more accurate to use exact representation; and possibly faster.
793838
// However, if there are cases where collapsing of NaN was needed (for
@@ -807,6 +852,15 @@ private final void _writeDoubleNoCheck(double d) throws IOException {
807852
_outputBuffer[_outputTail++] = (byte) i;
808853
}
809854

855+
private final void _writeDoubleMinimal(double d) throws IOException {
856+
float f = (float)d;
857+
if (f == d) {
858+
_writeFloatNoCheck(f);
859+
} else {
860+
_writeDoubleNoCheck(d);
861+
}
862+
}
863+
810864
/*
811865
/***********************************************************
812866
/* Output method implementations, textual
@@ -1178,46 +1232,17 @@ protected void _write(BigInteger v) throws IOException {
11781232
@Override
11791233
public void writeNumber(double d) throws IOException {
11801234
_verifyValueWrite("write number");
1181-
_ensureRoomForOutput(11);
1182-
/*
1183-
* 17-Apr-2010, tatu: could also use 'doubleToIntBits', but it seems
1184-
* more accurate to use exact representation; and possibly faster.
1185-
* However, if there are cases where collapsing of NaN was needed (for
1186-
* non-Java clients), this can be changed
1187-
*/
1188-
long l = Double.doubleToRawLongBits(d);
1189-
_outputBuffer[_outputTail++] = BYTE_FLOAT64;
1190-
1191-
int i = (int) (l >> 32);
1192-
_outputBuffer[_outputTail++] = (byte) (i >> 24);
1193-
_outputBuffer[_outputTail++] = (byte) (i >> 16);
1194-
_outputBuffer[_outputTail++] = (byte) (i >> 8);
1195-
_outputBuffer[_outputTail++] = (byte) i;
1196-
i = (int) l;
1197-
_outputBuffer[_outputTail++] = (byte) (i >> 24);
1198-
_outputBuffer[_outputTail++] = (byte) (i >> 16);
1199-
_outputBuffer[_outputTail++] = (byte) (i >> 8);
1200-
_outputBuffer[_outputTail++] = (byte) i;
1235+
if (_cfgMinimalDoubles) {
1236+
_writeDoubleMinimal(d);
1237+
} else {
1238+
_writeDoubleNoCheck(d);
1239+
}
12011240
}
12021241

12031242
@Override
12041243
public void writeNumber(float f) throws IOException {
1205-
// Ok, now, we needed token type byte plus 5 data bytes (7 bits each)
1206-
_ensureRoomForOutput(6);
12071244
_verifyValueWrite("write number");
1208-
1209-
/*
1210-
* 17-Apr-2010, tatu: could also use 'floatToIntBits', but it seems more
1211-
* accurate to use exact representation; and possibly faster. However,
1212-
* if there are cases where collapsing of NaN was needed (for non-Java
1213-
* clients), this can be changed
1214-
*/
1215-
int i = Float.floatToRawIntBits(f);
1216-
_outputBuffer[_outputTail++] = BYTE_FLOAT32;
1217-
_outputBuffer[_outputTail++] = (byte) (i >> 24);
1218-
_outputBuffer[_outputTail++] = (byte) (i >> 16);
1219-
_outputBuffer[_outputTail++] = (byte) (i >> 8);
1220-
_outputBuffer[_outputTail++] = (byte) i;
1245+
_writeFloatNoCheck(f);
12211246
}
12221247

12231248
@Override

cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/gen/ArrayGenerationTest.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,64 @@ public void testMinimalIntValuesForLong() throws Exception
150150
p.close();
151151
}
152152

153+
public void testMinimalFloatValuesForDouble() throws Exception
154+
{
155+
// Array with 2 values, one that can be represented as a float without losing precision and
156+
// one that cannot.
157+
final double[] input = new double[] {
158+
1.5, // can be exactly represented as a float
159+
0.123456789 // must be kept as double
160+
};
161+
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
162+
CBORGenerator gen = FACTORY.createGenerator(bytes);
163+
assertFalse(gen.isEnabled(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES));
164+
gen.enable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES);
165+
gen.writeArray(input, 0, 2);
166+
gen.close();
167+
168+
// With minimal doubles enabled, should get:
169+
byte[] encoded = bytes.toByteArray();
170+
assertEquals(15, encoded.length);
171+
172+
// then verify contents
173+
174+
CBORParser p = FACTORY.createParser(encoded);
175+
assertToken(JsonToken.START_ARRAY, p.nextToken());
176+
assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken());
177+
assertEquals(NumberType.FLOAT, p.getNumberType());
178+
assertEquals(input[0], p.getDoubleValue());
179+
assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken());
180+
assertEquals(NumberType.DOUBLE, p.getNumberType());
181+
assertEquals(input[1], p.getDoubleValue());
182+
assertToken(JsonToken.END_ARRAY, p.nextToken());
183+
p.close();
184+
185+
// but then also check without minimization
186+
bytes = new ByteArrayOutputStream();
187+
gen = FACTORY.createGenerator(bytes);
188+
gen.disable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES);
189+
190+
gen.writeArray(input, 0, 2);
191+
gen.close();
192+
193+
// With default settings, should get:
194+
encoded = bytes.toByteArray();
195+
assertEquals(19, encoded.length);
196+
197+
// then verify contents
198+
199+
p = FACTORY.createParser(encoded);
200+
assertToken(JsonToken.START_ARRAY, p.nextToken());
201+
assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken());
202+
assertEquals(NumberType.DOUBLE, p.getNumberType());
203+
assertEquals(input[0], p.getDoubleValue());
204+
assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken());
205+
assertEquals(NumberType.DOUBLE, p.getNumberType());
206+
assertEquals(input[1], p.getDoubleValue());
207+
assertToken(JsonToken.END_ARRAY, p.nextToken());
208+
p.close();
209+
}
210+
153211
private void _testIntArray() throws Exception {
154212
// first special cases of 0, 1 values
155213
_testIntArray(0, 0, 0);

cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/CBORMapperTest.java

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import com.fasterxml.jackson.dataformat.cbor.CBORTestBase;
55
import com.fasterxml.jackson.dataformat.cbor.databind.CBORMapper;
66

7+
import org.junit.Assert;
8+
79
public class CBORMapperTest extends CBORTestBase
810
{
911
/*
@@ -14,23 +16,31 @@ public class CBORMapperTest extends CBORTestBase
1416

1517
public void testStreamingFeaturesViaMapper() throws Exception
1618
{
17-
final Integer SMALL_INT = Integer.valueOf(3);
18-
CBORMapper mapperWithMinimalInts = CBORMapper.builder()
19+
final int SMALL_INT = 3;
20+
final int BIG_INT = 0x7FFFFFFF;
21+
final double LOW_RPECISION_DOUBLE = 1.5;
22+
final double HIGH_RPECISION_DOUBLE = 0.123456789;
23+
Object[] values = {SMALL_INT, BIG_INT, LOW_RPECISION_DOUBLE, HIGH_RPECISION_DOUBLE};
24+
Object[] minimalValues = {
25+
SMALL_INT, BIG_INT, (float)LOW_RPECISION_DOUBLE, HIGH_RPECISION_DOUBLE};
26+
CBORMapper mapperWithMinimal = CBORMapper.builder()
1927
.enable(CBORGenerator.Feature.WRITE_MINIMAL_INTS)
28+
.enable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES)
2029
.build();
21-
byte[] encodedMinimal = mapperWithMinimalInts.writeValueAsBytes(SMALL_INT);
22-
assertEquals(1, encodedMinimal.length);
30+
byte[] encodedMinimal = mapperWithMinimal.writeValueAsBytes(values);
31+
assertEquals(21, encodedMinimal.length);
2332

24-
CBORMapper mapperFullInts = CBORMapper.builder()
33+
CBORMapper mapperFull = CBORMapper.builder()
2534
.disable(CBORGenerator.Feature.WRITE_MINIMAL_INTS)
35+
.disable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES)
2636
.build();
27-
byte[] encodedNotMinimal = mapperFullInts.writeValueAsBytes(SMALL_INT);
28-
assertEquals(5, encodedNotMinimal.length);
37+
byte[] encodedNotMinimal = mapperFull.writeValueAsBytes(values);
38+
assertEquals(29, encodedNotMinimal.length);
2939

3040
// And then verify we can read it back, either way
31-
assertEquals(SMALL_INT, mapperWithMinimalInts.readValue(encodedMinimal, Object.class));
32-
assertEquals(SMALL_INT, mapperWithMinimalInts.readValue(encodedNotMinimal, Object.class));
33-
assertEquals(SMALL_INT, mapperFullInts.readValue(encodedMinimal, Object.class));
34-
assertEquals(SMALL_INT, mapperFullInts.readValue(encodedNotMinimal, Object.class));
41+
Assert.assertArrayEquals(minimalValues, mapperWithMinimal.readValue(encodedMinimal, Object[].class));
42+
Assert.assertArrayEquals(values, mapperWithMinimal.readValue(encodedNotMinimal, Object[].class));
43+
Assert.assertArrayEquals(minimalValues, mapperFull.readValue(encodedMinimal, Object[].class));
44+
Assert.assertArrayEquals(values, mapperFull.readValue(encodedNotMinimal, Object[].class));
3545
}
3646
}

0 commit comments

Comments
 (0)