Add WRITE_MINIMAL_DOUBLES feature to CBORGenerator (#356)

here-abarany · web-flow · commit 713a6abb79c7 · 2023-02-09T16:57:11.000-08:00
Enabling WRITE_MINIMAL_DOUBLES works similarly to WRITE_MINIMAL_INTS,
choosing a smaller data representation (single-precision float) when
the conversion results in no loss of data. This can provide
substantial savings for data that contains many doubles that don't require
full precision.

Cleaned up some of the writing code for floats and doubles to avoid code
duplication, and to reserve the proper amount of output space for both
types.
diff --git a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java
@@ -102,6 +102,19 @@ public enum Feature implements FormatFeature {
          * @since 2.15
          */
         STRINGREF(false),
+
+        /**
+         * Feature that determines whether generator should try to write doubles
+         * as floats: if {@code true}, will write a {@code double} as a 4-byte float if no
+         * precision loss will occur; if {@code false}, will always write a {@code double}
+         * as an 8-byte double.
+         * <p>
+         * Default value is {@code false} meaning that doubles will always be written as
+         * 8-byte values.
+         *
+         * @since 2.15
+         */
+        WRITE_MINIMAL_DOUBLES(false),
         ;
 
         protected final boolean _defaultState;
@@ -177,6 +190,9 @@ public int getMask() {
 
     protected boolean _cfgMinimalInts;
 
+    // @since 2.15
+    protected boolean _cfgMinimalDoubles;
+
     /*
     /**********************************************************
     /* Output state
@@ -275,6 +291,7 @@ public CBORGenerator(IOContext ctxt, int stdFeatures, int formatFeatures,
         _streamWriteContext = CBORWriteContext.createRootContext(dups);
         _formatFeatures = formatFeatures;
         _cfgMinimalInts = Feature.WRITE_MINIMAL_INTS.enabledIn(formatFeatures);
+        _cfgMinimalDoubles = Feature.WRITE_MINIMAL_DOUBLES.enabledIn(formatFeatures);
         _ioContext = ctxt;
         _out = out;
         _bufferRecyclable = true;
@@ -311,6 +328,7 @@ public CBORGenerator(IOContext ctxt, int stdFeatures, int formatFeatures,
         _streamWriteContext = CBORWriteContext.createRootContext(dups);
         _formatFeatures = formatFeatures;
         _cfgMinimalInts = Feature.WRITE_MINIMAL_INTS.enabledIn(formatFeatures);
+        _cfgMinimalDoubles = Feature.WRITE_MINIMAL_DOUBLES.enabledIn(formatFeatures);
         _ioContext = ctxt;
         _out = out;
         _bufferRecyclable = bufferRecyclable;
@@ -413,6 +431,7 @@ public JsonGenerator overrideFormatFeatures(int values, int mask) {
         if (oldState != newState) {
             _formatFeatures = newState;
             _cfgMinimalInts = Feature.WRITE_MINIMAL_INTS.enabledIn(newState);
+            _cfgMinimalDoubles = Feature.WRITE_MINIMAL_DOUBLES.enabledIn(newState);
         }
         return this;
     }
@@ -458,6 +477,8 @@ public CBORGenerator enable(Feature f) {
         _formatFeatures |= f.getMask();
         if (f == Feature.WRITE_MINIMAL_INTS) {
             _cfgMinimalInts = true;
+        } else if (f == Feature.WRITE_MINIMAL_DOUBLES) {
+            _cfgMinimalDoubles = true;
         }
         return this;
     }
@@ -466,6 +487,8 @@ public CBORGenerator disable(Feature f) {
         _formatFeatures &= ~f.getMask();
         if (f == Feature.WRITE_MINIMAL_INTS) {
             _cfgMinimalInts = false;
+        } else if (f == Feature.WRITE_MINIMAL_DOUBLES) {
+            _cfgMinimalDoubles = false;
         }
         return this;
     }
@@ -691,8 +714,14 @@ public void writeArray(double[] array, int offset, int length) throws IOExceptio
         // short-cut, do not create child array context etc
         _verifyValueWrite("write int array");
         _writeLengthMarker(PREFIX_TYPE_ARRAY, length);
-        for (int i = offset, end = offset+length; i < end; ++i) {
-            _writeDoubleNoCheck(array[i]);
+        if (_cfgMinimalDoubles) {
+            for (int i = offset, end = offset+length; i < end; ++i) {
+                _writeDoubleMinimal(array[i]);
+            }
+        } else {
+            for (int i = offset, end = offset+length; i < end; ++i) {
+                _writeDoubleNoCheck(array[i]);
+            }
         }
     }
 
@@ -786,8 +815,24 @@ private final void _writeLongNoCheck(long l) throws IOException
         _outputBuffer[_outputTail++] = (byte) i;
     }
 
+    private final void _writeFloatNoCheck(float f) throws IOException { // no _verifyValueWrite here; visible callers invoke it first
+        _ensureRoomForOutput(5); // 1 marker byte (BYTE_FLOAT32) + 4 payload bytes written below
+        /*
+         * 17-Apr-2010, tatu: could also use 'floatToIntBits', but it seems more
+         * accurate to use exact representation; and possibly faster. However,
+         * if there are cases where collapsing of NaN was needed (for non-Java
+         * clients), this can be changed
+         */
+        int i = Float.floatToRawIntBits(f);
+        _outputBuffer[_outputTail++] = BYTE_FLOAT32;
+        _outputBuffer[_outputTail++] = (byte) (i >> 24); // payload is emitted most-significant byte first
+        _outputBuffer[_outputTail++] = (byte) (i >> 16);
+        _outputBuffer[_outputTail++] = (byte) (i >> 8);
+        _outputBuffer[_outputTail++] = (byte) i;
+    }
+
     private final void _writeDoubleNoCheck(double d) throws IOException {
-        _ensureRoomForOutput(11);
+        _ensureRoomForOutput(9);
         // 17-Apr-2010, tatu: could also use 'doubleToIntBits', but it seems
         // more accurate to use exact representation; and possibly faster.
         // However, if there are cases where collapsing of NaN was needed (for
@@ -807,6 +852,15 @@ private final void _writeDoubleNoCheck(double d) throws IOException {
         _outputBuffer[_outputTail++] = (byte) i;
     }
 
+    private final void _writeDoubleMinimal(double d) throws IOException { // writes d as a float when that is lossless, else as a double
+        float f = (float)d; // narrowing cast may round; the comparison below detects any change in value
+        if (f == d) { // f is widened back to double for the comparison; NaN never compares equal, so NaN takes the else branch
+            _writeFloatNoCheck(f);
+        } else {
+            _writeDoubleNoCheck(d);
+        }
+    }
+
     /*
     /***********************************************************
     /* Output method implementations, textual
@@ -1178,46 +1232,17 @@ protected void _write(BigInteger v) throws IOException {
     @Override
     public void writeNumber(double d) throws IOException {
         _verifyValueWrite("write number");
-        _ensureRoomForOutput(11);
-        /*
-         * 17-Apr-2010, tatu: could also use 'doubleToIntBits', but it seems
-         * more accurate to use exact representation; and possibly faster.
-         * However, if there are cases where collapsing of NaN was needed (for
-         * non-Java clients), this can be changed
-         */
-        long l = Double.doubleToRawLongBits(d);
-        _outputBuffer[_outputTail++] = BYTE_FLOAT64;
-
-        int i = (int) (l >> 32);
-        _outputBuffer[_outputTail++] = (byte) (i >> 24);
-        _outputBuffer[_outputTail++] = (byte) (i >> 16);
-        _outputBuffer[_outputTail++] = (byte) (i >> 8);
-        _outputBuffer[_outputTail++] = (byte) i;
-        i = (int) l;
-        _outputBuffer[_outputTail++] = (byte) (i >> 24);
-        _outputBuffer[_outputTail++] = (byte) (i >> 16);
-        _outputBuffer[_outputTail++] = (byte) (i >> 8);
-        _outputBuffer[_outputTail++] = (byte) i;
+        if (_cfgMinimalDoubles) { // cached state of Feature.WRITE_MINIMAL_DOUBLES
+            _writeDoubleMinimal(d); // picks float or double encoding per value
+        } else {
+            _writeDoubleNoCheck(d); // always the 8-byte double encoding; helper reserves its own buffer space
+        }
     }
 
     @Override
     public void writeNumber(float f) throws IOException {
-        // Ok, now, we needed token type byte plus 5 data bytes (7 bits each)
-        _ensureRoomForOutput(6);
         _verifyValueWrite("write number");
-
-        /*
-         * 17-Apr-2010, tatu: could also use 'floatToIntBits', but it seems more
-         * accurate to use exact representation; and possibly faster. However,
-         * if there are cases where collapsing of NaN was needed (for non-Java
-         * clients), this can be changed
-         */
-        int i = Float.floatToRawIntBits(f);
-        _outputBuffer[_outputTail++] = BYTE_FLOAT32;
-        _outputBuffer[_outputTail++] = (byte) (i >> 24);
-        _outputBuffer[_outputTail++] = (byte) (i >> 16);
-        _outputBuffer[_outputTail++] = (byte) (i >> 8);
-        _outputBuffer[_outputTail++] = (byte) i;
+        _writeFloatNoCheck(f); // shared helper reserves the 5 output bytes itself and writes marker + raw bits
     }
 
     @Override
diff --git a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/gen/ArrayGenerationTest.java b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/gen/ArrayGenerationTest.java
@@ -150,6 +150,64 @@ public void testMinimalIntValuesForLong() throws Exception
         p.close();
     }
 
+    public void testMinimalFloatValuesForDouble() throws Exception
+    {
+        // Array with 2 values: one that can be represented as a float without losing precision
+        // and one that cannot.
+        final double[] input = new double[] {
+                1.5, // can be exactly represented as a float
+                0.123456789 // must be kept as double
+        };
+        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+        CBORGenerator gen = FACTORY.createGenerator(bytes);
+        assertFalse(gen.isEnabled(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES)); // feature is off by default
+        gen.enable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES);
+        gen.writeArray(input, 0, 2);
+        gen.close();
+
+        // With minimal doubles enabled, expect array marker + 5-byte float + 9-byte double:
+        byte[] encoded = bytes.toByteArray();
+        assertEquals(15, encoded.length);
+
+        // then verify contents: the first value must come back typed as FLOAT, the second as DOUBLE
+
+        CBORParser p = FACTORY.createParser(encoded);
+        assertToken(JsonToken.START_ARRAY, p.nextToken());
+        assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken());
+        assertEquals(NumberType.FLOAT, p.getNumberType());
+        assertEquals(input[0], p.getDoubleValue());
+        assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken());
+        assertEquals(NumberType.DOUBLE, p.getNumberType());
+        assertEquals(input[1], p.getDoubleValue());
+        assertToken(JsonToken.END_ARRAY, p.nextToken());
+        p.close();
+
+        // but then also check without minimization
+        bytes = new ByteArrayOutputStream();
+        gen = FACTORY.createGenerator(bytes);
+        gen.disable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES);
+
+        gen.writeArray(input, 0, 2);
+        gen.close();
+
+        // With the feature explicitly disabled, expect array marker + two 9-byte doubles:
+        encoded = bytes.toByteArray();
+        assertEquals(19, encoded.length);
+
+        // then verify contents: both values must come back typed as DOUBLE
+
+        p = FACTORY.createParser(encoded);
+        assertToken(JsonToken.START_ARRAY, p.nextToken());
+        assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken());
+        assertEquals(NumberType.DOUBLE, p.getNumberType());
+        assertEquals(input[0], p.getDoubleValue());
+        assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken());
+        assertEquals(NumberType.DOUBLE, p.getNumberType());
+        assertEquals(input[1], p.getDoubleValue());
+        assertToken(JsonToken.END_ARRAY, p.nextToken());
+        p.close();
+    }
+
     private void _testIntArray() throws Exception {
         // first special cases of 0, 1 values
         _testIntArray(0, 0, 0);
diff --git a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/CBORMapperTest.java b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/CBORMapperTest.java
@@ -4,6 +4,8 @@
 import com.fasterxml.jackson.dataformat.cbor.CBORTestBase;
 import com.fasterxml.jackson.dataformat.cbor.databind.CBORMapper;
 
+import org.junit.Assert;
+
 public class CBORMapperTest extends CBORTestBase
 {
     /*
@@ -14,23 +16,31 @@ public class CBORMapperTest extends CBORTestBase
 
     public void testStreamingFeaturesViaMapper() throws Exception
     {
-        final Integer SMALL_INT = Integer.valueOf(3);
-        CBORMapper mapperWithMinimalInts = CBORMapper.builder()
+        final int SMALL_INT = 3;
+        final int BIG_INT = 0x7FFFFFFF;
+        final double LOW_PRECISION_DOUBLE = 1.5; // exactly representable as a float
+        final double HIGH_PRECISION_DOUBLE = 0.123456789; // not exactly representable as a float
+        Object[] values = {SMALL_INT, BIG_INT, LOW_PRECISION_DOUBLE, HIGH_PRECISION_DOUBLE};
+        Object[] minimalValues = { // expected read-back of minimal encoding: the float-encoded value returns as a Float
+                SMALL_INT, BIG_INT, (float)LOW_PRECISION_DOUBLE, HIGH_PRECISION_DOUBLE};
+        CBORMapper mapperWithMinimal = CBORMapper.builder()
                 .enable(CBORGenerator.Feature.WRITE_MINIMAL_INTS)
+                .enable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES)
                 .build();
-        byte[] encodedMinimal = mapperWithMinimalInts.writeValueAsBytes(SMALL_INT);
-        assertEquals(1, encodedMinimal.length);
+        byte[] encodedMinimal = mapperWithMinimal.writeValueAsBytes(values);
+        assertEquals(21, encodedMinimal.length); // presumably 1 (array marker) + 1 + 5 (ints) + 5 (float) + 9 (double)
 
-        CBORMapper mapperFullInts = CBORMapper.builder()
+        CBORMapper mapperFull = CBORMapper.builder()
                 .disable(CBORGenerator.Feature.WRITE_MINIMAL_INTS)
+                .disable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES)
                 .build();
-        byte[] encodedNotMinimal = mapperFullInts.writeValueAsBytes(SMALL_INT);
-        assertEquals(5, encodedNotMinimal.length);
+        byte[] encodedNotMinimal = mapperFull.writeValueAsBytes(values);
+        assertEquals(29, encodedNotMinimal.length); // presumably 1 (array marker) + 5 + 5 (ints) + 9 + 9 (doubles)
 
         // And then verify we can read it back, either way
-        assertEquals(SMALL_INT, mapperWithMinimalInts.readValue(encodedMinimal, Object.class));
-        assertEquals(SMALL_INT, mapperWithMinimalInts.readValue(encodedNotMinimal, Object.class));
-        assertEquals(SMALL_INT, mapperFullInts.readValue(encodedMinimal, Object.class));
-        assertEquals(SMALL_INT, mapperFullInts.readValue(encodedNotMinimal, Object.class));
+        Assert.assertArrayEquals(minimalValues, mapperWithMinimal.readValue(encodedMinimal, Object[].class));
+        Assert.assertArrayEquals(values, mapperWithMinimal.readValue(encodedNotMinimal, Object[].class));
+        Assert.assertArrayEquals(minimalValues, mapperFull.readValue(encodedMinimal, Object[].class));
+        Assert.assertArrayEquals(values, mapperFull.readValue(encodedNotMinimal, Object[].class));
     }
 }