Fix #95

cowtowncoder · cowtowncoder · commit 2bcdcb5f492b · 2016-01-09T19:47:25.000-08:00
diff --git a/release-notes/VERSION b/release-notes/VERSION
@@ -13,6 +13,8 @@ Project: jackson-dataformat-csv
 #92: Allow multi-character separator values
  (contributed by dharaburda@github)
 #94: Change schema/mapping related `JsonParseException`s to proper `JsonMappingException`s
+#95: Add `CsvParser.Feature.IGNORE_TRAILING_UNMAPPABLE` to allow skipping of
+  all extra, unmappable columns
 #97: Verify CSV headers are in the order as expected (added `strictHeaders` property in `CsvSchema`)
  (contributed by Nick B)
 #103: `JsonGenerator.Feature.IGNORE_UNKNOWN` does not prevent error when writing structured values
diff --git a/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java b/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java
@@ -47,7 +47,7 @@ public enum Feature
         /**
          * Feature that determines how stream of records (usually CSV lines, but sometimes
          * multiple lines when linefeeds are included in quoted values) is exposed:
-         * either as a sequence of Objects (false), or as an array of Objects (true).
+         * either as a sequence of Objects (false), or as an Array of Objects (true).
          * Using stream of Objects is convenient when using
          * <code>ObjectMapper.readValues(...)</code>
          * and array of Objects convenient when binding to <code>List</code>s or
@@ -56,7 +56,17 @@ public enum Feature
          * Default value is false, meaning that by default a CSV document is exposed as
          * a sequence of root-level Object entries.
          */
-        WRAP_AS_ARRAY(false)
+        WRAP_AS_ARRAY(false),
+
+        /**
+         * Feature that allows ignoring of unmappable "extra" columns; that is, values for
+         * columns that appear after the last column defined by the schema. When disabled
+         * (the default), an exception is thrown for such column values; when enabled,
+         * they are silently ignored.
+         *
+         * @since 2.7
+         */
+        IGNORE_TRAILING_UNMAPPABLE(false),
         ;
 
         final boolean _defaultState;
@@ -516,17 +526,7 @@ public JsonToken nextToken() throws IOException
             return (_currToken = _handleArrayValue());
         case STATE_SKIP_EXTRA_COLUMNS:
             // Need to just skip whatever remains
-            _state = STATE_RECORD_START;
-            while (_reader.nextString() != null) { }
-
-            // But once we hit the end of the logical line, get out
-            // NOTE: seems like we should always be within Object, but let's be conservative
-            // and check just in case
-            _parsingContext = _parsingContext.getParent();
-            _state = _reader.startNewLine() ? STATE_RECORD_START : STATE_DOC_END;
-            return (_currToken = _parsingContext.inArray()
-                    ? JsonToken.END_ARRAY : JsonToken.END_OBJECT);
-
+            return _skipUntilEndOfLine();
         case STATE_DOC_END:
             _reader.close();
             if (_parsingContext.inRoot()) {
@@ -690,14 +690,18 @@ protected JsonToken _handleNextEntry() throws IOException
             }
             return JsonToken.END_OBJECT;
         }
-        _state = STATE_NAMED_VALUE;
         _currentValue = next;
         if (_columnIndex >= _columnCount) {
             _currentName = null;
-            /* 14-Mar-2012, tatu: As per [Issue-1], let's allow one specific
-             *  case of extra: if we get just one all-whitespace entry, that
-             *  can be just skipped
-             */
+
+            // 09-Jan-2016, tatu: With [dataformat-csv#95], this may actually be just fine
+            if (Feature.IGNORE_TRAILING_UNMAPPABLE.enabledIn(_formatFeatures)) {
+                _state = STATE_SKIP_EXTRA_COLUMNS;
+                return _skipUntilEndOfLine();
+            }
+
+            // 14-Mar-2012, tatu: As per [dataformat-csv#1], let's allow one specific case
+            // of extra: if we get just one all-whitespace entry, that can be just skipped
             if (_columnIndex == _columnCount) {
                 next = next.trim();
                 if (next.length() == 0) {
@@ -707,10 +711,12 @@ protected JsonToken _handleNextEntry() throws IOException
                     return _handleNextEntryExpectEOL();
                 }
             }
+            
             // 21-May-2015, tatu: Need to enter recovery mode, to skip remainder of the line
             _state = STATE_SKIP_EXTRA_COLUMNS;
             _reportMappingError("Too many entries: expected at most "+_columnCount+" (value #"+_columnCount+" ("+next.length()+" chars) \""+next+"\")");
         }
+        _state = STATE_NAMED_VALUE;
         _currentName = _schema.columnName(_columnIndex);
         return JsonToken.FIELD_NAME;
     }
@@ -889,6 +895,19 @@ protected void _readHeaderLine() throws IOException {
         setSchema(builder.build());
     }
 
+    protected final JsonToken _skipUntilEndOfLine() throws IOException
+    {
+        while (_reader.nextString() != null) { }
+
+        // All remaining columns are consumed: we are at the end of the logical line,
+        // so close the current scope. NOTE: seems like we should always be within
+        // Object, but let's be conservative and check just in case
+        _parsingContext = _parsingContext.getParent();
+        _state = _reader.startNewLine() ? STATE_RECORD_START : STATE_DOC_END;
+        return (_currToken = _parsingContext.inArray()
+                ? JsonToken.END_ARRAY : JsonToken.END_OBJECT);
+    }
+    
     /*
     /**********************************************************
     /* String value handling
diff --git a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/IgnoreUnmappableTest.java b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/IgnoreUnmappableTest.java
@@ -0,0 +1,76 @@
+package com.fasterxml.jackson.dataformat.csv.deser;
+
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.fasterxml.jackson.databind.MappingIterator;
+
+import com.fasterxml.jackson.dataformat.csv.*;
+
+/**
+ * Test(s) for [dataformat-csv#95]
+ */
+public class IgnoreUnmappableTest extends ModuleTestBase
+{
+    final CsvMapper MAPPER = mapperForCsv();
+
+    @JsonPropertyOrder({ "first", "second" })
+    static class StringPair {
+        public String first, second;
+    }
+    
+    public void testSimpleIgnoral() throws Exception
+    {
+        final String INPUT = "a,b,c,foo\nd,e\nf,g,h,i\n";
+        final CsvSchema schema = MAPPER.schemaFor(StringPair.class);
+
+        // first: throw exception(s) with default settings
+        MappingIterator<StringPair> it = MAPPER.readerFor(StringPair.class)
+                .with(schema)
+                .without(CsvParser.Feature.IGNORE_TRAILING_UNMAPPABLE)
+                .readValues(INPUT);
+        
+        try {
+            it.nextValue();
+            fail("Should not have passed");
+        } catch (JsonMappingException e) {
+            verifyException(e, "Too many entries");
+        }
+
+        // yet the second row has exactly two columns, so it ought to work
+        StringPair pair = it.nextValue();
+        assertEquals("d", pair.first);
+        assertEquals("e", pair.second);
+
+        // and the third row has extra columns again, so it must fail too
+        try {
+            it.nextValue();
+            fail("Should not have passed");
+        } catch (JsonMappingException e) {
+            verifyException(e, "Too many entries");
+        }
+        it.close();
+
+        // But with the feature enabled, extra columns are skipped on every row
+        it = MAPPER.readerFor(StringPair.class)
+                .with(schema)
+                .with(CsvParser.Feature.IGNORE_TRAILING_UNMAPPABLE)
+                .readValues(INPUT);
+
+        pair = it.nextValue();
+        assertEquals("a", pair.first);
+        assertEquals("b", pair.second);
+
+        pair = it.nextValue();
+        assertEquals("d", pair.first);
+        assertEquals("e", pair.second);
+
+        pair = it.nextValue();
+        assertEquals("f", pair.first);
+        assertEquals("g", pair.second);
+
+        assertFalse(it.hasNextValue());
+
+        it.close();
+    }
+}