Skip to content

Commit 130ff68

Browse files
committed
fix(Table): do not skip empty lines in csv reader
fix #280
1 parent be670e6 commit 130ff68

File tree

3 files changed

+11
-1
lines changed

3 files changed

+11
-1
lines changed

src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ private int loadInternal (Table table) throws Exception {
332332
int keyFieldIndex = table.getKeyFieldIndex(fields);
333333
// Create separate fields array with filtered list that does not include null values (for duplicate headers or
334334
// ID field). This is solely used to construct the table and array of values to load.
335-
Field[] cleanFields = Arrays.stream(fields).filter(field -> field != null).toArray(Field[]::new);
335+
Field[] cleanFields = Arrays.stream(fields).filter(Objects::nonNull).toArray(Field[]::new);
336336
if (cleanFields.length == 0) {
337337
// Do not create the table if there are no valid fields.
338338
errorStorage.storeError(NewGTFSError.forTable(table, TABLE_MISSING_COLUMN_HEADERS));

src/main/java/com/conveyal/gtfs/loader/Table.java

+3
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,9 @@ public CsvReader getCsvReader(ZipFile zipFile, SQLErrorStorage sqlErrorStorage)
520520
// but the GTFS spec says that "files that include the UTF byte order mark are acceptable".
521521
InputStream bomInputStream = new BOMInputStream(zipInputStream);
522522
CsvReader csvReader = new CsvReader(bomInputStream, ',', Charset.forName("UTF8"));
523+
// Don't skip empty records (this is set to true by default on CsvReader). We want to check for empty records
524+
// during table load, so that they are logged as validation issues (WRONG_NUMBER_OF_FIELDS).
525+
csvReader.setSkipEmptyRecords(false);
523526
csvReader.readHeaders();
524527
return csvReader;
525528
} catch (IOException e) {

src/test/java/com/conveyal/gtfs/GTFSTest.java

+7
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,13 @@ public void canLoadFeedWithBadDates () {
141141
new ErrorExpectation(NewGTFSErrorType.REFERENTIAL_INTEGRITY),
142142
new ErrorExpectation(NewGTFSErrorType.DATE_FORMAT),
143143
new ErrorExpectation(NewGTFSErrorType.DATE_FORMAT),
144+
// The below "wrong number of fields" errors are for empty new lines
145+
// found in the file.
146+
new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS),
147+
new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS),
148+
new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS),
149+
new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS),
150+
new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS),
144151
new ErrorExpectation(NewGTFSErrorType.REFERENTIAL_INTEGRITY),
145152
new ErrorExpectation(NewGTFSErrorType.ROUTE_LONG_NAME_CONTAINS_SHORT_NAME),
146153
new ErrorExpectation(NewGTFSErrorType.FEED_TRAVEL_TIMES_ROUNDED),

0 commit comments

Comments
 (0)