diff --git a/csv-core/src/reader.rs b/csv-core/src/reader.rs index dbd6dc3d..f9e5b7c2 100644 --- a/csv-core/src/reader.rs +++ b/csv-core/src/reader.rs @@ -115,6 +115,8 @@ pub struct Reader { /// If enabled (the default), then quotes are respected. When disabled, /// quotes are not treated specially. quoting: bool, + /// If enabled (the default), blank lines are ignored. + skip_blank_lines: bool, /// Whether to use the NFA for parsing. /// /// Generally this is for debugging. There's otherwise no good reason @@ -141,6 +143,7 @@ impl Default for Reader { double_quote: true, comment: None, quoting: true, + skip_blank_lines: true, use_nfa: false, line: 1, has_read: false, @@ -227,6 +230,16 @@ impl ReaderBuilder { self } + /// Enable or disable skipping of blank lines. + /// + /// This is enabled by default, but it may be disabled. When enabled, + /// blank lines are ignored. If present, the final trailing blank line + /// will be ignored. + pub fn skip_blank_lines(&mut self, yes: bool) -> &mut ReaderBuilder { + self.rdr.skip_blank_lines = yes; + self + } + /// The comment character to use when parsing CSV. 
/// /// If the start of a record begins with the byte given here, then that @@ -992,7 +1005,11 @@ impl Reader { End => (End, NfaInputAction::Epsilon), StartRecord => { if self.term.equals(c) { - (StartRecord, NfaInputAction::Discard) + if self.skip_blank_lines { + (StartRecord, NfaInputAction::Discard) + } else { + (CRLF, NfaInputAction::Discard) + } } else if self.comment == Some(c) { (InComment, NfaInputAction::Discard) } else { @@ -1726,6 +1743,15 @@ mod tests { } ); + fn enable_blank(builder: &mut ReaderBuilder) -> &mut ReaderBuilder { + builder.skip_blank_lines(false) + } + parses_to!(blank_lines_one_row_one_field, "a", csv![["a"]], enable_blank); + parses_to!(blank_lines_one_row_one_field_lf, "a\n", csv![["a"]], enable_blank); + parses_to!(blank_lines_one_row_one_field_lf_lf, "a\n\n", csv![["a"], [""]], enable_blank); + parses_to!(blank_lines_one_row_lf_one_field_lf, "\na\n", csv![[""], ["a"]], enable_blank); + parses_to!(blank_lines_crlf, "\r\na\r\n", csv![[""], ["a"]], enable_blank); + macro_rules! assert_read { ( $rdr:expr, $input:expr, $output:expr, diff --git a/src/reader.rs b/src/reader.rs index 82f5d2ca..ea4010e3 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -362,6 +362,41 @@ impl ReaderBuilder { self } + /// Whether blank lines are ignored. + /// + /// By default blank lines are ignored. 
+ /// + /// When disabled, each blank line yields a record with one empty field. + /// + /// # Example + /// + /// ``` + /// use std::error::Error; + /// use csv::ReaderBuilder; + /// + /// # fn main() { example().unwrap(); } + /// fn example() -> Result<(), Box<dyn Error>> { + /// let data = "\na,b"; + /// let mut rdr = ReaderBuilder::new() + /// .skip_blank_lines(false) + /// .flexible(true) + /// .has_headers(false) + /// .from_reader(data.as_bytes()); + /// + /// if let Some(result) = rdr.records().next() { + /// let record = result?; + /// assert_eq!(record, vec![""]); + /// Ok(()) + /// } else { + /// Err(From::from("expected at least one record but got none")) + /// } + /// } + /// ``` + pub fn skip_blank_lines(&mut self, yes: bool) -> &mut ReaderBuilder { + self.builder.skip_blank_lines(yes); + self + } + /// The record terminator to use when parsing CSV. /// /// A record terminator can be any single byte. The default is a special @@ -2508,6 +2543,32 @@ mod tests { assert_eq!("baz", &headers[2]); } + #[test] + fn read_record_blank_lines() { + let data = b("foo,bar,baz\n\na,b,c\nd,e,f"); + let mut rdr = ReaderBuilder::new() + .flexible(true).skip_blank_lines(false).has_headers(false).from_reader(data); + let mut rec = StringRecord::new(); + + assert!(rdr.read_record(&mut rec).unwrap()); + assert_eq!(3, rec.len()); + assert_eq!("foo", &rec[0]); + + assert!(rdr.read_record(&mut rec).unwrap()); + assert_eq!(1, rec.len()); + assert_eq!("", &rec[0]); + + assert!(rdr.read_record(&mut rec).unwrap()); + assert_eq!(3, rec.len()); + assert_eq!("a", &rec[0]); + + assert!(rdr.read_record(&mut rec).unwrap()); + assert_eq!(3, rec.len()); + assert_eq!("d", &rec[0]); + + assert!(!rdr.read_record(&mut rec).unwrap()); + } + #[test] fn seek() { let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");