Skip to content

Commit 17faf8c

Browse files
committed
feat: add invalid_result helper function
Closes BurntSushi#345
1 parent 533d37b commit 17faf8c

File tree

4 files changed

+185
-0
lines changed

4 files changed

+185
-0
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ csv-core = { path = "csv-core", version = "0.1.11" }
2525
itoa = "1"
2626
ryu = "1"
2727
serde = "1.0.55"
28+
serde-value = "0.7.0"
2829

2930
[dev-dependencies]
3031
bstr = { version = "1.7.0", default-features = false, features = ["alloc", "serde"] }
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#![allow(dead_code)]
2+
use std::{error::Error, io, process};
3+
4+
use serde::Deserialize;
5+
// One deserialized row of the input CSV, printed with `Debug` by `run`.
#[derive(Debug, Deserialize)]
6+
// Field names are renamed to PascalCase when matched against the input.
#[serde(rename_all = "PascalCase")]
7+
struct Record {
8+
latitude: f64,
9+
longitude: f64,
10+
// Route this field through `csv::invalid_result` so that a value which
// does not parse as `u64` is kept as `Err(text)` instead of failing the row.
#[serde(deserialize_with = "csv::invalid_result")]
11+
population: Result<u64, String>,
12+
city: String,
13+
state: String,
14+
}
15+
16+
// Reads CSV data from stdin, deserializes each row into a `Record` and prints
// it with `Debug` formatting. Returns the first deserialization error, if any.
fn run() -> Result<(), Box<dyn Error>> {
17+
let mut rdr = csv::Reader::from_reader(io::stdin());
18+
for result in rdr.deserialize() {
19+
// `?` stops at the first row that fails to deserialize.
let record: Record = result?;
20+
println!("{:?}", record);
21+
}
22+
Ok(())
23+
}
24+
25+
// Entry point: runs the example and, on failure, prints the error and exits
// with status 1.
fn main() {
26+
if let Err(err) = run() {
27+
println!("{}", err);
28+
process::exit(1);
29+
}
30+
}

src/lib.rs

+98
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,9 @@ impl Default for Trim {
300300
/// `Option<T>` is deserialized with non-empty but invalid data, then the value
301301
/// will be `None` and the error will be ignored.
302302
///
303+
/// Use the [`invalid_result`](./fn.invalid_result.html) function if you want to
304+
/// return the invalid values as `Err<String>` instead of discarding them.
305+
///
303306
/// # Example
304307
///
305308
/// This example shows how to parse CSV records with numerical data, even if
@@ -343,3 +346,98 @@ where
343346
{
344347
Option::<T>::deserialize(de).or_else(|_| Ok(None))
345348
}
349+
350+
/// A custom Serde deserializer for possibly invalid `Result<T, String>` fields.
351+
///
352+
/// When deserializing CSV data, it is sometimes desirable to return fields with
353+
/// invalid data separately. For example, there might be a field that is
354+
/// usually a number, but will occasionally contain garbage data that causes
355+
/// number parsing to fail.
356+
///
357+
/// You might be inclined to use, say, `Result<i32, String>` for fields such as
358+
/// this. However this will not compile out of the box, because Serde does not
359+
/// know when to return `Ok<i32>` and when to return `Err<String>`.
360+
///
361+
/// This function allows you to define the following behavior: if `Result<T,
362+
/// String>` is deserialized with valid data, then the valid value will be
363+
/// returned as `Ok<T>`, while if it is deserialized with empty or invalid data,
364+
/// then the invalid value will be converted to `String` and returned as
365+
/// `Err<String>`. Note that any invalid UTF-8 bytes are lossily converted to
366+
/// `String`, therefore this function will never fail.
367+
///
368+
/// Use the [`invalid_option`](./fn.invalid_option.html) function if you want to
369+
/// discard the invalid values instead of returning them as `Err<String>`.
370+
///
371+
/// # Example
372+
///
373+
/// This example shows how to parse CSV records with numerical data, even if
374+
/// some numerical data is absent or invalid. Without the
375+
/// `serde(deserialize_with = "...")` annotations, this example would not
376+
/// compile.
377+
///
378+
/// ```
379+
/// use std::error::Error;
380+
///
381+
/// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
382+
/// struct Row {
383+
/// #[serde(deserialize_with = "csv::invalid_result")]
384+
/// a: Result<i32, String>,
385+
/// #[serde(deserialize_with = "csv::invalid_result")]
386+
/// b: Result<i32, String>,
387+
/// #[serde(deserialize_with = "csv::invalid_result")]
388+
/// c: Result<i32, String>,
389+
/// }
390+
///
391+
/// # fn main() { example().unwrap(); }
392+
/// fn example() -> Result<(), Box<dyn Error>> {
393+
/// let data = "\
394+
/// a,b,c
395+
/// 5,\"\",xyz
396+
/// ";
397+
/// let mut rdr = csv::Reader::from_reader(data.as_bytes());
398+
/// if let Some(result) = rdr.deserialize().next() {
399+
/// let record: Row = result?;
400+
/// assert_eq!(record, Row { a: Ok(5), b: Err(String::new()), c: Err(String::from("xyz")) });
401+
/// Ok(())
402+
/// } else {
403+
/// Err(From::from("expected at least one record but got none"))
404+
/// }
405+
/// }
406+
/// ```
407+
// Deserializes a field as `T`; when that fails, the raw value is rendered as
// text and returned as the inner `Err(String)`. The outer `Result` carries
// only errors from reading the value out of `de`.
pub fn invalid_result<'de, D, T>(
408+
de: D,
409+
) -> result::Result<result::Result<T, String>, D::Error>
410+
where
411+
D: Deserializer<'de>,
412+
T: Deserialize<'de>,
413+
{
414+
// Buffer the field into a self-describing `serde_value::Value` so it can be
// both re-deserialized as `T` and rendered as text on failure.
// NOTE(review): the doc text says this function "will never fail", yet this
// `?` forwards any error raised by the underlying deserializer; confirm.
let value = serde_value::Value::deserialize(de)?;
415+
// `T::deserialize` consumes its deserializer, so it is handed a clone and
// the original `value` is kept for building the `Err` string.
let result = T::deserialize(value.clone()).map_err(|_| match value {
416+
// Scalars are rendered with `to_string()`.
serde_value::Value::Bool(b) => b.to_string(),
417+
serde_value::Value::U8(u) => u.to_string(),
418+
serde_value::Value::U16(u) => u.to_string(),
419+
serde_value::Value::U32(u) => u.to_string(),
420+
serde_value::Value::U64(u) => u.to_string(),
421+
serde_value::Value::I8(i) => i.to_string(),
422+
serde_value::Value::I16(i) => i.to_string(),
423+
serde_value::Value::I32(i) => i.to_string(),
424+
serde_value::Value::I64(i) => i.to_string(),
425+
serde_value::Value::F32(f) => f.to_string(),
426+
serde_value::Value::F64(f) => f.to_string(),
427+
serde_value::Value::Char(c) => c.to_string(),
428+
// A string is returned unchanged.
serde_value::Value::String(s) => s,
429+
// A unit value becomes the empty string.
serde_value::Value::Unit => String::new(),
430+
// Compound values are rendered with their `Debug` formatting.
serde_value::Value::Option(option) => {
431+
format!("{:?}", option)
432+
}
433+
serde_value::Value::Newtype(newtype) => {
434+
format!("{:?}", newtype)
435+
}
436+
serde_value::Value::Seq(seq) => format!("{:?}", seq),
437+
serde_value::Value::Map(map) => format!("{:?}", map),
438+
// Raw bytes are converted lossily, so invalid UTF-8 is replaced rather
// than rejected.
serde_value::Value::Bytes(bytes) => {
439+
String::from_utf8_lossy(&bytes).into_owned()
440+
}
441+
});
442+
// A failed parse is reported through the inner `Err`, never the outer one.
Ok(result)
443+
}

src/tutorial.rs

+56
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,62 @@ function is a generic helper function that does one very simple thing: when
11361136
applied to `Option` fields, it will convert any deserialization error into a
11371137
`None` value. This is useful when you need to work with messy CSV data.
11381138
1139+
Sometimes you might need to return invalid fields instead of discarding them.
1140+
For this you can use the similar
1141+
[`invalid_result`](../fn.invalid_result.html)
1142+
function, which works as follows: when applied to `Result<T, String>` fields,
1143+
it will convert any invalid field to a `String` and return it as `Err(string)`.
1144+
Note that any invalid UTF-8 bytes are lossily converted to `String`, therefore
1145+
this function will never fail.
1146+
1147+
This behavior can be achieved with very minor changes to the previous example:
1148+
1149+
```no_run
1150+
//tutorial-read-serde-invalid-03.rs
1151+
# #![allow(dead_code)]
1152+
# use std::{error::Error, io, process};
1153+
#
1154+
# use serde::Deserialize;
1155+
#[derive(Debug, Deserialize)]
1156+
#[serde(rename_all = "PascalCase")]
1157+
struct Record {
1158+
latitude: f64,
1159+
longitude: f64,
1160+
#[serde(deserialize_with = "csv::invalid_result")]
1161+
population: Result<u64, String>,
1162+
city: String,
1163+
state: String,
1164+
}
1165+
1166+
fn run() -> Result<(), Box<dyn Error>> {
1167+
let mut rdr = csv::Reader::from_reader(io::stdin());
1168+
for result in rdr.deserialize() {
1169+
let record: Record = result?;
1170+
println!("{:?}", record);
1171+
}
1172+
Ok(())
1173+
}
1174+
#
1175+
# fn main() {
1176+
# if let Err(err) = run() {
1177+
# println!("{}", err);
1178+
# process::exit(1);
1179+
# }
1180+
# }
1181+
```
1182+
1183+
If you compile and run this last example, then it should run to completion just
1184+
like the previous one but with the following output:
1185+
1186+
```text
1187+
$ cargo build
1188+
$ ./target/debug/csvtutor < uspop-null.csv
1189+
Record { latitude: 65.2419444, longitude: -165.2716667, population: Err(""), city: "Davidsons Landing", state: "AK" }
1190+
Record { latitude: 60.5544444, longitude: -151.2583333, population: Ok(7610), city: "Kenai", state: "AK" }
1191+
Record { latitude: 33.7133333, longitude: -87.3886111, population: Err(""), city: "Oakman", state: "AL" }
1192+
# ... and more
1193+
```
1194+
11391195
# Writing CSV
11401196
11411197
In this section we'll show a few examples that write CSV data. Writing CSV data

0 commit comments

Comments
 (0)