-
Notifications
You must be signed in to change notification settings - Fork 116
Add a basic EDN parser #149
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,3 +47,5 @@ pom.xml.asc | |
| /release-node/datomish/ | ||
| /release-node/goog/ | ||
| /release-node/honeysql/ | ||
|
|
||
| /edn/target/ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,17 @@ | ||
| [package] | ||
| name = "edn" | ||
| version = "0.0.1" | ||
| version = "0.1.0" | ||
| authors = ["Joe Walker <jwalker@mozilla.com>"] | ||
|
|
||
| license = "Apache-2.0" | ||
| repository = "https://github.com/mozilla/datomish" | ||
| description = "EDN Parser for Datomish" | ||
| build = "build.rs" | ||
| readme = "./README.md" | ||
|
|
||
| [dependencies] | ||
| num = "0.1.35" | ||
| ordered-float = "0.3.0" | ||
|
|
||
| [build-dependencies] | ||
| peg = "0.4" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # barnardsstar | ||
| An experimental EDN parser for Datomish | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| // Copyright 2016 Mozilla | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); you may not use | ||
| // this file except in compliance with the License. You may obtain a copy of the | ||
| // License at http://www.apache.org/licenses/LICENSE-2.0 | ||
| // Unless required by applicable law or agreed to in writing, software distributed | ||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations under the License. | ||
|
|
||
| extern crate peg; | ||
|
|
||
| // Passes the grammar file `src/edn.rustpeg` to `peg::cargo_build`; presumably run by Cargo as the build script. | ||
| fn main() { | ||
| peg::cargo_build("src/edn.rustpeg"); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add Details: https://github.com/github/linguist/blob/master/README.md#using-gitattributes
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's fairly explicitly not rust though. It happens to have a
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm. I suppose I was thinking that Rust syntax highlighting, indenting, etc. would be better than nothing? Up to you. There are some syntax highlighting plugins for it: https://github.com/treycordova/rustpeg.vim with filetype |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| // Copyright 2016 Mozilla | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); you may not use | ||
| // this file except in compliance with the License. You may obtain a copy of the | ||
| // License at http://www.apache.org/licenses/LICENSE-2.0 | ||
| // Unless required by applicable law or agreed to in writing, software distributed | ||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations under the License. | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. License blocks in all files, please. |
||
| use std::collections::{BTreeSet, BTreeMap, LinkedList}; | ||
| use std::iter::FromIterator; | ||
| use num::BigInt; | ||
| use types::Value; | ||
| use ordered_float::OrderedFloat; | ||
|
|
||
| // Goal: Be able to parse https://github.com/edn-format/edn | ||
| // Also extensible to help parse http://docs.datomic.com/query.html | ||
|
|
||
| // Debugging hint: test using `cargo test --features peg/trace -- --nocapture` | ||
| // to trace where the parser is failing | ||
|
|
||
| // TODO: Support tagged elements | ||
| // TODO: Support comments | ||
| // TODO: Support discard | ||
|
|
||
| // Matches the literal `nil` and yields `Value::Nil`. | ||
| #[export] | ||
| nil -> Value = "nil" { | ||
| Value::Nil | ||
| } | ||
|
|
||
| // Matches the literal `true` or `false` and yields the corresponding `Value::Boolean`. | ||
| #[export] | ||
| boolean -> Value = | ||
| "true" { Value::Boolean(true) } / | ||
| "false" { Value::Boolean(false) } | ||
|
|
||
| // Lexical helpers: a single ASCII decimal digit, and an explicit `-` or `+` sign. | ||
| digit = [0-9] | ||
| sign = "-" / "+" | ||
|
|
||
| // An optionally signed run of digits followed by `N`. Only the sign and digits | ||
| // (not the `N`) are captured and parsed as a `BigInt`. | ||
| #[export] | ||
| bigint -> Value = b:$( sign? digit+ ) "N" { | ||
| Value::BigInteger(b.parse::<BigInt>().unwrap()) | ||
| } | ||
|
|
||
| // An optionally signed run of digits, parsed as an `i64`. | ||
| // NOTE(review): `parse::<i64>().unwrap()` panics when the digits do not fit in an i64. | ||
| #[export] | ||
| integer -> Value = i:$( sign? digit+ ) { | ||
| Value::Integer(i.parse::<i64>().unwrap()) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It occurs to me that — forgetting the 'N' — the parser will panic. Now, we could build a strategy that always handles panics in the parser, allowing us to avoid error handling, but can we instead signal a failure to parse at this point? |
||
| } | ||
|
|
||
| // Float shapes: `frac` has a decimal point, `exp` has an exponent, `frac_exp` has both. | ||
| frac = sign? digit+ "." digit+ | ||
| exp = sign? digit+ ("e" / "E") sign? digit+ | ||
| frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+ | ||
|
|
||
| // The order here is important: frac_exp must be tried before (exp / frac), otherwise | ||
| // the parser commits to exp or frac when the float is really a frac_exp, and fails. | ||
| // The matched text is parsed as an f64 and wrapped in OrderedFloat. | ||
| #[export] | ||
| float -> Value = f:$( frac_exp / exp / frac ) { | ||
| Value::Float(OrderedFloat(f.parse::<f64>().unwrap())) | ||
| } | ||
|
|
||
| // TODO: \newline, \return, \space and \tab | ||
| // Escape sequences recognised inside a string literal: `\\`, `\"` and `\tab`. | ||
| // `backslash` is tried first so that an escaped backslash directly before the | ||
| // closing quote (e.g. "a\\") is not misread as an escaped quote. | ||
| special_char = backslash / quote / tab | ||
| backslash = "\\\\" | ||
| quote = "\\\"" | ||
| tab = "\\tab" | ||
| // PEG choice is ordered: escapes must be tried before `[^"]`, otherwise `[^"]` | ||
| // consumes the backslash of `\"` and the quote that follows ends the string early. | ||
| char = special_char / [^"] | ||
|
|
||
| // A double-quoted string. The characters between the quotes are stored verbatim | ||
| // (escape sequences are not decoded) as `Value::Text`. | ||
| #[export] | ||
| text -> Value = "\"" t:$( char* ) "\"" { | ||
| Value::Text(t.to_string()) | ||
| } | ||
|
|
||
| // TODO: Be more picky here | ||
| // A symbol is one initial character followed by any number of subsequent | ||
| // characters; `-` is only accepted after the first character. | ||
| symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] | ||
| symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] / "-" | ||
|
|
||
| // The whole matched text is stored as `Value::Symbol`. | ||
| #[export] | ||
| symbol -> Value = s:$( symbol_char_initial symbol_char_subsequent* ) { | ||
| Value::Symbol(s.to_string()) | ||
| } | ||
|
|
||
| // A keyword is `:` followed by one or more letters, digits or `/`. | ||
| keyword_char_initial = ":" | ||
| // TODO: More chars here? | ||
| keyword_char_subsequent = [a-z] / [A-Z] / [0-9] / "/" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For future correction: both keywords and symbols can contain (There are similar rules around
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| // The stored string includes the leading `:`. | ||
| #[export] | ||
| keyword -> Value = k:$( keyword_char_initial keyword_char_subsequent+ ) { | ||
| Value::Keyword(k.to_string()) | ||
| } | ||
|
|
||
| // Zero or more values between `(` and `)`, each optionally preceded by | ||
| // whitespace; collected into a `LinkedList`. | ||
| #[export] | ||
| list -> Value = "(" __ v:(__ value)* __ ")" { | ||
| Value::List(LinkedList::from_iter(v)) | ||
| } | ||
|
|
||
| // Zero or more values between `[` and `]`, each optionally preceded by | ||
| // whitespace; the matched values become the `Value::Vector` payload. | ||
| #[export] | ||
| vector -> Value = "[" __ v:(__ value)* __ "]" { | ||
| Value::Vector(v) | ||
| } | ||
|
|
||
| // Zero or more values between `#{` and `}`, each optionally preceded by | ||
| // whitespace; collected into a `BTreeSet`. | ||
| #[export] | ||
| set -> Value = "#{" __ v:(__ value)* __ "}" { | ||
| Value::Set(BTreeSet::from_iter(v)) | ||
| } | ||
|
|
||
| // One key/value entry of a map. Key and value may be separated by any amount of | ||
| // whitespace, and an entry may be followed by whitespace and an optional comma, | ||
| // so `{:a 1 :b 2}`, `{:a 1, :b 2}` and maps spread over several lines all parse. | ||
| pair -> (Value, Value) = k:(value) __ v:(value) __ ","? __ { | ||
| (k, v) | ||
| } | ||
|
|
||
| // Zero or more `pair`s between `{` and `}`, collected into a `BTreeMap`. | ||
| #[export] | ||
| map -> Value = "{" __ v:(pair)* __ "}" { | ||
| Value::Map(BTreeMap::from_iter(v)) | ||
| } | ||
|
|
||
| // Any single value: the alternatives are tried in the order listed. | ||
| // It's important that float comes before integer, otherwise the parser treats the | ||
| // leading digits of a float as an integer and then fails to parse the rest. | ||
| #[export] | ||
| value -> Value | ||
| = nil / boolean / float / bigint / integer / text / | ||
| keyword / symbol / | ||
| list / vector / map / set | ||
|
|
||
| // EDN treats commas as whitespace, so `,` is accepted alongside space, CR, LF and tab. | ||
| whitespace = (" " / "\r" / "\n" / "\t" / ",") | ||
|
|
||
| // Optional whitespace: zero or more `whitespace` characters. | ||
| __ = whitespace* | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,4 +8,17 @@ | |
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations under the License. | ||
|
|
||
| pub mod keyword; | ||
| #![allow(dead_code)] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You've unrooted the existing |
||
|
|
||
| extern crate ordered_float; | ||
| extern crate num; | ||
|
|
||
| pub mod types; | ||
|
|
||
| /// Parser module whose body is the `edn.rs` file included from the build's `OUT_DIR` | ||
| /// (presumably generated from the grammar by the peg build step). | ||
| pub mod parse { | ||
| include!(concat!(env!("OUT_DIR"), "/edn.rs")); | ||
| } | ||
|
|
||
| // Prints a hint to run the tests with `cargo test`; does nothing else. | ||
| fn main() { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This shouldn't be in a |
||
| println!("Use cargo test"); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| // Copyright 2016 Mozilla | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); you may not use | ||
| // this file except in compliance with the License. You may obtain a copy of the | ||
| // License at http://www.apache.org/licenses/LICENSE-2.0 | ||
| // Unless required by applicable law or agreed to in writing, software distributed | ||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations under the License. | ||
|
|
||
| use std::collections::{BTreeSet, BTreeMap, LinkedList}; | ||
| use std::cmp::{Ordering, Ord, PartialOrd}; | ||
| use num::BigInt; | ||
| use ordered_float::OrderedFloat; | ||
|
|
||
| /// A single parsed EDN value: a scalar, a text-like atom, or a (possibly nested) collection. | ||
| #[derive(PartialEq, Eq, Hash, Debug)] | ||
| pub enum Value { | ||
| Nil, | ||
| Boolean(bool), | ||
| Integer(i64), | ||
| BigInteger(BigInt), | ||
| // f64 is wrapped so it can be hashed and ordered; see https://users.rust-lang.org/t/hashmap-key-cant-be-float-number-type-why/7892 | ||
| Float(OrderedFloat<f64>), | ||
| Text(String), | ||
| Symbol(String), | ||
| Keyword(String), | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably this should be
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. … but that's #154. So roll on for now. |
||
| Vector(Vec<Value>), | ||
| List(LinkedList<Value>), | ||
| // We use BTree{Set, Map} rather than Hash{Set, Map} because the BTree variants | ||
| // implement Hash, whereas the Hash variants do not (keeping hashing O(n) is | ||
| // hard for recursive data structures). | ||
| // See https://internals.rust-lang.org/t/implementing-hash-for-hashset-hashmap/3817/1 | ||
| Set(BTreeSet<Value>), | ||
| Map(BTreeMap<Value, Value>), | ||
| } | ||
|
|
||
| use self::Value::*; | ||
|
|
||
| /// Partial ordering for `Value`: always `Some`, delegating to the `Ord` implementation. | ||
| impl PartialOrd for Value { | ||
| fn partial_cmp(&self, rhs: &Value) -> Option<Ordering> { | ||
| let ordering = Ord::cmp(self, rhs); | ||
| Some(ordering) | ||
| } | ||
| } | ||
|
|
||
| // TODO: Check we follow the equality rules at the bottom of https://github.com/edn-format/edn | ||
| /// Total order over `Value`. | ||
| /// | ||
| /// Values of different variants are ordered by the variant rank from `to_ord`. | ||
| /// Values of the same variant are ordered by their payloads in ascending order, | ||
| /// so `Integer(1) < Integer(2)` and `Boolean(false) < Boolean(true)`. | ||
| impl Ord for Value { | ||
| fn cmp(&self, other: &Value) -> Ordering { | ||
|
|
||
| // Rank comparison; only used when the two variants differ. | ||
| let ord_order = to_ord(self).cmp(&to_ord(other)); | ||
| // Same-variant arms compare self's payload against other's (not the reverse), | ||
| // so the result agrees with the direction of `self.cmp(other)`. | ||
| match *self { | ||
| Nil => match *other { Nil => Ordering::Equal, _ => ord_order }, | ||
| Boolean(bs) => match *other { Boolean(bo) => bs.cmp(&bo), _ => ord_order }, | ||
| BigInteger(ref bs) => match *other { BigInteger(ref bo) => bs.cmp(bo), _ => ord_order }, | ||
| Integer(is) => match *other { Integer(io) => is.cmp(&io), _ => ord_order }, | ||
| Float(ref fs) => match *other { Float(ref fo) => fs.cmp(fo), _ => ord_order }, | ||
| Text(ref ts) => match *other { Text(ref to) => ts.cmp(to), _ => ord_order }, | ||
| Symbol(ref ss) => match *other { Symbol(ref so) => ss.cmp(so), _ => ord_order }, | ||
| Keyword(ref ks) => match *other { Keyword(ref ko) => ks.cmp(ko), _ => ord_order }, | ||
| Vector(ref vs) => match *other { Vector(ref vo) => vs.cmp(vo), _ => ord_order }, | ||
| List(ref ls) => match *other { List(ref lo) => ls.cmp(lo), _ => ord_order }, | ||
| Set(ref ss) => match *other { Set(ref so) => ss.cmp(so), _ => ord_order }, | ||
| Map(ref ms) => match *other { Map(ref mo) => ms.cmp(mo), _ => ord_order }, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Ranks a `Value` by its variant alone, ignoring the payload. | ||
| /// | ||
| /// The ranks are not contiguous: 11 is unused, `Set` is 10 and `Map` is 12. | ||
| fn to_ord(value: &Value) -> i32 { | ||
| let rank = match value { | ||
| &Nil => 0, | ||
| &Boolean(_) => 1, | ||
| &Integer(_) => 2, | ||
| &BigInteger(_) => 3, | ||
| &Float(_) => 4, | ||
| &Text(_) => 5, | ||
| &Symbol(_) => 6, | ||
| &Keyword(_) => 7, | ||
| &Vector(_) => 8, | ||
| &List(_) => 9, | ||
| &Set(_) => 10, | ||
| &Map(_) => 12, | ||
| }; | ||
| rank | ||
| } | ||
|
|
||
| /// A tuple struct holding two `Value`s; both fields are private. | ||
| pub struct Pair(Value, Value); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Two 'Datomish' to replace.