Skip to content

Commit a07c7b6

Browse files
besokBoris Zhguchev
andauthored
61 regex perf (#62)
* add rgex bench * fix inter * init impl * add config * fix complains --------- Co-authored-by: Boris Zhguchev <[email protected]>
1 parent 668b626 commit a07c7b6

File tree

11 files changed

+405
-86
lines changed

11 files changed

+405
-86
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,8 @@
3838
* **`0.3.5`**
3939
* add `!` negation operation in filters
4040
* allow using () in filters
41+
* **`0.5`**
42+
* add config for jsonpath
43+
* add an option to add a regex cache for boosting performance
44+
4145

Cargo.toml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "jsonpath-rust"
33
description = "The library provides the basic functionality to find the set of the data according to the filtering query."
4-
version = "0.4.0"
4+
version = "0.5.0"
55
authors = ["BorisZhguchev <[email protected]>"]
66
edition = "2018"
77
license-file = "LICENSE"
@@ -17,6 +17,12 @@ regex = "1"
1717
pest = "2.0"
1818
pest_derive = "2.0"
1919
thiserror = "1.0.50"
20+
lazy_static = "1.4"
21+
once_cell = "1.19.0"
2022

2123
[dev-dependencies]
22-
lazy_static = "1.0"
24+
criterion = "0.5.1"
25+
26+
[[bench]]
27+
name = "regex_bench"
28+
harness = false

README.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,50 @@ fn test() {
389389
** If the value has been modified during the search, there is no way to find a path of a new value.
390390
It can happen if we try to find the length() of an array, for instance.**
391391

392+
## Configuration
392393

394+
The JsonPath provides a way to configure the search by using `JsonPathConfig`.
395+
396+
```rust
397+
pub fn main() {
398+
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
399+
}
400+
```
401+
402+
### Regex cache
403+
The configuration provides the ability to use a regex cache to improve [performance](https://github.com/besok/jsonpath-rust/issues/61).
404+
405+
To instantiate the cache, use the `RegexCache` enum with an implementation of the `RegexCacheInst` trait.
406+
The default implementation, `DefaultRegexCacheInst`, uses `Arc<Mutex<HashMap<String,Regex>>>`.
407+
A pair of either `Box<Value>` or `Value` together with the config can be used to run a query:
408+
```rust
409+
pub fn main(){
410+
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
411+
let json = Box::new(json!({
412+
"author":"abcd(Rees)",
413+
}));
414+
415+
let _v = (json, cfg).path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]")
416+
.expect("the path is correct");
417+
418+
419+
}
420+
```
421+
or using `JsonPathFinder` :
422+
423+
```rust
424+
fn main() {
425+
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
426+
let finder = JsonPathFinder::from_str_with_cfg(
427+
r#"{"first":{"second":[{"active":1},{"passive":1}]}}"#,
428+
"$.first.second[?(@.active)]",
429+
cfg,
430+
).unwrap();
431+
let slice_of_data: Vec<&Value> = finder.find_slice();
432+
let js = json!({"active":1});
433+
assert_eq!(slice_of_data, vec![JsonPathValue::Slice(&js, "$.first.second[0]".to_string())]);
434+
}
435+
```
393436

394437
## The structure
395438

benches/regex_bench.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2+
use jsonpath_rust::path::config::cache::{DefaultRegexCacheInst, RegexCache};
3+
use jsonpath_rust::path::config::JsonPathConfig;
4+
use jsonpath_rust::{JsonPathFinder, JsonPathInst, JsonPathQuery};
5+
use once_cell::sync::Lazy;
6+
use serde_json::{json, Value};
7+
use std::str::FromStr;
8+
9+
fn regex_perf_test_with_cache(cfg: JsonPathConfig) {
10+
let json = Box::new(json!({
11+
"author":"abcd(Rees)",
12+
}));
13+
14+
let _v = (json, cfg)
15+
.path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]")
16+
.expect("the path is correct");
17+
}
18+
19+
fn regex_perf_test_without_cache() {
20+
let json = Box::new(json!({
21+
"author":"abcd(Rees)",
22+
}));
23+
24+
let _v = json
25+
.path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]")
26+
.expect("the path is correct");
27+
}
28+
29+
pub fn criterion_benchmark(c: &mut Criterion) {
30+
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
31+
c.bench_function("regex bench without cache", |b| {
32+
b.iter(|| regex_perf_test_without_cache())
33+
});
34+
c.bench_function("regex bench with cache", |b| {
35+
b.iter(|| regex_perf_test_with_cache(cfg.clone()))
36+
});
37+
}
38+
39+
criterion_group!(benches, criterion_benchmark);
40+
criterion_main!(benches);

src/lib.rs

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@
116116

117117
use crate::parser::model::JsonPath;
118118
use crate::parser::parser::parse_json_path;
119+
use crate::path::config::JsonPathConfig;
119120
use crate::path::{json_path_instance, PathInstance};
120121
use serde_json::Value;
121122
use std::convert::TryInto;
@@ -182,8 +183,12 @@ impl FromStr for JsonPathInst {
182183
}
183184

184185
impl JsonPathInst {
185-
pub fn find_slice<'a>(&'a self, value: &'a Value) -> Vec<JsonPtr<'a, Value>> {
186-
json_path_instance(&self.inner, value)
186+
pub fn find_slice<'a>(
187+
&'a self,
188+
value: &'a Value,
189+
cfg: JsonPathConfig,
190+
) -> Vec<JsonPtr<'a, Value>> {
191+
json_path_instance(&self.inner, value, cfg)
187192
.find(JsonPathValue::from_root(value))
188193
.into_iter()
189194
.filter(|v| v.has_value())
@@ -224,13 +229,27 @@ impl JsonPathQuery for Box<Value> {
224229
}
225230
}
226231

232+
impl JsonPathQuery for (Box<Value>, JsonPathConfig) {
233+
fn path(self, query: &str) -> Result<Value, String> {
234+
let p = JsonPathInst::from_str(query)?;
235+
Ok(JsonPathFinder::new_with_cfg(self.0, Box::new(p), self.1).find())
236+
}
237+
}
238+
227239
impl JsonPathQuery for Value {
228240
fn path(self, query: &str) -> Result<Value, String> {
229241
let p = JsonPathInst::from_str(query)?;
230242
Ok(JsonPathFinder::new(Box::new(self), Box::new(p)).find())
231243
}
232244
}
233245

246+
impl JsonPathQuery for (Value, JsonPathConfig) {
247+
fn path(self, query: &str) -> Result<Value, String> {
248+
let p = JsonPathInst::from_str(query)?;
249+
Ok(JsonPathFinder::new_with_cfg(Box::new(self.0), Box::new(p), self.1).find())
250+
}
251+
}
252+
234253
/// just to create a json path value of data
235254
/// Example:
236255
/// - json_path_value(&json) = `JsonPathValue::Slice(&json)`
@@ -294,6 +313,7 @@ type JsPathStr = String;
294313
pub(crate) fn jsp_idx(prefix: &str, idx: usize) -> String {
295314
format!("{}[{}]", prefix, idx)
296315
}
316+
297317
pub(crate) fn jsp_obj(prefix: &str, key: &str) -> String {
298318
format!("{}.['{}']", prefix, key)
299319
}
@@ -337,7 +357,7 @@ impl<'a, Data: Clone + Debug + Default> JsonPathValue<'a, Data> {
337357
}
338358

339359
impl<'a, Data> JsonPathValue<'a, Data> {
340-
fn only_no_value(input: &Vec<JsonPathValue<'a, Data>>) -> bool {
360+
fn only_no_value(input: &[JsonPathValue<'a, Data>]) -> bool {
341361
!input.is_empty() && input.iter().filter(|v| v.has_value()).count() == 0
342362
}
343363
fn map_vec(data: Vec<(&'a Data, JsPathStr)>) -> Vec<JsonPathValue<'a, Data>> {
@@ -407,12 +427,26 @@ impl<'a, Data> JsonPathValue<'a, Data> {
407427
pub struct JsonPathFinder {
408428
json: Box<Value>,
409429
path: Box<JsonPathInst>,
430+
cfg: JsonPathConfig,
410431
}
411432

412433
impl JsonPathFinder {
413434
/// creates a new instance of [JsonPathFinder]
414435
pub fn new(json: Box<Value>, path: Box<JsonPathInst>) -> Self {
415-
JsonPathFinder { json, path }
436+
JsonPathFinder {
437+
json,
438+
path,
439+
cfg: JsonPathConfig::default(),
440+
}
441+
}
442+
443+
pub fn new_with_cfg(json: Box<Value>, path: Box<JsonPathInst>, cfg: JsonPathConfig) -> Self {
444+
JsonPathFinder { json, path, cfg }
445+
}
446+
447+
/// sets a cfg with a new one
448+
pub fn set_cfg(&mut self, cfg: JsonPathConfig) {
449+
self.cfg = cfg
416450
}
417451

418452
/// updates a path with a new one
@@ -440,10 +474,15 @@ impl JsonPathFinder {
440474
let path = Box::new(JsonPathInst::from_str(path)?);
441475
Ok(JsonPathFinder::new(json, path))
442476
}
477+
pub fn from_str_with_cfg(json: &str, path: &str, cfg: JsonPathConfig) -> Result<Self, String> {
478+
let json = serde_json::from_str(json).map_err(|e| e.to_string())?;
479+
let path = Box::new(JsonPathInst::from_str(path)?);
480+
Ok(JsonPathFinder::new_with_cfg(json, path, cfg))
481+
}
443482

444483
/// creates an instance to find a json slice from the json
445484
pub fn instance(&self) -> PathInstance {
446-
json_path_instance(&self.path.inner, &self.json)
485+
json_path_instance(&self.path.inner, &self.json, self.cfg.clone())
447486
}
448487
/// finds a slice of data in the set json.
449488
/// The result is a vector of references to the incoming structure.
@@ -494,6 +533,7 @@ impl JsonPathFinder {
494533

495534
#[cfg(test)]
496535
mod tests {
536+
use crate::path::config::JsonPathConfig;
497537
use crate::JsonPathQuery;
498538
use crate::JsonPathValue::{NoValue, Slice};
499539
use crate::{jp_v, JsonPathFinder, JsonPathInst, JsonPathValue};
@@ -1194,7 +1234,7 @@ mod tests {
11941234
let query = JsonPathInst::from_str("$..book[?(@.author size 10)].title")
11951235
.expect("the path is correct");
11961236

1197-
let results = query.find_slice(&json);
1237+
let results = query.find_slice(&json, JsonPathConfig::default());
11981238
let v = results.first().expect("to get value");
11991239

12001240
// V can be implicitly converted to &Value
@@ -1257,7 +1297,7 @@ mod tests {
12571297
v,
12581298
vec![Slice(
12591299
&json!({"second":{"active": 1}}),
1260-
"$.['first']".to_string()
1300+
"$.['first']".to_string(),
12611301
)]
12621302
);
12631303

@@ -1271,7 +1311,7 @@ mod tests {
12711311
v,
12721312
vec![Slice(
12731313
&json!({"second":{"active": 1}}),
1274-
"$.['first']".to_string()
1314+
"$.['first']".to_string(),
12751315
)]
12761316
);
12771317

@@ -1285,7 +1325,7 @@ mod tests {
12851325
v,
12861326
vec![Slice(
12871327
&json!({"second":{"active": 1}}),
1288-
"$.['first']".to_string()
1328+
"$.['first']".to_string(),
12891329
)]
12901330
);
12911331

@@ -1299,7 +1339,7 @@ mod tests {
12991339
v,
13001340
vec![Slice(
13011341
&json!({"second":{"active": 1}}),
1302-
"$.['first']".to_string()
1342+
"$.['first']".to_string(),
13031343
)]
13041344
);
13051345
}

src/path/config.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
pub mod cache;
2+
3+
use crate::path::config::cache::RegexCache;
4+
5+
/// Configuration to adjust the jsonpath search
6+
#[derive(Clone, Default)]
7+
pub struct JsonPathConfig {
8+
/// cache to provide
9+
pub regex_cache: RegexCache,
10+
}
11+
12+
impl JsonPathConfig {
13+
pub fn new(regex_cache: RegexCache) -> Self {
14+
Self { regex_cache }
15+
}
16+
}

0 commit comments

Comments
 (0)