Skip to content

Commit aa02827

Browse files
authored
Merge pull request #221 from threatgrid/ingest-limit
Ingest limit
2 parents d3b6752 + 10a21c6 commit aa02827

File tree

8 files changed

+133
-48
lines changed

8 files changed

+133
-48
lines changed

CHANGELOG.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Change Log
22

3+
## [2.2.2] - 2021-10-19
4+
### Added
5+
- Added a new `:input-limit` option to `transact`. When included, the transaction will attempt to keep the number of triples it inserts at or below this limit.
6+
37
## [2.2.1] - 2021-10-16
48
### Fixed
59
- Accepting java.time.Instant objects on the `since` and `as-of` database functions.
@@ -285,7 +289,8 @@
285289
### Added
286290
- Introduced Update Annotations
287291

288-
[Unreleased]: https://github.com/threatgrid/asami/compare/2.2.1...HEAD
292+
[Unreleased]: https://github.com/threatgrid/asami/compare/2.2.2...HEAD
293+
[2.2.2]: https://github.com/threatgrid/asami/compare/2.2.1...2.2.2
289294
[2.2.1]: https://github.com/threatgrid/asami/compare/2.2.0...2.2.1
290295
[2.2.0]: https://github.com/threatgrid/asami/compare/2.1.3...2.2.0
291296
[2.1.3]: https://github.com/threatgrid/asami/compare/2.1.2...2.1.3

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ Asami can be made available to clojure by adding the following to a `deps.edn` f
3232
```clojure
3333
{
3434
:deps {
35-
org.clojars.quoll/asami {:mvn/version "2.2.1"}
35+
org.clojars.quoll/asami {:mvn/version "2.2.2"}
3636
}
3737
}
3838
```
@@ -41,7 +41,7 @@ This makes Asami available to a repl that is launched with the `clj` or `clojure
4141

4242
Alternatively, Asami can be added for the Leiningen build tool by adding this to the `:dependencies` section of the `project.clj` file:
4343
```clojure
44-
[org.clojars.quoll/asami "2.2.1"]
44+
[org.clojars.quoll/asami "2.2.2"]
4545
```
4646

4747
### Important Note for databases before 2.1.0

project.clj

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
(defproject org.clojars.quoll/asami "2.2.1"
1+
(defproject org.clojars.quoll/asami "2.2.2"
22
:description "An in memory graph store for Clojure and ClojureScript"
33
:url "http://github.com/threatgrid/asami"
44
:license {:name "Eclipse Public License"

src/asami/core.cljc

+7-3
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,8 @@
158158
(s/one s/Any "attribute")
159159
(s/one s/Any "value")]]
160160
(s/optional-key :executor) s/Any
161-
(s/optional-key :update-fn) (s/pred fn?)}
161+
(s/optional-key :update-fn) (s/pred fn?)
162+
(s/optional-key :input-limit) s/Num}
162163
[s/Any]))
163164

164165
(s/defn transact-async
@@ -178,6 +179,7 @@
178179
Alternatively, a map may have a :tx-triples key. If so, then this is a seq of 3 element vectors.
179180
Each vector in a :tx-triples seq will contain the raw values for [entity attribute value]
180181
:executor An optional value in the tx-info containing an executor to be used to run the CompletableFuture
182+
:input-limit An optional value in the tx-info containing the approximate maximum number of statements to insert
181183
Entities and assertions may have attributes that are keywords with a trailing ' character.
182184
When these appear an existing attribute without that character will be replaced. This only occurs for the top level
183185
entity, and is not applied to attributes appearing in nested structures.
@@ -192,7 +194,7 @@
192194
:tx-data sequence of datoms produced by the transaction
193195
:tempids mapping of the temporary IDs in entities to the allocated nodes"
194196
[{:keys [name state] :as connection} :- ConnectionType
195-
{:keys [tx-data tx-triples executor update-fn] :as tx-info} :- TransactData]
197+
{:keys [tx-data tx-triples executor update-fn input-limit] :as tx-info} :- TransactData]
196198

197199
;; Detached databases need to be reattached when transacted into
198200
(check-attachment connection)
@@ -222,7 +224,9 @@
222224
(fn [graph]
223225
;; building triples returns a tuple of assertions, retractions, tempids
224226
(let [[_ _ tempids :as result]
225-
(entities/build-triples graph (seq-wrapper (or tx-data tx-info)))]
227+
(entities/build-triples graph
228+
(seq-wrapper (or tx-data tx-info))
229+
input-limit)]
226230
(vreset! vtempids tempids)
227231
result))))
228232
;; pull out the info captured during the transaction

src/asami/entities.cljc

+56-29
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@
4141
(some append-attribute? obj-keys)
4242
(some #(and (map? %) (contains-updates? %)) (vals obj)))))
4343

44+
(s/defn ^:private minus :- (s/maybe s/Num)
45+
[limit :- (s/maybe s/Num)
46+
n :- s/Num]
47+
(when limit (- limit n)))
48+
4449
(s/defn ^:private entity-triples :- [(s/one [Triple] "New triples")
4550
(s/one [Triple] "Retractions")
4651
(s/one {s/Any s/Any} "New list of ID mappings")
@@ -55,7 +60,8 @@
5560
[graph :- GraphType
5661
{id :db/id ident :db/ident ident2 :id :as obj} :- EntityMap
5762
existing-ids :- {s/Any s/Any}
58-
top-ids :- #{s/Any}]
63+
top-ids :- #{s/Any}
64+
limit :- (s/maybe s/Num)]
5965
(let [[new-obj removals additions]
6066
(if (contains-updates? obj)
6167
(do
@@ -106,10 +112,16 @@
106112
(let [v (obj (append->annotate attr))
107113
new-node (node/new-node graph)]
108114
[[(find-tail head) :tg/rest new-node] [new-node :tg/first v] [head :tg/contains v]])) attr-heads)]
109-
[new-obj removals append-triples]))
115+
(if (and limit (> (count append-triples) limit))
116+
(throw (ex-info "Limit reached" {:overflow true}))
117+
[new-obj removals append-triples])))
110118
[obj nil nil])
111119

112-
[triples ids new-top-ids] (writer/ident-map->triples graph new-obj existing-ids top-ids)
120+
[triples ids new-top-ids] (writer/ident-map->triples graph
121+
new-obj
122+
existing-ids
123+
top-ids
124+
(minus limit (count additions)))
113125

114126
;; if updates occurred new entity statements are redundant
115127
triples (if (or (seq removals) (seq additions) (not (identical? obj new-obj)))
@@ -138,29 +150,44 @@
138150
(s/one {s/Any s/Any} "ID map of created objects")]
139151
"Converts a set of transaction data into triples.
140152
Returns a tuple containing [triples removal-triples tempids]"
141-
[graph :- gr/GraphType
142-
data :- [s/Any]]
143-
(let [[retract-stmts new-data] (util/divide' #(= :db/retract (first %)) data)
144-
ref->id (partial resolve-lookup-refs graph)
145-
retractions (mapv (comp (partial mapv ref->id) rest) retract-stmts)
146-
add-triples (fn [[acc racc ids top-ids] obj]
147-
(if (map? obj)
148-
(let [[triples rtriples new-ids new-top-ids] (entity-triples graph obj ids top-ids)]
149-
[(into acc triples) (into racc rtriples) new-ids new-top-ids])
150-
(if (and (seqable? obj)
151-
(= 4 (count obj))
152-
(= :db/add (first obj)))
153-
(or
154-
(when (= (nth obj 2) :db/id)
155-
(let [id (nth obj 3)]
156-
(when (temp-id? id)
157-
(let [new-id (or (ids id) (node/new-node graph))]
158-
[(conj acc (assoc (vec-rest obj) 2 new-id))
159-
racc
160-
(assoc ids (or id new-id) new-id)
161-
top-ids]))))
162-
[(conj acc (mapv #(or (ids %) (ref->id %)) (rest obj))) racc ids top-ids])
163-
(throw (ex-info (str "Bad data in transaction: " obj) {:data obj})))))
164-
[triples rtriples id-map top-level-ids] (reduce add-triples [[] retractions {} #{}] new-data)
165-
triples (writer/backtrack-unlink-top-entities top-level-ids triples)]
166-
[triples rtriples id-map]))
153+
([graph :- gr/GraphType
154+
data :- [s/Any]]
155+
(build-triples graph data nil))
156+
([graph :- gr/GraphType
157+
data :- [s/Any]
158+
limit :- (s/maybe s/Num)]
159+
(let [[retract-stmts new-data] (util/divide' #(= :db/retract (first %)) data)
160+
ref->id (partial resolve-lookup-refs graph)
161+
retractions (mapv (comp (partial mapv ref->id) rest) retract-stmts)
162+
add-triples (fn [[acc racc ids top-ids :as last-result] obj]
163+
(if (and limit (> (count acc) limit))
164+
(reduced last-result)
165+
(if (map? obj)
166+
(try
167+
(let [[triples rtriples new-ids new-top-ids] (entity-triples graph
168+
obj
169+
ids
170+
top-ids
171+
(minus limit (count acc)))]
172+
[(into acc triples) (into racc rtriples) new-ids new-top-ids])
173+
(catch #?(:clj Exception :cljs :default) e
174+
(if-let [overflow (:overflow (ex-data e))]
175+
(reduced last-result)
176+
(throw e))))
177+
(if (and (seqable? obj)
178+
(= 4 (count obj))
179+
(= :db/add (first obj)))
180+
(or
181+
(when (= (nth obj 2) :db/id)
182+
(let [id (nth obj 3)]
183+
(when (temp-id? id)
184+
(let [new-id (or (ids id) (node/new-node graph))]
185+
[(conj acc (assoc (vec-rest obj) 2 new-id))
186+
racc
187+
(assoc ids (or id new-id) new-id)
188+
top-ids]))))
189+
[(conj acc (mapv #(or (ids %) (ref->id %)) (rest obj))) racc ids top-ids])
190+
(throw (ex-info (str "Bad data in transaction: " obj) {:data obj}))))))
191+
[triples rtriples id-map top-level-ids] (reduce add-triples [[] retractions {} #{}] new-data)
192+
triples (writer/backtrack-unlink-top-entities top-level-ids triples)]
193+
[triples rtriples id-map])))

src/asami/entities/writer.cljc

+21-10
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333

3434
(def ^:dynamic *triples* nil)
3535

36+
(def ^:dynamic *limit* nil)
37+
3638
(def ^:dynamic *current-entity* nil)
3739

3840
(def ^:dynamic *top-level-entities* nil)
@@ -43,6 +45,13 @@
4345

4446
(declare value-triples map->triples)
4547

48+
(defn add-triples!
49+
[op data]
50+
(vswap! *triples* op data)
51+
(when (and *limit*
52+
(> (count @*triples*) *limit*))
53+
(throw (ex-info "overflow" {:overflow true}))))
54+
4655
(defn list-triples
4756
"Creates the triples for a list. Returns a node and list of nodes representing contents of the list."
4857
[vlist]
@@ -52,17 +61,17 @@
5261
[list-ref value-nodes]
5362
(let [node-ref (node/new-node *current-graph*)
5463
_ (when last-ref
55-
(vswap! *triples* conj [last-ref :tg/rest node-ref]))
64+
(add-triples! conj [last-ref :tg/rest node-ref]))
5665
value-ref (value-triples v)]
57-
(vswap! *triples* conj [node-ref (node/data-attribute *current-graph* value-ref) value-ref])
66+
(add-triples! conj [node-ref (node/data-attribute *current-graph* value-ref) value-ref])
5867
(recur (or list-ref node-ref) node-ref (conj value-nodes value-ref) vs))))))
5968

6069
(s/defn value-triples-list
6170
[vlist :- [s/Any]]
6271
(if (seq vlist)
6372
(let [[node value-nodes] (list-triples vlist)]
6473
(doseq [vn value-nodes]
65-
(vswap! *triples* conj [node (node/container-attribute *current-graph* vn) vn]))
74+
(add-triples! conj [node (node/container-attribute *current-graph* vn) vn]))
6675
node)
6776
:tg/empty-list))
6877

@@ -86,7 +95,7 @@
8695
(when-not (or (= node *current-entity*)
8796
(@*top-level-entities* node)
8897
(= node :tg/empty-list))
89-
(vswap! *triples* conj [*current-entity* :tg/owns node]))
98+
(add-triples! conj [*current-entity* :tg/owns node]))
9099
node)
91100

92101
(defn value-triples
@@ -110,9 +119,9 @@
110119
(if (set? value)
111120
(doseq [v value]
112121
(let [vr (value-triples v)]
113-
(vswap! *triples* conj [entity-ref property vr])))
122+
(add-triples! conj [entity-ref property vr])))
114123
(let [v (value-triples value)]
115-
(vswap! *triples* conj [entity-ref property v]))))
124+
(add-triples! conj [entity-ref property v]))))
116125

117126
(defn new-node
118127
[id]
@@ -185,22 +194,24 @@
185194
"Converts a single map to triples for an ID'ed map"
186195
([graph :- GraphType
187196
j :- EntityMap]
188-
(ident-map->triples graph j {} #{}))
197+
(ident-map->triples graph j {} #{} nil))
189198
([graph :- GraphType
190199
j :- EntityMap
191200
id-map :- {s/Any s/Any}
192-
top-level-ids :- #{s/Any}]
201+
top-level-ids :- #{s/Any}
202+
limit :- (s/maybe s/Num)]
193203
(binding [*current-graph* graph
194204
*id-map* (volatile! id-map)
195205
*triples* (volatile! [])
206+
*limit* limit
196207
*top-level-entities* (volatile! top-level-ids)]
197208
(let [derefed-id-map (ident-map->triples j)]
198209
[@*triples* derefed-id-map @*top-level-entities*])))
199210
([j :- EntityMap]
200211
(let [node-ref (map->triples j)]
201212
(if (:db/ident j)
202-
(vswap! *triples* conj [node-ref :tg/entity true])
203-
(vswap! *triples* into [[node-ref :db/ident (name-for node-ref)] [node-ref :tg/entity true]]))
213+
(add-triples! conj [node-ref :tg/entity true])
214+
(add-triples! into [[node-ref :db/ident (name-for node-ref)] [node-ref :tg/entity true]]))
204215
@*id-map*)))
205216

206217
(defn backtrack-unlink-top-entities

test/asami/api_test.cljc

+20
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,26 @@
4242
(is (instance? asami.multi_graph.MultiGraph (:graph (:db @(:state cm)))))
4343
(is (= "banana" (:name cm)))))
4444

45+
(deftest test-input-limit
46+
(let [c (connect "asami:mem://limit1")
47+
maksim {:db/id -1
48+
:name "Maksim"
49+
:age 45
50+
:wife {:db/id -2}
51+
:aka ["Maks Otto von Stirlitz", "Jack Ryan"]}
52+
anna {:db/id -2
53+
:name "Anna"
54+
:age 31
55+
:husband {:db/id -1}
56+
:aka ["Anitzka"]}
57+
{:keys [tx-data]} @(transact c {:tx-data [maksim anna]})
58+
c2 (connect "asami:mem://limit2")
59+
{tx-data2 :tx-data} @(transact c2 {:tx-data [maksim anna] :input-limit 15})]
60+
(is (= 21 (count tx-data)))
61+
(is (= 21 (q '[:find (count *) . :where [?s ?p ?o]] c)))
62+
(is (= 13 (count tx-data2)))
63+
(is (= 13 (q '[:find (count *) . :where [?s ?p ?o]] c2)))))
64+
4565
(deftest load-data
4666
(let [c (connect "asami:mem://test1")
4767
r (transact c {:tx-data [{:db/ident "bobid"

test/asami/entities/test_entity.cljc

+20-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
:cljs [schema.test :as st :refer-macros [deftest]])
1515
#?(:clj [clojure.test :as t :refer [is]]
1616
:cljs [clojure.test :as t :refer-macros [is]]))
17-
#?(:clj (:import [java.time ZonedDateTime])
17+
#?(:clj (:import [java.time ZonedDateTime]
18+
[clojure.lang ExceptionInfo])
1819
:cljs (:import [goog.date DateTime])))
1920

2021
(defn parseDateTime [s]
@@ -287,6 +288,23 @@
287288
(is (= data obj1))
288289
(is (= (assoc data :sub (dissoc d0 :db/ident)) obj2))))
289290

291+
(deftest test-entity-limits
292+
(let [m1 {:prop "val"}
293+
m2 {:prop "val", :p2 2}
294+
m3 {:prop "val", :p2 22, :p3 [42 54]}
295+
m4 {:prop "val"}
296+
m5 {:prop "val2"}
297+
m6 {:prop "val" :arr [{:a 1} {:a 2} ["nested"]]}
298+
m7 {:prop "val", :p2 22, :p3 []}]
299+
(is (= 3 (count (first (ident-map->triples empty-graph m1 {} #{} 18)))))
300+
(is (= 4 (count (first (ident-map->triples empty-graph m2 {} #{} 18)))))
301+
(is (= 11 (count (first (ident-map->triples empty-graph m3 {} #{} 18)))))
302+
(is (= 3 (count (first (ident-map->triples empty-graph m4 {} #{} 18)))))
303+
(is (= 3 (count (first (ident-map->triples empty-graph m5 {} #{} 18)))))
304+
(is (thrown-with-msg? ExceptionInfo #"overflow"
305+
(ident-map->triples empty-graph m6 {} #{} 18)))
306+
(is (= 5 (count (first (ident-map->triples empty-graph m7 {} #{} 18)))))))
307+
290308
(deftest test-looped-ref->entity
291309
(let [d1 {:db/ident :t1, :task/name "Task 1", :task/requires [#:db{:ident :t3}]}
292310
d2 {:db/ident :t2, :task/name "Task 2", :task/requires [#:db{:ident :t1}]}
@@ -306,7 +324,7 @@
306324
(defn ident-map->graph
307325
([m] (ident-map->graph m {}))
308326
([m mp]
309-
(let [[triples result-map] (ident-map->triples empty-graph m mp #{})]
327+
(let [[triples result-map] (ident-map->triples empty-graph m mp #{} nil)]
310328
[(set triples) result-map])))
311329

312330
(deftest test-ident-map->triples

0 commit comments

Comments
 (0)