|
20 | 20 |
|
21 | 21 | ;; (set! *warn-on-reflection* true)
|
22 | 22 |
|
;; Per-call encoding state. Both vars are nil at the root and only hold a value
;; inside a `binding`; `to-bytes` binds each of them to a fresh volatile.

;; Volatile holding a map of entity identifier -> byte offset of that entity.
(def ^:dynamic *entity-offsets* nil)
;; Volatile holding the byte offset of the object currently being encoded.
(def ^:dynamic *current-offset* nil)

;; Shared zero-length byte array, returned as the body of an empty sequence.
(def empty-bytes (byte-array 0))
| 27 | + |
23 | 28 | (def type->code
|
24 | 29 | {Long (byte 0)
|
25 | 30 | Double (byte 1)
|
|
80 | 85 | (bit-and 0xFF (bit-shift-right len 8))
|
81 | 86 | (bit-and 0xFF len)])))
|
82 | 87 |
|
83 |
| -;; to-bytes is required by the recursive concattenation operation |
84 |
| -(declare to-bytes) |
| 88 | +;; to-counted-bytes is required by the recursive concatenation operation |
| 89 | +(declare to-counted-bytes) |
85 | 90 |
|
86 | 91 | (defn concat-bytes
|
87 | 92 | "Takes multiple byte arrays and returns an array with all of the bytes concattenated"
|
88 |
| - [bas] |
| 93 | + ^bytes [bas] |
89 | 94 | (let [len (apply + (map alength bas))
|
90 | 95 | output (byte-array len)]
|
91 | 96 | (reduce (fn [offset arr]
|
|
133 | 138 | (when (and (< l max-short-long) (> l min-short-long))
|
134 | 139 | (bit-and data-mask l)))
|
135 | 140 |
|
;; Scratch space for number encoding; nil until bound for the current thread.
;; *number-bytes* is a Long/BYTES sized array and *number-buffer* is a
;; ByteBuffer wrapping that same array (see the bindings in to-bytes).
(def ^:dynamic *number-bytes* nil)
(def ^:dynamic *number-buffer* nil)

(defn n-byte-number
  "Returns a new array holding the n low-order bytes of the long x,
  most significant byte first.
  n is expected to be between 1 and Long/BYTES.
  *number-buffer* and *number-bytes* must be bound for the current thread, with the
  buffer wrapping the byte array: x is staged there before its tail is copied out."
  ^bytes [^long n ^long x]
  ;; the type hint avoids a reflective method lookup on every encoded number
  (.putLong ^ByteBuffer *number-buffer* 0 x)
  (let [ret (byte-array n)]
    ;; the buffer is big-endian, so the low-order bytes are the last n of the scratch array
    (System/arraycopy ^bytes *number-bytes* (int (- Long/BYTES n)) ret 0 (int n))
    ret))
| 152 | + |
(defn num-bytes
  "Determines the number of bytes (1 to 8) that can hold the long n as a signed,
  two's-complement value.
  Takes from 2 to 4 comparisons, with the fewest needed for small magnitudes."
  [^long n]
  (let [;; A negative n needs the same width as (-n - 1). bit-not computes exactly that
        ;; value, and unlike negation it cannot overflow when n is Long/MIN_VALUE.
        nn (if (neg? n) (bit-not n) n)]
    (if (<= nn 0x7FFF)
      (if (<= nn 0x7F) 1 2)
      (if (<= nn 0x7FFFFFFF)
        (if (<= nn 0x7FFFFF) 3 4)
        (if (<= nn 0x7FFFFFFFFFFF)
          (if (<= nn 0x7FFFFFFFFF) 5 6)
          (if (<= nn 0x7FFFFFFFFFFFFF) 7 8))))))
| 166 | + |
;; Note: a lone Long is written with a variable width (see num-bytes), but inside a
;; homogeneous sequence every Long is written at the width of the widest element.
(def constant-length?
  "The set of types that can be encoded in a constant number of bytes. Used for homogeneous sequences."
  #{Long Double Date Instant UUID})
|
|
167 | 198 |
|
Keyword
(header [this len]
  ;; lengths below 0x10 are packed into a single header byte: 0xC0 or'ed with the length;
  ;; anything longer uses the general header form
  (if (>= len 0x10)
    (general-header (type->code Keyword) len)
    (byte-array [(bit-or 0xC0 len)])))
(body [this]
  ;; the keyword's printed form without the leading colon, as UTF-8 bytes
  (let [text (subs (str this) 1)]
    (.getBytes text ^Charset utf8)))
(encapsulate-id [this]
  ;; same text as the body, handed to the short-string encapsulation with the keyword mask
  (-> (str this)
      (subs 1)
      (encapsulate-sstr skey-type-mask)))
|
179 | 208 |
|
Long
(header [this len]
  ;; a long occupies at most Long/BYTES bytes
  (assert (<= len Long/BYTES))
  ;; single header byte: 0xD0 or'ed with the number of bytes in the body
  (let [tag (bit-or 0xD0 len)]
    (byte-array [tag])))
(body [^long this]
  ;; write the value using only as many bytes as it requires
  (n-byte-number (num-bytes this) this))
(encapsulate-id [this]
  ;; returns nil when encapsulate-long yields nothing for this value
  (when-let [packed (encapsulate-long this)]
    (bit-or long-type-mask packed)))
|
|
220 | 247 | (assert (= len Long/BYTES))
|
221 | 248 | (byte-array [(bit-or 0xE0 (type->code Date))]))
|
(body [^Date this]
  ;; dates are always written as the full Long/BYTES of their millisecond timestamp
  (let [millis (.getTime this)]
    (n-byte-number Long/BYTES millis)))
(encapsulate-id [this]
  ;; returns nil when encapsulate-long yields nothing for the timestamp
  (let [millis (.getTime ^Date this)]
    (when-let [packed (encapsulate-long millis)]
      (bit-or date-type-mask packed))))
|
|
257 | 284 | (general-header (type->code ISeq) len))
|
(body [this]
  ;; Encodes every element of the sequence into one byte array, starting with a 1-byte prefix.
  ;; While doing so it keeps *current-offset* in step with the bytes produced, and afterwards
  ;; shifts the recorded *entity-offsets* if this sequence needs more than a short header.
  (if (seq this)
    (let [elem-type (type (first this))
          uniform? (and (constant-length? elem-type)
                        (every? #(instance? elem-type %) this))
          ;; encode-elt returns [head] or [head body] for an element;
          ;; prefix is the byte array that leads the encoded sequence
          [encode-elt prefix]
          (cond
            ;; mixed content: every element carries its own header
            (not uniform?)
            [to-counted-bytes zero-array]

            ;; longs: 0xD0 | width, with every element written at the widest element's width
            (= elem-type Long)
            (let [width (apply max (map num-bytes this))]
              [(fn [n] [(n-byte-number width n)])
               (byte-array [(bit-or 0xD0 width)])])

            ;; other fixed-width types: 0xE0 | type code, elements are bare bodies
            :else
            [(fn [x] [(body x)])
             (byte-array [(bit-or 0xE0 (type->code elem-type))])])
          ;; where this sequence begins; used below to find entities recorded inside it
          seq-start @*current-offset*
          ;; 2 bytes for a short header + 1 byte for the prefix array
          _ (vswap! *current-offset* + 3)
          append-elt (fn [acc x]
                       ;; remember the start, as embedded objects move *current-offset* themselves
                       (let [elt-start @*current-offset*
                             [hdr payload] (encode-elt x)
                             elt-size (+ (alength hdr) (if payload (alength payload) 0))]
                         ;; whatever the embedded objects left behind, the next element
                         ;; begins at this element's start plus its total size
                         (vreset! *current-offset* (+ elt-start elt-size))
                         ;; the payload is only appended when there is one
                         (if payload
                           (-> acc (conj! hdr) (conj! payload))
                           (conj! acc hdr))))
          result (concat-bytes (persistent! (reduce append-elt (transient [prefix]) this)))
          ;; moves every offset recorded after the start of this sequence by delta
          shift-later (fn [offsets delta]
                        (into {}
                              (map (fn [[k v :as entry]]
                                     (if (> v seq-start) [k (+ v delta)] entry)))
                              offsets))
          total (alength result)]
      ;; correct offsets for longer headers
      (cond
        (> total 0x7FFF) (vswap! *entity-offsets* shift-later 3) ;; total 5 after the 2 already added
        (> total 0xFF) (vswap! *entity-offsets* shift-later 1)) ;; total 3 after the 2 already added
      result)
    empty-bytes))
272 | 328 |
|
273 | 329 | IPersistentVector
|
(header [this len]
  ;; call header on the vector's seq; an empty vector has a nil seq, so use an empty list
  (let [elements (or (seq this) '())]
    (header elements len)))
|
|
(header [this len]
  ;; maps always use the general header form with the map type code
  (let [code (type->code IPersistentMap)]
    (general-header code len)))
|
(body [this]
  ;; If this is an identified object, then save its location.
  ;; Each of the identifying attributes that has a value is recorded at the current offset.
  (doseq [id-attr [:db/id :db/ident :id]
          :let [id (id-attr this)]
          :when id]
    (vswap! *entity-offsets* assoc id @*current-offset*))
  ;; flatten the entries to a key/value/key/value... sequence and encode that
  (->> (seq this)
       (apply concat)
       body))
|
282 | 342 |
|
283 | 343 | Object
|
|
300 | 360 | (encapsulate-id [^asami.graph.InternalNode this]
|
301 | 361 | (bit-or node-type-mask (bit-and data-mask (.id this)))))
|
302 | 362 |
|
(defn to-counted-bytes
  "Encodes o and returns a 2-element vector of byte arrays: [header body].
  The body is built first, since the header is computed from the body's length."
  [o]
  (let [^bytes payload (body o)
        hdr (header o (alength payload))]
    [hdr payload]))
|
| 368 | + |
(defn to-bytes
  "Encodes o and returns a 3-element vector: the header byte array, the body byte array,
  and the map of entity offsets that was collected while encoding.
  All of the per-call encoding state is bound here, for the duration of the call."
  [o]
  ;; the scratch array is created up front so that the buffer wrapping it
  ;; can be bound in the same binding form
  (let [scratch (byte-array Long/BYTES)]
    (binding [*entity-offsets* (volatile! {})
              *current-offset* (volatile! 0)
              *number-bytes* scratch
              *number-buffer* (ByteBuffer/wrap scratch)]
      (let [encoded (to-counted-bytes o)]
        (conj encoded @*entity-offsets*)))))
0 commit comments