Skip to content

Commit 4a0eb94

Browse files
authored
Add :disable-na-as-missing? for fixed types too. (#463)
1 parent 4d2e306 commit 4a0eb94

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

src/tech/v3/dataset/io/column_parsers.clj

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -197,7 +197,8 @@
197197
^IMutList failed-values
198198
^RoaringBitmap failed-indexes
199199
column-name
200-
^:unsynchronized-mutable ^long max-idx]
200+
^:unsynchronized-mutable ^long max-idx
201+
disable-na-as-missing?]
201202
dtype-proto/PECount
202203
(ecount [_this] (inc max-idx))
203204
Indexed
@@ -216,7 +217,7 @@
216217
;;be in the space of the container or it could require the parse-fn
217218
;;to make it.
218219
(let [parsed-value (cond
219-
(missing-value? value false)
220+
(missing-value? value disable-na-as-missing?)
220221
:tech.v3.dataset/missing
221222
(and (identical? (dtype/datatype value) container-dtype)
222223
(not (instance? String value)))
@@ -299,20 +300,18 @@
299300
missing (bitmap/->bitmap)]
300301
(FixedTypeParser. container dtype missing-value parse-fn
301302
missing failed-values failed-indexes
302-
cname -1)))
303-
303+
cname -1
304+
(get options :disable-na-as-missing?))))
304305

305306
(defn parser-kwd-list->parser-tuples
306307
[kwd-list]
307308
(mapv parser-entry->parser-tuple kwd-list))
308309

309-
310310
(def default-parser-datatype-sequence
311311
[:bool :int16 :int32 :int64 :float64 :uuid
312312
:packed-duration :packed-local-date
313313
:zoned-date-time :string :text :boolean])
314314

315-
316315
(defn- promote-container
317316
^IMutList [old-container ^RoaringBitmap missing new-dtype options]
318317
(let [n-elems (dtype/ecount old-container)

test/tech/v3/dataset_test.clj

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1756,6 +1756,12 @@
17561756
(is (= expected-column (:a ds1)))
17571757
(is (= expected-column (:a ds2)))))
17581758

1759+
(deftest fixed-type-disable-na-as-missing
1760+
(let [data [{:a "no"} {:a "NA"} {:a "na"}]
1761+
ds1 (ds/->dataset data {:parser-fn :string :disable-na-as-missing? true})
1762+
ds2 (ds/->dataset data {:parser-fn :string :disable-na-as-missing? false})]
1763+
(is (= ["no" "NA" "na"] (:a ds1)))
1764+
(is (= ["no" nil nil] (:a ds2)))))
17591765

17601766
(deftest sub-buffer-col-incorrect-missing
17611767
(let [ds (-> (ds/->dataset {:a (range 20)})

0 commit comments

Comments
 (0)