From 0e840e260c8e6f4eed1762f170115dd4bd001887 Mon Sep 17 00:00:00 2001 From: Sebastian Mendez Date: Mon, 20 Oct 2025 16:14:28 +0200 Subject: [PATCH] Add :disable-na-as-missing? for fixed types too. --- src/tech/v3/dataset/io/column_parsers.clj | 11 +++++------ test/tech/v3/dataset_test.clj | 6 ++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/tech/v3/dataset/io/column_parsers.clj b/src/tech/v3/dataset/io/column_parsers.clj index 3f7f8a83..1478e76d 100644 --- a/src/tech/v3/dataset/io/column_parsers.clj +++ b/src/tech/v3/dataset/io/column_parsers.clj @@ -197,7 +197,8 @@ ^IMutList failed-values ^RoaringBitmap failed-indexes column-name - ^:unsynchronized-mutable ^long max-idx] + ^:unsynchronized-mutable ^long max-idx + disable-na-as-missing?] dtype-proto/PECount (ecount [_this] (inc max-idx)) Indexed @@ -216,7 +217,7 @@ ;;be in the space of the container or it could require the parse-fn ;;to make it. (let [parsed-value (cond - (missing-value? value false) + (missing-value? value disable-na-as-missing?) :tech.v3.dataset/missing (and (identical? (dtype/datatype value) container-dtype) (not (instance? String value))) @@ -299,20 +300,18 @@ missing (bitmap/->bitmap)] (FixedTypeParser. container dtype missing-value parse-fn missing failed-values failed-indexes - cname -1))) - + cname -1 + (get options :disable-na-as-missing?)))) (defn parser-kwd-list->parser-tuples [kwd-list] (mapv parser-entry->parser-tuple kwd-list)) - (def default-parser-datatype-sequence [:bool :int16 :int32 :int64 :float64 :uuid :packed-duration :packed-local-date :zoned-date-time :string :text :boolean]) - (defn- promote-container ^IMutList [old-container ^RoaringBitmap missing new-dtype options] (let [n-elems (dtype/ecount old-container) diff --git a/test/tech/v3/dataset_test.clj b/test/tech/v3/dataset_test.clj index 84617d9f..440af96d 100644 --- a/test/tech/v3/dataset_test.clj +++ b/test/tech/v3/dataset_test.clj @@ -1756,6 +1756,12 @@ (is (= expected-column (:a ds1))) (is (= expected-column (:a ds2))))) +(deftest fixed-type-disable-na-as-missing + (let [data [{:a "no"} {:a "NA"} {:a "na"}] + ds1 (ds/->dataset data {:parser-fn :string :disable-na-as-missing? true}) + ds2 (ds/->dataset data {:parser-fn :string :disable-na-as-missing? false})] + (is (= ["no" "NA" "na"] (:a ds1))) + (is (= ["no" nil nil] (:a ds2))))) (deftest sub-buffer-col-incorrect-missing (let [ds (-> (ds/->dataset {:a (range 20)})