Skip to content

Commit

Permalink
feat: make arrow_binary etc. inherit from blob
Browse files Browse the repository at this point in the history
Signed-off-by: SHIMA Tatsuya <[email protected]>
  • Loading branch information
eitsupi committed Feb 20, 2025
1 parent f6bfa7b commit 1811175
Show file tree
Hide file tree
Showing 8 changed files with 20 additions and 25 deletions.
4 changes: 2 additions & 2 deletions r/R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -245,14 +245,14 @@ remove_attributes <- function(x) {
removed_attributes <- c("row.names", "names")
} else if (inherits(x, "factor")) {
removed_attributes <- c("class", "levels")
} else if (inherits(x, c("integer64", "Date", "arrow_binary", "arrow_large_binary"))) {
removed_attributes <- c("class")
} else if (inherits(x, "arrow_fixed_size_binary")) {
removed_attributes <- c("class", "byte_width")
} else if (inherits(x, "POSIXct")) {
removed_attributes <- c("class", "tzone")
} else if (inherits(x, "hms") || inherits(x, "difftime")) {
removed_attributes <- c("class", "units")
} else if (inherits(x, c("integer64", "Date", "blob", "arrow_binary", "arrow_large_binary"))) {
removed_attributes <- c("class")
}
removed_attributes
}
Expand Down
4 changes: 2 additions & 2 deletions r/src/arrow_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ static inline bool can_convert_native(SEXP x) {
return Rf_inherits(x, "factor") || Rf_inherits(x, "Date") ||
Rf_inherits(x, "integer64") || Rf_inherits(x, "POSIXct") ||
Rf_inherits(x, "hms") || Rf_inherits(x, "difftime") ||
Rf_inherits(x, "data.frame") || Rf_inherits(x, "arrow_binary") ||
Rf_inherits(x, "arrow_large_binary") ||
Rf_inherits(x, "data.frame") || Rf_inherits(x, "blob") ||
Rf_inherits(x, "arrow_binary") || Rf_inherits(x, "arrow_large_binary") ||
Rf_inherits(x, "arrow_fixed_size_binary") ||
Rf_inherits(x, "vctrs_unspecified") || Rf_inherits(x, "AsIs");
}
Expand Down
2 changes: 1 addition & 1 deletion r/src/r_to_arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ RVectorType GetVectorType(SEXP x) {
return POSIXLT;
}

if (Rf_inherits(x, "arrow_binary")) {
if (Rf_inherits(x, "arrow_binary") || Rf_inherits(x, "blob")) {
return BINARY;
}

Expand Down
10 changes: 5 additions & 5 deletions r/src/symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ SEXP data::classes_tbl_df =
precious(cpp11::writable::strings({"tbl_df", "tbl", "data.frame"}));

SEXP data::classes_arrow_binary =
precious(cpp11::writable::strings({"arrow_binary", "vctrs_vctr", "list"}));
SEXP data::classes_arrow_large_binary =
precious(cpp11::writable::strings({"arrow_large_binary", "vctrs_vctr", "list"}));
SEXP data::classes_arrow_fixed_size_binary =
precious(cpp11::writable::strings({"arrow_fixed_size_binary", "vctrs_vctr", "list"}));
precious(cpp11::writable::strings({"arrow_binary", "blob", "vctrs_vctr", "list"}));
SEXP data::classes_arrow_large_binary = precious(
cpp11::writable::strings({"arrow_large_binary", "blob", "vctrs_vctr", "list"}));
SEXP data::classes_arrow_fixed_size_binary = precious(
cpp11::writable::strings({"arrow_fixed_size_binary", "blob", "vctrs_vctr", "list"}));
SEXP data::classes_factor = precious(cpp11::writable::strings({"factor"}));
SEXP data::classes_ordered = precious(cpp11::writable::strings({"ordered", "factor"}));

Expand Down
8 changes: 4 additions & 4 deletions r/src/type_infer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,14 @@ std::shared_ptr<arrow::DataType> InferArrowTypeFromVector<VECSXP>(SEXP x) {
return arrow::fixed_size_binary(INTEGER(byte_width)[0]);
}

if (Rf_inherits(x, "arrow_binary")) {
return arrow::binary();
}

if (Rf_inherits(x, "arrow_large_binary")) {
return arrow::large_binary();
}

if (Rf_inherits(x, "arrow_binary") || Rf_inherits(x, "blob")) {
return arrow::binary();
}

// Check attr(x, "ptype") for an appropriate R prototype
SEXP ptype = Rf_getAttrib(x, symbols::ptype);
if (!Rf_isNull(ptype)) {
Expand Down
5 changes: 2 additions & 3 deletions r/tests/testthat/test-Array.R
Original file line number Diff line number Diff line change
Expand Up @@ -800,16 +800,15 @@ test_that("arrow_array() handles vector -> fixed size list arrays", {
})

test_that("Handling string data with embedded nuls", {
raws <- structure(
raws <- blob::as_blob(
list(
as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
as.raw(c(0x66, 0x00, 0x00, 0x61, 0x00, 0x6e)), # multiple nuls
as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
as.raw(c(0x74, 0x76))
),
class = c("arrow_binary", "vctrs_vctr", "list")
)
)
expect_error(
rawToChar(raws[[3]]),
Expand Down
6 changes: 2 additions & 4 deletions r/tests/testthat/test-RecordBatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -606,15 +606,13 @@ test_that("RecordBatch supports cbind", {
})

test_that("Handling string data with embedded nuls", {
raws <- Array$create(structure(list(
raws <- Array$create(blob::as_blob(list(
as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
as.raw(c(0x74, 0x76))
),
class = c("arrow_binary", "vctrs_vctr", "list")
))
)))
batch_with_nul <- record_batch(a = 1:5, b = raws)
batch_with_nul$b <- batch_with_nul$b$cast(utf8())

Expand Down
6 changes: 2 additions & 4 deletions r/tests/testthat/test-chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -463,16 +463,14 @@ test_that("Converting a chunked array unifies factors (ARROW-8374)", {
})

test_that("Handling string data with embedded nuls", {
raws <- structure(list(
raws <- blob::as_blob(list(
as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
as.raw(c(0x66, 0x00, 0x00, 0x61, 0x00, 0x6e)), # multiple nuls
as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
as.raw(c(0x74, 0x76))
),
class = c("arrow_binary", "vctrs_vctr", "list")
)
))
chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8())

v <- expect_error(as.vector(chunked_array_with_nul), NA)
Expand Down

0 comments on commit 1811175

Please sign in to comment.