From 5d7697a8438e2ec6ce7f2e9a231cc127e0751ef0 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 28 Sep 2023 21:40:45 +0100 Subject: [PATCH] GH-37842: [R] Implement infer_schema.data.frame() (#37843) ### Rationale for this change Users will be able to easily see the schema which their `data.frame` object will have when it's converted into an Arrow table. ### What changes are included in this PR? Implements `infer_schema()` method for `data.frame` objects. Before: ``` r library(arrow) schema(mtcars) #> Error in UseMethod("infer_schema"): no applicable method for 'infer_schema' applied to an object of class "data.frame" ``` After: ``` r library(arrow) schema(mtcars) #> Schema #> mpg: double #> cyl: double #> disp: double #> hp: double #> drat: double #> wt: double #> qsec: double #> vs: double #> am: double #> gear: double #> carb: double #> #> See $metadata for additional Schema metadata ``` ### Are these changes tested? Yes ### Are there any user-facing changes? Yes * Closes: #37842 Authored-by: Nic Crane Signed-off-by: Nic Crane --- r/NAMESPACE | 1 + r/R/schema.R | 3 +++ r/tests/testthat/test-schema.R | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index 21f88b4180d24..d49255f781f94 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -112,6 +112,7 @@ S3method(infer_schema,ArrowTabular) S3method(infer_schema,Dataset) S3method(infer_schema,RecordBatchReader) S3method(infer_schema,arrow_dplyr_query) +S3method(infer_schema,data.frame) S3method(infer_type,ArrowDatum) S3method(infer_type,Expression) S3method(infer_type,blob) diff --git a/r/R/schema.R b/r/R/schema.R index 1ad18e314191e..ac0604b2b345c 100644 --- a/r/R/schema.R +++ b/r/R/schema.R @@ -285,6 +285,9 @@ infer_schema.Dataset <- function(x) x$schema #' @export infer_schema.arrow_dplyr_query <- function(x) implicit_schema(x) +#' @export +infer_schema.data.frame <- function(x) schema(!!!lapply(x, infer_type)) + #' @export names.Schema <- function(x) x$names diff --git 
a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index db91cee330960..b1dc06592955e 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -295,9 +295,14 @@ test_that("schema name assignment", { test_that("schema extraction", { skip_if_not_available("dataset") + tbl <- arrow_table(example_data) + expect_equal(schema(example_data), tbl$schema) expect_equal(schema(tbl), tbl$schema) + expect_equal(schema(data.frame(a = 1, a = "x", check.names = FALSE)), schema(a = double(), a = string())) + expect_equal(schema(data.frame()), schema()) + ds <- InMemoryDataset$create(example_data) expect_equal(schema(ds), ds$schema)