diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp
index 432b49503e1..7af710bc7f3 100644
--- a/r/src/array_to_vector.cpp
+++ b/r/src/array_to_vector.cpp
@@ -595,7 +595,9 @@ class Converter_Dictionary : public Converter {
         case Type::UINT16:
         case Type::INT16:
         case Type::INT32:
-          // TODO: also add int64, uint32, uint64 downcasts, if possible
+        case Type::UINT32:
+        case Type::INT64:
+        case Type::UINT64:
           break;
         default:
           cpp11::stop("Cannot convert Dictionary Array of type `%s` to R",
@@ -612,6 +614,16 @@ class Converter_Dictionary : public Converter {
         dictionary_ = CreateEmptyArray(dict_type.value_type());
       }
     }
+
+    // R factors store their codes in 32-bit integers, so dictionary arrays with
+    // more levels than that cannot be represented safely.
+    if (dictionary_->length() > std::numeric_limits<int>::max()) {
+      const auto& dict_type = checked_cast<const DictionaryType&>(*chunked_array->type());
+      cpp11::stop(
+          "Cannot convert Dictionary Array of type `%s` to R: dictionary has "
+          "more levels than an R factor can represent",
+          dict_type.ToString().c_str());
+    }
   }
 
   SEXP Allocate(R_xlen_t n) const {
@@ -653,6 +665,15 @@ class Converter_Dictionary : public Converter {
       case Type::INT32:
         return Ingest_some_nulls_Impl<arrow::Int32Type>(data, array, start, n,
                                                         chunk_index);
+      case Type::UINT32:
+        return Ingest_some_nulls_Impl<arrow::UInt32Type>(data, array, start, n,
+                                                         chunk_index);
+      case Type::INT64:
+        return Ingest_some_nulls_Impl<arrow::Int64Type>(data, array, start, n,
+                                                        chunk_index);
+      case Type::UINT64:
+        return Ingest_some_nulls_Impl<arrow::UInt64Type>(data, array, start, n,
+                                                         chunk_index);
       default:
         break;
     }
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 1ca8832beb8..e404da1d029 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -371,6 +371,22 @@ test_that("Can create table with specific dictionary types", {
   }
 })
 
+test_that("Table converts dictionary arrays with wider index types back to R", {
+  fact <- example_data[, "fct"]
+
+  tab_uint32 <- Table$create(fact, schema = schema(fct = dictionary(uint32(), utf8())))
+  expect_equal(tab_uint32$schema, schema(fct = dictionary(uint32(), utf8())))
+  expect_equal_data_frame(tab_uint32, fact)
+
+  tab_int64 <- Table$create(fact, schema = schema(fct = dictionary(int64(), utf8())))
+  expect_equal(tab_int64$schema, schema(fct = dictionary(int64(), utf8())))
+  expect_equal_data_frame(tab_int64, fact)
+
+  tab_uint64 <- Table$create(fact, schema = schema(fct = dictionary(uint64(), utf8())))
+  expect_equal(tab_uint64$schema, schema(fct = dictionary(uint64(), utf8())))
+  expect_equal_data_frame(tab_uint64, fact)
+})
+
 test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
   b1 <- record_batch(f = factor(c("a"), levels = c("a", "b")))
   b2 <- record_batch(f = factor(c("c"), levels = c("c", "d")))