From fa2c86da01f81ecb2d4f23fcb755cf2d188d085e Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 13 May 2026 18:15:00 +0200 Subject: [PATCH 1/4] add setnafill support for character vectors --- NEWS.md | 2 +- inst/tests/nafill.Rraw | 17 ++++++++++++----- src/nafill.c | 5 ++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5de499cc8..1266fb844 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,7 +14,7 @@ ### NEW FEATURES -1. `nafill()`, `setnafill()` extended to work on logical and factor vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Includes support for `Date`, `IDate`, `POSIXct`, etc. `nafill()` works for character vectors, but not yet `setnafill()`. Thanks @jangorecki for the request and @jangorecki and @MichaelChirico for the PRs. +1. `nafill()`, `setnafill()` extended to work on logical, factor and character vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Includes support for `Date`, `IDate`, `POSIXct` and character vectors. Thanks @jangorecki for the request and @jangorecki, @MichaelChirico and @ben-schwen for the PRs. 2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; `TRUE` uses the default 3-second interval, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR. diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 4ab0d30b9..af6f21afa 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -452,10 +452,15 @@ test(16.07, setnafill(copy(DT), fill="a", cols='f1')$f1, test(16.08, setnafill(copy(DT), fill="a", cols=c('f1', 'f2'))[, .(f1, f2)], data.table(f1=as.factor(c("a", "a", "a", "b", "a", "a", "b", "c", "a", "a")), f2=as.factor(c("a", "a", "c", "b", "a", "a", "b", "a", "a", "a")))) -test(16.09, setnafill(DT, fill="z", cols='c1'), error="not yet supported") -# test(16.10, setnafill(copy(DT), fill="z", cols=c('c1', 'c2'))[, .(c1, c2)], -# data.table(c1=c("z", "z", "a", "b", "z", "z", "c", "d", "z", "z"), -# c2=c("z", "z", "d", "c", "z", "z", "b", "a", "z", "z"))) +test(16.09, setnafill(copy(DT), fill="z", cols='c1')$c1, + c("z", "z", "a", "b", "z", "z", "c", "d", "z", "z")) +test(16.10, setnafill(copy(DT), fill="z", cols=c('c1', 'c2'))[, .(c1, c2)], + data.table(c1=c("z", "z", "a", "b", "z", "z", "c", "d", "z", "z"), + c2=c("z", "z", "d", "c", "z", "z", "b", "a", "z", "z"))) +test(16.101, setnafill(copy(DT), type="locf", cols='c1')$c1, + c(NA, NA, "a", "b", "b", "b", "c", "d", "d", "d")) +test(16.102, setnafill(copy(DT), type="nocb", cols='c1')$c1, + c("a", "a", "a", "b", "c", "c", "c", "d", NA, NA)) test(16.11, setnafill(copy(DT), fill=as.POSIXct("2027-01-01"), cols='t1')$t1, replace(DT$t1, c(1:2, 5:6, 9:10), as.POSIXct("2027-01-01"))) test(16.12, setnafill(copy(DT), fill=as.POSIXct("2027-01-01"), cols=c('t1', 't2'))[, .(t1, t2)], @@ -467,7 +472,9 @@ test(16.13, setnafill(copy(DT), fill=list(TRUE, 9L, 9.0, "a", as.POSIXct("2027-0 d1=c(9.0, 9L, 0L, 1L, 9L, 9L, 2:3, 9L, 9L), f1=as.factor(c("a", "a", "a", "b", "a", "a", "b", "c", "a", "a")), t1=replace(DT$t1, c(1:2, 5:6, 9:10), as.POSIXct("2027-01-01")))) -test(16.14, setnafill(DT, cols=c("l1", "c1")), error="not yet supported") +test(16.14, setnafill(copy(DT), fill=list(TRUE, "z"), cols=c("l1", "c1"))[, .(l1, c1)], + data.table(l1=c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE), + c1=c("z", "z", "a", "b", "z", "z", "c", "d", "z", "z"))) DT = data.table(l=c(NA, FALSE), i=c(NA, 0L)) setnafill(DT, fill=list(TRUE, 1L)) test(16.15, DT, data.table(l=c(TRUE, FALSE), i=1:0)) diff --git a/src/nafill.c b/src/nafill.c index 72ed178c7..abba67cba 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -203,11 +203,10 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S } vans[i] = ((ans_t) { .dbl_v=(double *)p, .int_v=(int *)p, .int64_v=(int64_t *)p, .char_v=(SEXP)p, .status=0, .message={"\0","\0","\0","\0"} }); } - } else if (any_char) { - error(_("In-place filling of character columns is not yet supported.")); } else { for (R_len_t i=0; i Date: Thu, 14 May 2026 15:27:25 +0200 Subject: [PATCH 2/4] use copy instead binplace bool --- src/nafill.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/nafill.c b/src/nafill.c index abba67cba..7a8fe4fdd 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -128,7 +128,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S if (verbose) tic = omp_get_wtime(); - bool binplace = LOGICAL(inplace)[0]; + bool copy = !LOGICAL(inplace)[0]; if (!IS_TRUE_OR_FALSE(nan_is_na_arg)) error(_("'%s' must be TRUE or FALSE"), "nan_is_na"); // # nocov bool nan_is_na = LOGICAL(nan_is_na_arg)[0]; @@ -136,7 +136,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S SEXP x = R_NilValue; bool obj_scalar = isVectorAtomic(obj); if (obj_scalar) { - if (binplace) + if (!copy) error(_("'x' argument is atomic vector, in-place update is supported only for list/data.table")); else if (!isReal(obj) && TYPEOF(obj) != INTSXP && !isLogical(obj) && !isString(obj)) error(_("'x' argument (type %s) not supported."), type2char(TYPEOF(obj))); @@ -181,7 +181,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S } } SEXP ans = R_NilValue; - if (!binplace) { + if (copy) { ans = PROTECT(allocVector(VECSXP, nx)); protecti++; for (R_len_t i=0; i Date: Thu, 14 May 2026 15:51:52 +0200 Subject: [PATCH 3/4] add comment about any_char parallel guard --- src/nafill.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nafill.c b/src/nafill.c index 7a8fe4fdd..43146f9dc 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -246,6 +246,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S fillp[i] = SEXPPTR_RO(VECTOR_ELT(fill, i)); // do like this so we can use in parallel region } } + // need any_char guard to avoid parallelizing on character columns #pragma omp parallel for if (nx>1 && !any_char) num_threads(getDTthreads(nx, true)) for (R_len_t i=0; i Date: Thu, 14 May 2026 20:04:11 +0200 Subject: [PATCH 4/4] add tests to check whether setnafill really happens in-place --- inst/tests/nafill.Rraw | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index af6f21afa..98c866310 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -478,6 +478,21 @@ test(16.14, setnafill(copy(DT), fill=list(TRUE, "z"), cols=c("l1", "c1"))[, .(l1 DT = data.table(l=c(NA, FALSE), i=c(NA, 0L)) setnafill(DT, fill=list(TRUE, 1L)) test(16.15, DT, data.table(l=c(TRUE, FALSE), i=1:0)) +# setnafill truly works in-place +DT = data.table(l1=c(NA, TRUE, NA), i1=c(NA, 1L, NA), d1=c(NA, 1.5, NA), + f1=as.factor(c(NA, "a", NA)), c1=c(NA, "a", NA), + t1=as.POSIXct(c(NA, "2025-01-01", NA))) +setnafill(DT, fill=list(TRUE, 9L, 9.0, "a", "z", as.POSIXct("2027-01-01")), + cols=c("l1", "i1", "d1", "f1", "c1", "t1")) +test(16.16, DT, data.table(l1=c(TRUE, TRUE, TRUE), i1=c(9L, 1L, 9L), d1=c(9.0, 1.5, 9.0), + f1=as.factor(c("a", "a", "a")), c1=c("z", "a", "z"), + t1=as.POSIXct(c("2027-01-01", "2025-01-01", "2027-01-01")))) +DT = data.table(x=c(NA, "a", NA, "b", NA)) +setnafill(DT, type="locf", cols="x") +test(16.17, DT, data.table(x=c(NA, "a", "a", "b", "b"))) +DT = data.table(x=c(NA, "a", NA, "b", NA)) +setnafill(DT, type="nocb", cols="x") +test(16.18, DT, data.table(x=c("a", "a", "b", "b", NA))) # related to !is.integer(verbose) test(99.1, data.table(a=1,b=2)[1,1, verbose=1], error="verbose must be logical or integer")