From 4f7bcc3e0e8aec17b754ce0eb282160393564ec8 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 20 Dec 2025 23:47:43 +0300 Subject: [PATCH 1/2] regression test --- inst/tests/tests.Rraw | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d7fe0f7f9..8b151609b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21896,3 +21896,8 @@ DT = data.table(x = strings) setorder(DT, x) test(2350, DT[["x"]], sort.int(strings, method='radix')) rm(DT, strings) + +# do remove columns in freshly unserialized data.tables, #7488 +DT = unserialize(serialize(as.data.table(mtcars), NULL)) +test(2351, DT[,carb:=NULL], as.data.table(mtcars)[,carb:=NULL]) +rm(DT) From 87734013279d81ffdf4097d370dcac3aa0975a87 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 20 Dec 2025 23:55:34 +0300 Subject: [PATCH 2/2] reallocate non-ok table before removing columns --- R/data.table.R | 77 ++++++++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 9255b3669..3893fe2fc 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1191,8 +1191,9 @@ replace_dot_alias = function(e) { if (any(m<1L | ncol(x) DT(,:=) tests 2212.16-19 (#5113) where a shallow copy is routine for data.frame - if ((ok<1L) || (truelength(x) < ncol(x)+length(newnames))) { - DT = x # in case getOption contains "ncol(DT)" as it used to. TODO: warn and then remove - n = length(newnames) + eval(getOption("datatable.alloccol")) # TODO: warn about expressions and then drop the eval() - # i.e. reallocate at the size as if the new columns were added followed by setalloccol(). - name = substitute(x) - if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk) - catf("Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n", truelength(x), n) - # #1729 -- copying to the wrong environment here can cause some confusion - if (ok == -1L) catf("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n") - - # Verbosity should not issue warnings, so cat rather than warning. - # TO DO: Add option 'datatable.pedantic' to turn on warnings like this. - - # TO DO ... comments moved up from C ... - # Note that the NAMED(dt)>1 doesn't work because .Call - # always sets to 2 (see R-ints), it seems. Work around - # may be possible but not yet working. When the NAMED test works, we can drop allocwarn argument too - # because that's just passed in as FALSE from [<- where we know `*tmp*` isn't really NAMED=2. - # Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we - # don't mind. - } - setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope - if (is.name(name)) { - assign(as.character(name),x,parent.frame(),inherits=TRUE) - } else if (.is_simple_extraction(name)) { - .reassign_extracted_table(name, x) - } # TO DO: else if env$<- or list$<- - } } } else if (is.numeric(lhs)) { lhs = names_x[m] } + # ok <- selfrefok above called without verbose -- only activated when + # ok=-1 which will trigger setalloccol with verbose in the next + # branch, which again calls _selfrefok and returns the message then + # !is.data.table for DF |> DT(,:=) tests 2212.16-19 (#5113) where a shallow copy is routine for data.frame + if ( + ( + !is.null(newnames) || # adding new columns + is.null(jval) || (is.list(jval) && any(vapply_1b(jval, is.null))) # removing columns + ) && ( + (ok<1L) || # unsafe to resize + (truelength(x) < ncol(x)+length(newnames)) # not enough space for new columns + ) + ) { + DT = x # in case getOption contains "ncol(DT)" as it used to. TODO: warn and then remove + n = length(newnames) + eval(getOption("datatable.alloccol")) # TODO: warn about expressions and then drop the eval() + # i.e. reallocate at the size as if the new columns were added followed by setalloccol(). + name = substitute(x) + if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk) + catf("Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n", truelength(x), n) + # #1729 -- copying to the wrong environment here can cause some confusion + if (ok == -1L) catf("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n") + + # Verbosity should not issue warnings, so cat rather than warning. + # TO DO: Add option 'datatable.pedantic' to turn on warnings like this. + + # TO DO ... comments moved up from C ... + # Note that the NAMED(dt)>1 doesn't work because .Call + # always sets to 2 (see R-ints), it seems. Work around + # may be possible but not yet working. When the NAMED test works, we can drop allocwarn argument too + # because that's just passed in as FALSE from [<- where we know `*tmp*` isn't really NAMED=2. + # Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we + # don't mind. + } + setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope + if (is.name(name)) { + assign(as.character(name),x,parent.frame(),inherits=TRUE) + } else if (.is_simple_extraction(name)) { + .reassign_extracted_table(name, x) + } # TO DO: else if env$<- or list$<- + } # TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above. .Call(Cassign,x,irows,cols,newnames,jval) return(suppPrint(x))