From 41c8f670d08644104d5be7dd4fe599110fbdc47d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:27:58 -0700 Subject: [PATCH 01/17] refactor cedta rules into helper --- R/cedta.R | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 83d92ec9dd..2afea32b7b 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -39,6 +39,36 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } # nocov end +# in a helper to promote readability +# NB: put the most common and recommended cases first for speed +.cedta_impl_ <- function(ns, n) { + nsname = getNamespaceName(ns) + if (nsname == "data.table") return(TRUE) + + if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) + + if (nsname == "utils") { + if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) + + # 'example' for #2972 + sc <- sys.calls() + if (length(sc) >= 8L && sc[[length(sc) - 7L]] %iscall% 'example') return(TRUE) + } + + if (nsname == "base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) return(TRUE) # lapply + + if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) return(TRUE) + + if (nsname %chin% cedta.override) return(TRUE) + + # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist + if (isTRUE(ns$.datatable.aware)) return(TRUE) + + # both ns$.Depends and get(.Depends,ns) are not sufficient + pkg_ns = paste("package", nsname, sep=":") + tryCatch("data.table" %chin% get(".Depends", pkg_ns, inherits=FALSE), error=function(e) FALSE) +} + # cedta = Calling Environment Data.Table-Aware cedta = function(n=2L) { # Calling Environment Data Table Aware @@ -51,20 +81,10 @@ cedta = function(n=2L) { # e.g. DT queries at the prompt (.GlobalEnv) and knitr's eval(,envir=globalenv()) but not DF[...] inside knitr::kable v1.6 return(TRUE) } - nsname = getNamespaceName(ns) - ans = nsname=="data.table" || - "data.table" %chin% names(getNamespaceImports(ns)) || # most common and recommended cases first for speed - (nsname=="utils" && - (exists("debugger.look", parent.frame(n+1L)) || - (length(sc<-sys.calls())>=8L && sc[[length(sc)-7L]] %iscall% 'example')) ) || # 'example' for #2972 - (nsname=="base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) || # lapply - (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) || - nsname %chin% cedta.override || - isTRUE(ns$.datatable.aware) || # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist - tryCatch("data.table" %chin% get(".Depends",paste("package",nsname,sep=":"),inherits=FALSE),error=function(e)FALSE) # both ns$.Depends and get(.Depends,ns) are not sufficient + ans = .cedta_impl_(ns, n) if (!ans && getOption("datatable.verbose")) { # nocov start - catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", nsname) + catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", getNamespaceName(ns)) print(sapply(sys.calls(), `[[`, 1L)) # nocov end # so we can trace the namespace name that may need to be added (very unusually) From c5e1284c94b26cd272b80c124355181132feaa1a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:34:03 -0700 Subject: [PATCH 02/17] implement for bug --- R/cedta.R | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 2afea32b7b..e829f054df 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -28,8 +28,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow # nocov start: very hard to reach this within our test suite -- the call stack within a call generated by e.g. knitr # for loop, not any(vapply_1b(.)), to allow early exit -.any_eval_calls_in_stack = function() { - calls = sys.calls() +.any_eval_calls_in_stack = function(calls) { # likelier to be close to the end of the call stack, right? for (ii in length(calls):1) { # nolint: seq_linter. rev(seq_len(length(calls)))? See https://bugs.r-project.org/show_bug.cgi?id=18406. the_call = calls[[ii]][[1L]] @@ -39,6 +38,17 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } # nocov end +.any_sd_queries_in_stack = function(calls) { + for (ii in length(calls):1) { # nolint: seq_linter. As above. + the_call = calls[[ii]][1L] + if (!is.name(the_call) || the_call != "[") next + the_lhs = calls[[ii]][[2L]] + if (!is.name(the_lhs) || the_lhs != ".SD") next + return(TRUE) + } + FALSE +} + # in a helper to promote readability # NB: put the most common and recommended cases first for speed .cedta_impl_ <- function(ns, n) { @@ -47,17 +57,21 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) + sc <- sys.calls() if (nsname == "utils") { if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) # 'example' for #2972 - sc <- sys.calls() if (length(sc) >= 8L && sc[[length(sc) - 7L]] %iscall% 'example') return(TRUE) } - if (nsname == "base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) return(TRUE) # lapply + if (nsname == "base") { + if (all(c("FUN", "X") %chin% ls(parent.frame(n)))) return(TRUE) # lapply + + if (.any_sd_queries_in_stack(sc)) return(TRUE) # e.g. lapply() where "piped-in" j= arg has .SD[] + } - if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) return(TRUE) + if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack(sc)) return(TRUE) if (nsname %chin% cedta.override) return(TRUE) From d82923ef6d84cbb69ace752e287c73ffc20cc302 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:37:28 -0700 Subject: [PATCH 03/17] test, NEWS --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 3 +++ 2 files changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index 9421f96400..04a409c8c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -72,6 +72,8 @@ 12. `print(..., col.names = 'none')` now correctly adapts column widths to the data content, ignoring the original column names and producing a more compact output, [#6882](https://github.com/Rdatatable/data.table/issues/6882). Thanks to @brooksambrose for the report and @venom1204 for the PR. +13. Reference to `.SD` in `...` arguments to `lapply()`, e.g. ``lapply(list_of_tables, `[`, j=.SD[1L])`` is evaluated correctly, [#2982](https://github.com/Rdatatable/data.table/issues/2982). Thanks @franknarf1 for the report and @MichaelChirico for the fix. + ### NOTES 1. The following in-progress deprecations have proceeded: diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aceeb77f89..4e09bc8df6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21426,3 +21426,6 @@ test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3 M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2"))) # test of list(M) for empty-rowname'd matrix input test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6)) + +# .SD reference in '...' passed to lapply(FUN=) is recognized as data.table +test(2331, lapply(list(data.table(a=1:2)), `[`, j=.SD[1L]), list(data.table(a=1L))) From 885dbb1ef5c68ef06e752d3158f37e587f07c152 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:40:04 -0700 Subject: [PATCH 04/17] About 100 packages define this now, no sense keeping track --- R/cedta.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/cedta.R b/R/cedta.R index e829f054df..7e29b81522 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -75,7 +75,6 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if (nsname %chin% cedta.override) return(TRUE) - # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist if (isTRUE(ns$.datatable.aware)) return(TRUE) # both ns$.Depends and get(.Depends,ns) are not sufficient From 1643f36f37977a98403fa2369e3314c338ccd263 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:40:35 -0700 Subject: [PATCH 05/17] trailing ws --- R/cedta.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 7e29b81522..0089ed6b8c 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -54,9 +54,9 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow .cedta_impl_ <- function(ns, n) { nsname = getNamespaceName(ns) if (nsname == "data.table") return(TRUE) - + if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) - + sc <- sys.calls() if (nsname == "utils") { if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) @@ -72,11 +72,11 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack(sc)) return(TRUE) - + if (nsname %chin% cedta.override) return(TRUE) - + if (isTRUE(ns$.datatable.aware)) return(TRUE) - + # both ns$.Depends and get(.Depends,ns) are not sufficient pkg_ns = paste("package", nsname, sep=":") tryCatch("data.table" %chin% get(".Depends", pkg_ns, inherits=FALSE), error=function(e) FALSE) From c43cee846201b7b30983cbb934f307778d1c62f5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:41:59 -0700 Subject: [PATCH 06/17] rearrange: .datatable.aware to the top --- R/cedta.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 0089ed6b8c..0271d6266a 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -57,6 +57,8 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) + if (isTRUE(ns$.datatable.aware)) return(TRUE) + sc <- sys.calls() if (nsname == "utils") { if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) @@ -75,8 +77,6 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if (nsname %chin% cedta.override) return(TRUE) - if (isTRUE(ns$.datatable.aware)) return(TRUE) - # both ns$.Depends and get(.Depends,ns) are not sufficient pkg_ns = paste("package", nsname, sep=":") tryCatch("data.table" %chin% get(".Depends", pkg_ns, inherits=FALSE), error=function(e) FALSE) From f7be16543dac3ecc5d694ce3e38648a1da50313c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 21:47:53 +0000 Subject: [PATCH 07/17] push isNamespace check into helper --- R/cedta.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 0271d6266a..a6b8e49fae 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -52,6 +52,11 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow # in a helper to promote readability # NB: put the most common and recommended cases first for speed .cedta_impl_ <- function(ns, n) { + if (!isNamespace(ns)) { + # e.g. DT queries at the prompt (.GlobalEnv) and knitr's eval(,envir=globalenv()) but not DF[...] inside knitr::kable v1.6 + return(TRUE) + } + nsname = getNamespaceName(ns) if (nsname == "data.table") return(TRUE) @@ -90,10 +95,6 @@ cedta = function(n=2L) { return(env$.datatable.aware) } ns = topenv(env) - if (!isNamespace(ns)) { - # e.g. DT queries at the prompt (.GlobalEnv) and knitr's eval(,envir=globalenv()) but not DF[...] inside knitr::kable v1.6 - return(TRUE) - } ans = .cedta_impl_(ns, n) if (!ans && getOption("datatable.verbose")) { # nocov start From 5462a20bf046838f8b95e229852fece97cee4fe9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 21:59:36 +0000 Subject: [PATCH 08/17] Fix: look up another level, use %iscall% --- R/cedta.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index a6b8e49fae..b877860148 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -31,8 +31,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow .any_eval_calls_in_stack = function(calls) { # likelier to be close to the end of the call stack, right? for (ii in length(calls):1) { # nolint: seq_linter. rev(seq_len(length(calls)))? See https://bugs.r-project.org/show_bug.cgi?id=18406. - the_call = calls[[ii]][[1L]] - if (is.name(the_call) && (the_call %chin% c("eval", "evalq"))) return(TRUE) + if (calls[[ii]] %iscall% c("eval", "evalq")) return(TRUE) } FALSE } @@ -40,8 +39,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow .any_sd_queries_in_stack = function(calls) { for (ii in length(calls):1) { # nolint: seq_linter. As above. - the_call = calls[[ii]][1L] - if (!is.name(the_call) || the_call != "[") next + if (!calls[[ii]] %iscall% "[") next the_lhs = calls[[ii]][[2L]] if (!is.name(the_lhs) || the_lhs != ".SD") next return(TRUE) @@ -95,7 +93,7 @@ cedta = function(n=2L) { return(env$.datatable.aware) } ns = topenv(env) - ans = .cedta_impl_(ns, n) + ans = .cedta_impl_(ns, n+1L) if (!ans && getOption("datatable.verbose")) { # nocov start catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", getNamespaceName(ns)) From ea59923b2d8af2f4414f9b94dff56952510c1871 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 14:32:24 -0700 Subject: [PATCH 09/17] Revert "Fix: look up another level, use %iscall%" This reverts commit 5462a20bf046838f8b95e229852fece97cee4fe9. --- R/cedta.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index b877860148..a6b8e49fae 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -31,7 +31,8 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow .any_eval_calls_in_stack = function(calls) { # likelier to be close to the end of the call stack, right? for (ii in length(calls):1) { # nolint: seq_linter. rev(seq_len(length(calls)))? See https://bugs.r-project.org/show_bug.cgi?id=18406. - if (calls[[ii]] %iscall% c("eval", "evalq")) return(TRUE) + the_call = calls[[ii]][[1L]] + if (is.name(the_call) && (the_call %chin% c("eval", "evalq"))) return(TRUE) } FALSE } @@ -39,7 +40,8 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow .any_sd_queries_in_stack = function(calls) { for (ii in length(calls):1) { # nolint: seq_linter. As above. - if (!calls[[ii]] %iscall% "[") next + the_call = calls[[ii]][1L] + if (!is.name(the_call) || the_call != "[") next the_lhs = calls[[ii]][[2L]] if (!is.name(the_lhs) || the_lhs != ".SD") next return(TRUE) @@ -93,7 +95,7 @@ cedta = function(n=2L) { return(env$.datatable.aware) } ns = topenv(env) - ans = .cedta_impl_(ns, n+1L) + ans = .cedta_impl_(ns, n) if (!ans && getOption("datatable.verbose")) { # nocov start catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", getNamespaceName(ns)) From 2995b17de9a907ef3f4ca804bf4322b82429bc32 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 14:32:33 -0700 Subject: [PATCH 10/17] Revert "push isNamespace check into helper" This reverts commit f7be16543dac3ecc5d694ce3e38648a1da50313c. --- R/cedta.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index a6b8e49fae..0271d6266a 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -52,11 +52,6 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow # in a helper to promote readability # NB: put the most common and recommended cases first for speed .cedta_impl_ <- function(ns, n) { - if (!isNamespace(ns)) { - # e.g. DT queries at the prompt (.GlobalEnv) and knitr's eval(,envir=globalenv()) but not DF[...] inside knitr::kable v1.6 - return(TRUE) - } - nsname = getNamespaceName(ns) if (nsname == "data.table") return(TRUE) @@ -95,6 +90,10 @@ cedta = function(n=2L) { return(env$.datatable.aware) } ns = topenv(env) + if (!isNamespace(ns)) { + # e.g. DT queries at the prompt (.GlobalEnv) and knitr's eval(,envir=globalenv()) but not DF[...] inside knitr::kable v1.6 + return(TRUE) + } ans = .cedta_impl_(ns, n) if (!ans && getOption("datatable.verbose")) { # nocov start From 4b91ab6f1b672de4cef9ee0b7fa57d640b6b5b8d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 14:32:43 -0700 Subject: [PATCH 11/17] Revert "rearrange: .datatable.aware to the top" This reverts commit c43cee846201b7b30983cbb934f307778d1c62f5. --- R/cedta.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 0271d6266a..0089ed6b8c 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -57,8 +57,6 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) - if (isTRUE(ns$.datatable.aware)) return(TRUE) - sc <- sys.calls() if (nsname == "utils") { if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) @@ -77,6 +75,8 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if (nsname %chin% cedta.override) return(TRUE) + if (isTRUE(ns$.datatable.aware)) return(TRUE) + # both ns$.Depends and get(.Depends,ns) are not sufficient pkg_ns = paste("package", nsname, sep=":") tryCatch("data.table" %chin% get(".Depends", pkg_ns, inherits=FALSE), error=function(e) FALSE) From ba4403b13de098660956cbe3e62970194b667339 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 14:32:59 -0700 Subject: [PATCH 12/17] Revert "trailing ws" This reverts commit 1643f36f37977a98403fa2369e3314c338ccd263. --- R/cedta.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 0089ed6b8c..7e29b81522 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -54,9 +54,9 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow .cedta_impl_ <- function(ns, n) { nsname = getNamespaceName(ns) if (nsname == "data.table") return(TRUE) - + if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) - + sc <- sys.calls() if (nsname == "utils") { if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) @@ -72,11 +72,11 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack(sc)) return(TRUE) - + if (nsname %chin% cedta.override) return(TRUE) - + if (isTRUE(ns$.datatable.aware)) return(TRUE) - + # both ns$.Depends and get(.Depends,ns) are not sufficient pkg_ns = paste("package", nsname, sep=":") tryCatch("data.table" %chin% get(".Depends", pkg_ns, inherits=FALSE), error=function(e) FALSE) From b1ea31e4d0986d9f4b9a57effda9f0bb064ae656 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 14:33:10 -0700 Subject: [PATCH 13/17] Revert "About 100 packages define this now, no sense keeping track" This reverts commit 885dbb1ef5c68ef06e752d3158f37e587f07c152. --- R/cedta.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/cedta.R b/R/cedta.R index 7e29b81522..e829f054df 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -75,6 +75,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if (nsname %chin% cedta.override) return(TRUE) + # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist if (isTRUE(ns$.datatable.aware)) return(TRUE) # both ns$.Depends and get(.Depends,ns) are not sufficient From f6996b23d6916728ec4cb1497402b519869416eb Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 14:33:17 -0700 Subject: [PATCH 14/17] Revert "test, NEWS" This reverts commit d82923ef6d84cbb69ace752e287c73ffc20cc302. --- NEWS.md | 2 -- inst/tests/tests.Rraw | 3 --- 2 files changed, 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 04a409c8c7..9421f96400 100644 --- a/NEWS.md +++ b/NEWS.md @@ -72,8 +72,6 @@ 12. `print(..., col.names = 'none')` now correctly adapts column widths to the data content, ignoring the original column names and producing a more compact output, [#6882](https://github.com/Rdatatable/data.table/issues/6882). Thanks to @brooksambrose for the report and @venom1204 for the PR. -13. Reference to `.SD` in `...` arguments to `lapply()`, e.g. ``lapply(list_of_tables, `[`, j=.SD[1L])`` is evaluated correctly, [#2982](https://github.com/Rdatatable/data.table/issues/2982). Thanks @franknarf1 for the report and @MichaelChirico for the fix. - ### NOTES 1. The following in-progress deprecations have proceeded: diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4e09bc8df6..aceeb77f89 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21426,6 +21426,3 @@ test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3 M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2"))) # test of list(M) for empty-rowname'd matrix input test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6)) - -# .SD reference in '...' passed to lapply(FUN=) is recognized as data.table -test(2331, lapply(list(data.table(a=1:2)), `[`, j=.SD[1L]), list(data.table(a=1L))) From 2d5d9ca64c5c76a6ad7fa574b4a4a7dee3097c9e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 14:33:28 -0700 Subject: [PATCH 15/17] Revert "implement for bug" This reverts commit c5e1284c94b26cd272b80c124355181132feaa1a. --- R/cedta.R | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index e829f054df..2afea32b7b 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -28,7 +28,8 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow # nocov start: very hard to reach this within our test suite -- the call stack within a call generated by e.g. knitr # for loop, not any(vapply_1b(.)), to allow early exit -.any_eval_calls_in_stack = function(calls) { +.any_eval_calls_in_stack = function() { + calls = sys.calls() # likelier to be close to the end of the call stack, right? for (ii in length(calls):1) { # nolint: seq_linter. rev(seq_len(length(calls)))? See https://bugs.r-project.org/show_bug.cgi?id=18406. the_call = calls[[ii]][[1L]] @@ -38,17 +39,6 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } # nocov end -.any_sd_queries_in_stack = function(calls) { - for (ii in length(calls):1) { # nolint: seq_linter. As above. - the_call = calls[[ii]][1L] - if (!is.name(the_call) || the_call != "[") next - the_lhs = calls[[ii]][[2L]] - if (!is.name(the_lhs) || the_lhs != ".SD") next - return(TRUE) - } - FALSE -} - # in a helper to promote readability # NB: put the most common and recommended cases first for speed .cedta_impl_ <- function(ns, n) { @@ -57,21 +47,17 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) - sc <- sys.calls() if (nsname == "utils") { if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) # 'example' for #2972 + sc <- sys.calls() if (length(sc) >= 8L && sc[[length(sc) - 7L]] %iscall% 'example') return(TRUE) } - if (nsname == "base") { - if (all(c("FUN", "X") %chin% ls(parent.frame(n)))) return(TRUE) # lapply - - if (.any_sd_queries_in_stack(sc)) return(TRUE) # e.g. lapply() where "piped-in" j= arg has .SD[] - } + if (nsname == "base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) return(TRUE) # lapply - if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack(sc)) return(TRUE) + if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) return(TRUE) if (nsname %chin% cedta.override) return(TRUE) From 606c4fab3c8d162764aa793f086e8568fa81c2d8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 14:33:34 -0700 Subject: [PATCH 16/17] Revert "refactor cedta rules into helper" This reverts commit 41c8f670d08644104d5be7dd4fe599110fbdc47d. --- R/cedta.R | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 2afea32b7b..83d92ec9dd 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -39,36 +39,6 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } # nocov end -# in a helper to promote readability -# NB: put the most common and recommended cases first for speed -.cedta_impl_ <- function(ns, n) { - nsname = getNamespaceName(ns) - if (nsname == "data.table") return(TRUE) - - if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) - - if (nsname == "utils") { - if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) - - # 'example' for #2972 - sc <- sys.calls() - if (length(sc) >= 8L && sc[[length(sc) - 7L]] %iscall% 'example') return(TRUE) - } - - if (nsname == "base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) return(TRUE) # lapply - - if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) return(TRUE) - - if (nsname %chin% cedta.override) return(TRUE) - - # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist - if (isTRUE(ns$.datatable.aware)) return(TRUE) - - # both ns$.Depends and get(.Depends,ns) are not sufficient - pkg_ns = paste("package", nsname, sep=":") - tryCatch("data.table" %chin% get(".Depends", pkg_ns, inherits=FALSE), error=function(e) FALSE) -} - # cedta = Calling Environment Data.Table-Aware cedta = function(n=2L) { # Calling Environment Data Table Aware @@ -81,10 +51,20 @@ cedta = function(n=2L) { # e.g. DT queries at the prompt (.GlobalEnv) and knitr's eval(,envir=globalenv()) but not DF[...] inside knitr::kable v1.6 return(TRUE) } - ans = .cedta_impl_(ns, n) + nsname = getNamespaceName(ns) + ans = nsname=="data.table" || + "data.table" %chin% names(getNamespaceImports(ns)) || # most common and recommended cases first for speed + (nsname=="utils" && + (exists("debugger.look", parent.frame(n+1L)) || + (length(sc<-sys.calls())>=8L && sc[[length(sc)-7L]] %iscall% 'example')) ) || # 'example' for #2972 + (nsname=="base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) || # lapply + (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) || + nsname %chin% cedta.override || + isTRUE(ns$.datatable.aware) || # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist + tryCatch("data.table" %chin% get(".Depends",paste("package",nsname,sep=":"),inherits=FALSE),error=function(e)FALSE) # both ns$.Depends and get(.Depends,ns) are not sufficient if (!ans && getOption("datatable.verbose")) { # nocov start - catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", getNamespaceName(ns)) + catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", nsname) print(sapply(sys.calls(), `[[`, 1L)) # nocov end # so we can trace the namespace name that may need to be added (very unusually) From ec65a539f861baf5df013d33e211dfff22f8a9dd Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 15:26:29 -0700 Subject: [PATCH 17/17] fix j=.SD passed to lapply '...' --- NEWS.md | 2 ++ R/cedta.R | 17 +++++++++++++++-- inst/tests/tests.Rraw | 3 +++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9421f96400..04a409c8c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -72,6 +72,8 @@ 12. `print(..., col.names = 'none')` now correctly adapts column widths to the data content, ignoring the original column names and producing a more compact output, [#6882](https://github.com/Rdatatable/data.table/issues/6882). Thanks to @brooksambrose for the report and @venom1204 for the PR. +13. Reference to `.SD` in `...` arguments to `lapply()`, e.g. ``lapply(list_of_tables, `[`, j=.SD[1L])`` is evaluated correctly, [#2982](https://github.com/Rdatatable/data.table/issues/2982). Thanks @franknarf1 for the report and @MichaelChirico for the fix. + ### NOTES 1. The following in-progress deprecations have proceeded: diff --git a/R/cedta.R b/R/cedta.R index 83d92ec9dd..5722b5438c 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -39,6 +39,16 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } # nocov end +.any_sd_queries_in_stack = function(calls) { + for (ii in length(calls):1) { # nolint: seq_linter. As above. + if (!calls[[ii]] %iscall% "[") next + the_lhs = calls[[ii]][[2L]] + if (!is.name(the_lhs) || the_lhs != ".SD") next + return(TRUE) + } + FALSE +} + # cedta = Calling Environment Data.Table-Aware cedta = function(n=2L) { # Calling Environment Data Table Aware @@ -52,12 +62,15 @@ cedta = function(n=2L) { return(TRUE) } nsname = getNamespaceName(ns) + sc = sys.calls() ans = nsname=="data.table" || "data.table" %chin% names(getNamespaceImports(ns)) || # most common and recommended cases first for speed (nsname=="utils" && (exists("debugger.look", parent.frame(n+1L)) || - (length(sc<-sys.calls())>=8L && sc[[length(sc)-7L]] %iscall% 'example')) ) || # 'example' for #2972 - (nsname=="base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) || # lapply + (length(sc)>=8L && sc[[length(sc)-7L]] %iscall% 'example')) ) || # 'example' for #2972 + (nsname=="base" && # lapply + (all(c("FUN", "X") %chin% ls(parent.frame(n))) || + .any_sd_queries_in_stack(sc))) || (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) || nsname %chin% cedta.override || isTRUE(ns$.datatable.aware) || # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aceeb77f89..4e09bc8df6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21426,3 +21426,6 @@ test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3 M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2"))) # test of list(M) for empty-rowname'd matrix input test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6)) + +# .SD reference in '...' passed to lapply(FUN=) is recognized as data.table +test(2331, lapply(list(data.table(a=1:2)), `[`, j=.SD[1L]), list(data.table(a=1L)))