Rdatatable · venom1204 · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026
@@ -30,6 +30,8 @@
 
 5. `tables()` can now optionally report `data.table` objects stored one level deep inside list objects when `depth=1L`, [#2606](https://github.com/Rdatatable/data.table/issues/2606). Thanks @MichaelChirico for the report and @manmita for the PR
 
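+6. `setnames()` now supports a global option `datatable.unique.names` to control what happens when a rename would create duplicate column names. The option defaults to `NULL`, which behaves the same as `"off"` (duplicates are created silently, as before); the other accepted values are `"warn"` (create the duplicates but issue a warning), `"error"` (refuse with an error), and `"rename"` (de-duplicate the names with `make.unique()`). This addresses long-standing ambiguity issues when duplicate names were created silently, [#4044](https://github.com/Rdatatable/data.table/issues/4044). Thanks @venom1204 for the PR.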
+
 ### BUG FIXES
 
 1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.

@@ -2943,6 +2943,12 @@ setnames = function(x,old,new,skip_absent=FALSE) {
     if (!length(new)) return(invisible(x)) # no changes
     if (length(i) != length(new)) internal_error("length(i)!=length(new)") # nocov
   }
+
+  full_names = names(x)
+  full_names[i] = new
+  full_names = process_name_policy(full_names)
+  new = full_names[i]
+
   # update the key if the column name being change is in the key
   m = chmatch(names(x)[i], key(x))
   w = which(!is.na(m))

@@ -98,7 +98,8 @@
     datatable.auto.index=TRUE,          # DT[col=="val"] to auto add index so 2nd time faster
     datatable.use.index=TRUE,           # global switch to address #1422
     datatable.prettyprint.char=NULL,    # FR #1091
-    datatable.old.matrix.autoname=FALSE # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
+    datatable.old.matrix.autoname=FALSE, # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
+    datatable.unique.names = NULL
   )
   opts = opts[!names(opts) %chin% names(options())]
   options(opts)

@@ -35,6 +35,30 @@ check_duplicate_names = function(x, table_name=deparse(substitute(x))) {
         table_name, brackify(duplicate_names), domain=NA)
 }
 
+# Apply the 'datatable.unique.names' option to a complete vector of column names.
+#
+# names_vec: character vector of column names as they would be after the pending change.
+# Returns names_vec unchanged when the option is NULL or "off", when the option value is
+#   invalid (after a warning), or when names_vec has no duplicates. Otherwise:
+#   "warn"   - warns and returns names_vec unchanged;
+#   "error"  - raises an error;
+#   "rename" - returns make.unique(names_vec).
+process_name_policy = function(names_vec) {
+  policy = getOption("datatable.unique.names")
+  if (is.null(policy)) return(names_vec)
+
+  # Check the shape of the option before using it in scalar conditions: NA or a value of
+  # length != 1 would otherwise abort with an unrelated error instead of this warning.
+  valid = is.character(policy) && length(policy) == 1L && !is.na(policy) &&
+    policy %in% c("off", "warn", "error", "rename")
+  if (!valid) {
+    shown = if (is.character(policy)) toString(policy) else paste(deparse(policy, nlines=1L), collapse=" ")
+    warningf("Invalid value for 'datatable.unique.names': [%s]. Falling back to 'off'. Allowed values are: 'off', 'warn', 'error', 'rename'.", shown)
+    return(names_vec)
+  }
+
+  if (policy == "off" || !anyDuplicated(names_vec)) return(names_vec)
+
+  # make.unique() keeps the first occurrence and suffixes the later ones, so the suffix may
+  # land on a name the caller did not intend to change.
+  # NOTE(review): confirm callers write back every position of the result, not a subset.
+  if (policy == "rename") return(make.unique(names_vec))
+
+  # The names are passed as a '%s' argument rather than pasted into the format string, so
+  # that a column name containing '%' cannot be misread as a conversion specification.
+  dups = brackify(unique(names_vec[duplicated(names_vec)]))
+  if (policy == "error") stopf("Duplicate column names created: %s. This may cause ambiguity.", dups)
+  warningf("Duplicate column names created: %s. This may cause ambiguity.", dups)
+  names_vec
+}
+
 duplicated_values = function(x) {
   # fast anyDuplicated for the typical/non-error case; second duplicated() pass for (usually) error case
   if (!anyDuplicated(x)) return(vector(typeof(x)))

@@ -21577,3 +21577,23 @@ close(con)
 file.create(f <- tempfile())
 test(2367.6, fread(file(f)), data.table(), warning="Connection has size 0.")
 unlink(f)
+
+#4044: option datatable.unique.names controls duplicate column names created by setnames()
+# Every test renames "Petal.Length" to the already-present "Sepal.Length" on a fresh copy of DT.
+DT = as.data.table(iris)
+# "off": the duplicate name is created and no warning is expected
+test(2368.1, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), 
+     c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species"), 
+     options = list(datatable.unique.names = "off"))
+# "warn": the duplicate name is still created, together with a warning
+test(2368.2, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), 
+     c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species"), 
+     warning = "Duplicate column names created", 
+     options = list(datatable.unique.names = "warn"))
+# "error": the rename is refused with an error
+test(2368.3, setnames(copy(DT), "Petal.Length", "Sepal.Length"), 
+     error = "Duplicate column names created", 
+     options = list(datatable.unique.names = "error"))
+# "rename": the renamed column gets a ".1" suffix so the names stay unique
+test(2368.4, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), 
+     c("Sepal.Length", "Sepal.Width", "Sepal.Length.1", "Petal.Width", "Species"), 
+     options = list(datatable.unique.names = "rename"))
+# unrecognised value: a warning is issued and the result matches the "off" case
+test(2368.5, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), 
+     c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species"), 
+     warning = "Invalid value for 'datatable.unique.names'", 
+     options = list(datatable.unique.names = "invalid_option_name"))
@@ -105,6 +105,9 @@
     \item{\code{datatable.enlist}}{Experimental feature. Default is \code{NULL}. If set to a function
       (e.g., \code{list}), the \code{j} expression can return a \code{list}, which will then
       be "enlisted" into columns in the result.}
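+    \item{\code{datatable.unique.names}}{A character string, default \code{NULL} (same as \code{"off"}).
+      Controls the behavior when operations (\bold{currently only \code{setnames}})
+      would result in duplicate column names: \code{"off"} creates the duplicates silently,
+      \code{"warn"} creates them and issues a warning, \code{"error"} raises an error, and
+      \code{"rename"} makes the names unique using \code{make.unique}.
+      Any other value triggers a warning and is treated as \code{"off"}.}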
   }
 }