Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T

27. `dogroups()` no longer reads beyond the resized end of over-allocated data.table list columns, [#7486](https://github.com/Rdatatable/data.table/issues/7486). While this didn't crash in practice, it is now explicitly checked for in recent R versions (r89198+). Thanks @TimTaylor and @aitap for the report and @aitap for the fix.

28. `fread()` with `col.names=` now correctly applies `drop=` and `select=` using user-provided column names instead of auto-generated names, [#3459](https://github.com/Rdatatable/data.table/issues/3459). Thanks to @malcook for the report and @ben-schwen for the fix.

### NOTES

1. The following in-progress deprecations have proceeded:
Expand Down
6 changes: 2 additions & 4 deletions R/fread.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ fread = function(
input="", file=NULL, text=NULL, cmd=NULL, sep="auto", sep2="auto", dec="auto", quote="\"", nrows=Inf, header="auto",
na.strings=getOption("datatable.na.strings","NA"), stringsAsFactors=FALSE, verbose=getOption("datatable.verbose",FALSE),
skip="__auto__", select=NULL, drop=NULL, colClasses=NULL, integer64=getOption("datatable.integer64","integer64"),
col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, comment.char="", key=NULL, index=NULL,
col.names=NULL, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, comment.char="", key=NULL, index=NULL,
showProgress=getOption("datatable.showProgress",interactive()), data.table=getOption("datatable.fread.datatable",TRUE),
nThread=getDTthreads(verbose), logical01=getOption("datatable.logical01",FALSE),
logicalYN=getOption("datatable.logicalYN", FALSE),
Expand Down Expand Up @@ -293,7 +293,7 @@ yaml=FALSE, tmpdir=tempdir(), tz="UTC")
tz="UTC"
}
ans = .Call(CfreadR,input,identical(input,file),sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip,comment.char,
fill,showProgress,nThread,verbose,warnings2errors,logical01,logicalYN,select,drop,colClasses,integer64,encoding,keepLeadingZeros,tz=="UTC")
fill,showProgress,nThread,verbose,warnings2errors,logical01,logicalYN,select,drop,colClasses,integer64,encoding,keepLeadingZeros,tz=="UTC",col.names)
if (!length(ans)) return(null.data.table()) # test 1743.308 drops all columns
nr = length(ans[[1L]])
require_bit64_if_needed(ans)
Expand Down Expand Up @@ -356,8 +356,6 @@ yaml=FALSE, tmpdir=tempdir(), tz="UTC")
for (j in cols_to_factor) set(ans, j=j, value=as_factor(.subset2(ans, j)))
}

if (!missing(col.names)) # FR #768
setnames(ans, col.names) # setnames checks and errors automatically
if (!is.null(key) && data.table) {
if (!is.character(key))
stopf("key argument of data.table() must be a character vector naming columns (NB: col.names are applied before this)")
Expand Down
12 changes: 11 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -8072,7 +8072,7 @@ test(1557.1, names(fread(str)), c("V1", "V2")) # autonamed
test(1557.2, names(fread(str, col.names=letters[1:2])), letters[1:2])
test(1557.3, names(fread(str, col.names=letters[1])), error="Can't assign 1 names to")
test(1557.4, names(fread(str, col.names=letters[1:3])), error="Can't assign 3 names to")
test(1557.5, names(fread(str, col.names=1:2)), error="Passed a vector of type")
test(1557.5, names(fread(str, col.names=1:2)), error="'col.names' must be")

# Fix for #773
f = testDir("issue_773_fread.txt")
Expand Down Expand Up @@ -21901,3 +21901,13 @@ rm(DT, strings)
DT = unserialize(serialize(as.data.table(mtcars), NULL))
test(2351, DT[,carb:=NULL], as.data.table(mtcars)[,carb:=NULL])
rm(DT)

# drop works with user override colnames #3459
DT = data.table(a=c(1L, 4L), c=c(3L, 6L))
test(2352.1, fread("a,b,c\n1,2,3\n4,5,6", drop='b'), DT)
test(2352.2, fread(col.names=c("a","b","c"),"1,2,3\n4,5,6", drop='b'), DT)
test(2352.3, fread(col.names=c("a","b","c"),"1,2,3\n4,5,6", drop=2), DT)
test(2352.4, fread(col.names=c("a","b","c"),"1,2,3\n4,5,6", colClasses=c(b="NULL")), DT)
test(2352.5, fread(col.names=c("a","b","c"),"1,2,3\n4,5,6", select=c('a','c')), DT)
test(2352.6, fread(col.names=c("a","b","c"),"1,2,3\n4,5,6", select=c(1,3)), DT)
rm(DT)
2 changes: 1 addition & 1 deletion src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ SEXP setcharvec(SEXP, SEXP, SEXP);
SEXP chmatch_R(SEXP, SEXP, SEXP);
SEXP chmatchdup_R(SEXP, SEXP, SEXP);
SEXP chin_R(SEXP, SEXP);
SEXP freadR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP freadR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP rbindlist(SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP setlistelt(SEXP, SEXP, SEXP);
Expand Down
40 changes: 34 additions & 6 deletions src/freadR.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ static colType readInt64As = CT_INT64;
static SEXP selectSxp;
static SEXP dropSxp;
static SEXP colClassesSxp;
static SEXP cNamesSxp;
static bool selectColClasses = false;
cetype_t ienc = CE_NATIVE;
static SEXP RCHK;
Expand Down Expand Up @@ -77,7 +78,8 @@ SEXP freadR(
SEXP integer64Arg,
SEXP encodingArg,
SEXP keepLeadingZerosArgs,
SEXP noTZasUTC
SEXP noTZasUTC,
SEXP colNamesArg
)
{
verbose = LOGICAL(verboseArg)[0];
Expand Down Expand Up @@ -185,6 +187,8 @@ SEXP freadR(
args.readInt64As = readInt64As;

colClassesSxp = colClassesArg;
if (!isNull(colNamesArg) && !isString(colNamesArg)) error(_("'col.names' must be NULL or a character vector"));
cNamesSxp = colNamesArg;

selectSxp = selectArg;
dropSxp = dropArg;
Expand Down Expand Up @@ -231,10 +235,10 @@ SEXP freadR(
return DT;
}

static void applyDrop(SEXP items, int8_t *type, int ncol, int dropSource)
static void applyDrop(SEXP items, int8_t *type, int ncol, int dropSource, SEXP matchNames)
{
if (!length(items)) return;
const SEXP itemsInt = PROTECT(isString(items) ? chmatch(items, colNamesSxp, NA_INTEGER) : coerceVector(items, INTSXP));
const SEXP itemsInt = PROTECT(isString(items) ? chmatch(items, matchNames, NA_INTEGER) : coerceVector(items, INTSXP));
const int* const itemsD = INTEGER(itemsInt);
const int n = LENGTH(itemsInt);
for (int j = 0; j < n; j++) {
Expand Down Expand Up @@ -277,13 +281,15 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
}
}
// "use either select= or drop= but not both" was checked earlier in freadR
applyDrop(dropSxp, type, ncol, /*dropSource=*/-1);
bool hasHeader = ncol > 0 && strcmp(CHAR(STRING_ELT(colNamesSxp, 0)), "V1") != 0;
SEXP matchNames = (isNull(cNamesSxp) || hasHeader) ? colNamesSxp : cNamesSxp;
applyDrop(dropSxp, type, ncol, /*dropSource=*/-1, matchNames);
if (TYPEOF(colClassesSxp) == VECSXP) { // not isNewList() because that returns true for NULL
SEXP listNames = PROTECT(getAttrib(colClassesSxp, R_NamesSymbol)); // rchk wanted this protected
for (int i = 0; i < LENGTH(colClassesSxp); i++) {
if (STRING_ELT(listNames, i) == char_NULL) {
SEXP items = VECTOR_ELT(colClassesSxp, i);
applyDrop(items, type, ncol, /*dropSource=*/i);
applyDrop(items, type, ncol, /*dropSource=*/i, matchNames);
}
}
UNPROTECT(1); // listNames
Expand All @@ -294,7 +300,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
if (length(selectSxp)) {
const int n = length(selectSxp);
if (isString(selectSxp)) {
selectInts = INTEGER(PROTECT(chmatch(selectSxp, colNamesSxp, NA_INTEGER))); nprotect++;
selectInts = INTEGER(PROTECT(chmatch(selectSxp, matchNames, NA_INTEGER))); nprotect++;
for (int i = 0; i < n; i++) if (selectInts[i] == NA_INTEGER)
DTWARN(_("Column name '%s' not found in column name header (case sensitive), skipping."), CHAR(STRING_ELT(selectSxp, i)));
} else {
Expand All @@ -318,6 +324,28 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
else type[i] = CT_DROP;
}
}
// override col names if user provided them
if (!isNull(cNamesSxp)) {
int ncnames = LENGTH(cNamesSxp);
if (hasHeader) {
int ndrop = 0;
for (int i = 0; i < ncol; i++) if (type[i] == CT_DROP) ndrop++;
if (ncnames != (ncol - ndrop)) STOP(_("Can't assign %d names to a %d-column data.table"), ncnames, ncol - ndrop);
int *selectRankD = selectRank ? INTEGER(selectRank) : NULL;
int cname_idx = 0;
for (int i = 0; i < ncol; i++) {
if (type[i] != CT_DROP) {
const int idx = selectRankD ? (selectRankD[i] - 1) : cname_idx++;
SET_STRING_ELT(colNamesSxp, i, STRING_ELT(cNamesSxp, idx));
}
}
} else {
if (ncnames != ncol) STOP(_("Can't assign %d names to a %d-column data.table"), ncnames, ncol);
for (int i = 0; i < ncol; i++) {
SET_STRING_ELT(colNamesSxp, i, STRING_ELT(cNamesSxp, i));
}
}
}
colClassesAs = NULL; // any coercions we can't handle here in C are deferred to R (to handle with methods::as) via this attribute
if (length(colClassesSxp)) {
SEXP typeRName_sxp = PROTECT(allocVector(STRSXP, NUT));
Expand Down
Loading