diff --git a/..Rcheck/00check.log b/..Rcheck/00check.log new file mode 100644 index 0000000000..7f317dfe23 --- /dev/null +++ b/..Rcheck/00check.log @@ -0,0 +1,10 @@ +* using log directory ‘/home/skit/data.table/..Rcheck’ +* using R version 3.5.0 (2018-04-23) +* using platform: x86_64-pc-linux-gnu (64-bit) +* using session charset: UTF-8 +* using option ‘--no-manual’ +* checking for file ‘./DESCRIPTION’ ... ERROR +Required fields missing or empty: + ‘Author’ ‘Maintainer’ +* DONE +Status: 1 ERROR diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 710a0c0cb2..1c9791a02c 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -9,15 +9,15 @@ extra.args.6107 <- c( extra.test.list <- list() for (extra.arg in extra.args.6107){ this.test <- atime::atime_test( + FasterIO = "b70d0267ae89d3fffe8f4a5a6041dcb131709e97", # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6925) that reduced time usage + Slow = "e9087ce9860bac77c51467b19e92cf4b72ca78c7", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/a77e8c22e44e904835d7b34b047df2eff069d1f2) of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue + Fast = "a77e8c22e44e904835d7b34b047df2eff069d1f2", # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue setup = { set.seed(1) DT = data.table(date=.Date(sample(20000, N, replace=TRUE))) tmp_csv = tempfile() fwrite(DT, tmp_csv) - }, - FasterIO = "60a01fa65191c44d7997de1843e9a1dfe5be9f72", # First commit of the PR (https://github.com/Rdatatable/data.table/pull/6925/commits) that reduced time usage - Slow = "e9087ce9860bac77c51467b19e92cf4b72ca78c7", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/a77e8c22e44e904835d7b34b047df2eff069d1f2) of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue - Fast = "a77e8c22e44e904835d7b34b047df2eff069d1f2") # Merge commit of the PR 
(https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue + }) this.test$expr = str2lang(sprintf("data.table::fread(tmp_csv, %s)", extra.arg)) extra.test.list[[sprintf("fread(%s) improved in #6107", extra.arg)]] <- this.test } @@ -26,14 +26,6 @@ for (extra.arg in extra.args.6107){ for(retGrp_chr in c("T","F"))extra.test.list[[sprintf( "forderv(retGrp=%s) improved in #4386", retGrp_chr )]] <- list( - setup = quote({ - dt <- data.table(group = rep(1:2, l=N)) - }), - expr = substitute({ - old.opt <- options(datatable.forder.auto.index = TRUE) # required for test, un-documented, comments in forder.c say it is for debugging only. - data.table:::forderv(dt, "group", retGrp = RETGRP) - options(old.opt) # so the option does not affect other tests. - }, list(RETGRP=eval(str2lang(retGrp_chr)))), ## From ?bench::mark, "Each expression will always run at least twice, ## once to measure the memory allocation and store results ## and one or more times to measure timing." @@ -44,7 +36,15 @@ for(retGrp_chr in c("T","F"))extra.test.list[[sprintf( ## Timings should be constant if the cached index is used (Fast), ## and (log-)linear if the index is re-computed (Slow). Slow = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/b0efcf59442a7d086c6df17fa6a45c81b082322e) in the PR (https://github.com/Rdatatable/data.table/pull/4386/commits) where the performance was improved. - Fast = "ffe431fbc1fe2d52ed9499f78e7e16eae4d71a93" # Last commit of the PR (https://github.com/Rdatatable/data.table/pull/4386/commits) where the performance was improved. + Fast = "1a84514f6d20ff1f9cc614ea9b92ccdee5541506", # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/4386/commits) where the performance was improved. 
+ setup = quote({ + dt <- data.table(group = rep(1:2, l=N)) + }), + expr = substitute({ + old.opt <- options(datatable.forder.auto.index = TRUE) # required for test, un-documented, comments in forder.c say it is for debugging only. + data.table:::forderv(dt, "group", retGrp = RETGRP) + options(old.opt) # so the option does not affect other tests. + }, list(RETGRP=eval(str2lang(retGrp_chr)))) ) # A list of performance tests. @@ -69,7 +69,7 @@ for(retGrp_chr in c("T","F"))extra.test.list[[sprintf( # nolint start: undesirable_operator_linter. ':::' needed+appropriate here. test.list <- atime::atime_test_list( # Common N and pkg.edit.fun are defined here, and inherited in all test cases below which do not re-define them. - N = as.integer(10^seq(1, 7, by=0.25)), + N = as.integer(10^seq(1, 7, by=0.5)), # A function to customize R package metadata and source files to facilitate version-specific installation and testing. # # This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) @@ -136,6 +136,125 @@ test.list <- atime::atime_test_list( "NAMESPACE", sprintf('useDynLib\\("?%s"?', Package_regex), paste0('useDynLib(', new.Package_)) + pkg_find_replace( + file.path("src", "Makevars.*in"), + "@PKG_CFLAGS@", "@PKG_CFLAGS@ -DSTRING_PTR_RO=STRING_PTR_RO") + backports = c( + "src/data.table.h" = ' + #include + #if R_VERSION >= R_Version(4, 6, 0) + // backports.c + void SETLENGTH(SEXP x, R_xlen_t n); + R_xlen_t TRUELENGTH(SEXP x); + void SET_TRUELENGTH(SEXP x, R_xlen_t n); + void SET_GROWABLE_BIT(SEXP); + int LEVELS(SEXP); + int NAMED(SEXP); + #define REFCNT(x) NAMED(x) + SEXP ATTRIB(SEXP); + void SET_ATTRIB(SEXP, SEXP); + int OBJECT(SEXP); + void SET_OBJECT(SEXP, int); + #define isFrame(x) isDataFrame(x) + #define GetOption(x, none) GetOption1(x) + #undef findVar // Rf_ mapping remains + #define findVar(sym, env) R_getVar(sym, env, FALSE) + #define 
STRING_PTR(x) ((SEXP *)STRING_PTR_RO(x)) + int IS_S4_OBJECT(SEXP); + void SET_S4_OBJECT(SEXP); + void UNSET_S4_OBJECT(SEXP); + void SET_TYPEOF(SEXP, int); + #define VECTOR_ELT(x, i) VECTOR_ELT_(x, i) + SEXP VECTOR_ELT_(SEXP, R_xlen_t); + #define VECTOR_PTR(x) ((SEXP*)DATAPTR_RO(x)) + #define DATAPTR(x) ((void*)DATAPTR_RO(x)) + #endif + ', + "src/backports.c" = ' + #include "data.table.h" + #if R_VERSION >= R_Version(4, 6, 0) + #define NAMED_BITS 16 + struct sxpinfo_struct { + SEXPTYPE type : TYPE_BITS; // in Rinternals.h + unsigned int scalar: 1; + unsigned int obj : 1; + unsigned int alt : 1; + unsigned int gp : 16; + unsigned int mark : 1; + unsigned int debug : 1; + unsigned int trace : 1; + unsigned int spare : 1; + unsigned int gcgen : 1; + unsigned int gccls : 3; + unsigned int named : NAMED_BITS; + unsigned int extra : 32 - NAMED_BITS; + }; + + struct vecsxp_struct { + R_xlen_t length; + R_xlen_t truelength; + }; + + typedef struct VECTOR_SEXPREC { + struct sxpinfo_struct sxpinfo; + SEXP attrib; + SEXP gengc_next_node, gengc_prev_node; + struct vecsxp_struct vecsxp; + } *VECSEXP; + + void SETLENGTH(SEXP x, R_xlen_t n) { + ((VECSEXP)x)->vecsxp.length = n; + } + R_xlen_t TRUELENGTH(SEXP x) { + return ((VECSEXP)x)->vecsxp.truelength; + } + void SET_TRUELENGTH(SEXP x, R_xlen_t n) { + ((VECSEXP)x)->vecsxp.truelength = n; + } + void SET_GROWABLE_BIT(SEXP x) { + ((VECSEXP)x)->sxpinfo.gp |= 0x20; + } + int LEVELS(SEXP x) { + return ((VECSEXP)x)->sxpinfo.gp; + } + int NAMED(SEXP x) { + return ((VECSEXP)x)->sxpinfo.named; + } + int OBJECT(SEXP x) { + return ((VECSEXP)x)->sxpinfo.obj; + } + void SET_OBJECT(SEXP x, int o) { + ((VECSEXP)x)->sxpinfo.obj = o; + } + SEXP ATTRIB(SEXP x) { + return ((VECSEXP)x)->attrib; + } + void SET_ATTRIB(SEXP x, SEXP att) { + ((VECSEXP)x)->attrib = att; + } + #define S4_OBJECT (1<<4) + int IS_S4_OBJECT(SEXP x) { + return ((VECSEXP)x)->sxpinfo.gp & S4_OBJECT; + } + void SET_S4_OBJECT(SEXP x) { + ((VECSEXP)x)->sxpinfo.gp |= S4_OBJECT; + } + 
void UNSET_S4_OBJECT(SEXP x) { + ((VECSEXP)x)->sxpinfo.gp &= ~S4_OBJECT; + } + void SET_TYPEOF(SEXP x, int type) { + ((VECSEXP)x)->sxpinfo.type = type; + } + SEXP VECTOR_ELT_(SEXP x, R_xlen_t i) { + return ALTREP(x) ? (VECTOR_ELT)(x, i) : ((SEXP*)DATAPTR_RO(x))[i]; + } + #endif + ') + for (n in names(backports)) { + f = file(file.path(new.pkg.path, n), "a") + writeLines(backports[[n]], f) + close(f) + } }, # Constant overhead improvement https://github.com/Rdatatable/data.table/pull/6925 @@ -145,9 +264,9 @@ test.list <- atime::atime_test_list( setup = { fwrite(iris[1], iris.csv <- tempfile()) }, - expr = replicate(N, data.table::fread(iris.csv)), - Fast = "60a01fa65191c44d7997de1843e9a1dfe5be9f72", # First commit of the PR (https://github.com/Rdatatable/data.table/pull/6925/commits) that reduced time usage - Slow = "e25ea80b793165094cea87d946d2bab5628f70a6" # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/60a01fa65191c44d7997de1843e9a1dfe5be9f72) + Fast = "b70d0267ae89d3fffe8f4a5a6041dcb131709e97", # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6925) that reduced time usage + Slow = "e25ea80b793165094cea87d946d2bab5628f70a6", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/60a01fa65191c44d7997de1843e9a1dfe5be9f72) + expr = replicate(N, data.table::fread(iris.csv)) ), # Performance regression discussed in https://github.com/Rdatatable/data.table/issues/4311 @@ -158,10 +277,10 @@ test.list <- atime::atime_test_list( dt <- data.table(a = sample.int(N)) setindexv(dt, "a") }, - expr = data.table:::shallow(dt), # Before = "", This needs to be updated later as there are two issues here: A) The source of regression (or the particular commit that led to it) is not clear; B) Older versions of data.table are having problems when being installed in this manner (This includes commits from before March 20 2020, when the issue that discovered or first mentioned the regression was created) 
Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression - Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) + Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be", # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) + expr = data.table:::shallow(dt)), # Test based on https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced from a commit in https://github.com/Rdatatable/data.table/pull/4491 @@ -176,10 +295,10 @@ test.list <- atime::atime_test_list( key = "g") dt_mod <- copy(dt) }, - expr = data.table:::`[.data.table`(dt_mod, , N := .N, by = g), - Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR (https://github.com/Rdatatable/data.table/pull/4491/commits) that introduced the issue - Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR (https://github.com/Rdatatable/data.table/pull/4491/commits) that introduced the issue - Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR (https://github.com/Rdatatable/data.table/pull/5463/commits) that fixed the regression + Before = "d47a83fb2e25582e508f191f87a31ca81b736b57", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/196f420b50181b92036538776956ddf2c5b7a5a1) in the PR (https://github.com/Rdatatable/data.table/pull/4491/commits) that introduced the issue + Regression = "85adf09e3463838d547977ae9bc75e3b37f9cbaf", # Merge commit of the PR 
(https://github.com/Rdatatable/data.table/pull/4491) that introduced the issue + Fixed = "19b7866112614db53eb3e909c097407d91cd6738", # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/5463) that fixed the regression + expr = data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), # Issue reported in https://github.com/Rdatatable/data.table/issues/5426 # Test case adapted from https://github.com/Rdatatable/data.table/pull/5427#issue-1323678063 which is the fix PR. @@ -188,12 +307,12 @@ test.list <- atime::atime_test_list( L <- replicate(N, 1, simplify = FALSE) setDT(L) }, + Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) in the PR (https://github.com/Rdatatable/data.table/pull/5427/commits) that fixes the issue + Fast = "2487c61656335764980e478c323f7e6ce4e6d4ca", # Merge commit in the PR (https://github.com/Rdatatable/data.table/pull/5427) that fixes the issue expr = { data.table:::setattr(L, "class", NULL) data.table:::setDT(L) - }, - Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) in the PR (https://github.com/Rdatatable/data.table/pull/5427/commits) that fixes the issue - Fast = "af48a805e7a5026a0c2d0a7fd9b587fea5cfa3c4"), # Last commit in the PR (https://github.com/Rdatatable/data.table/pull/5427/commits) that fixes the issue + }), # Test case adapted from https://github.com/Rdatatable/data.table/issues/4200#issuecomment-645980224 which is where the issue was reported. 
# Fixed in https://github.com/Rdatatable/data.table/pull/4558 @@ -209,7 +328,7 @@ test.list <- atime::atime_test_list( PR7401="0216983c51e03e3f61d5e6f08f4ba0c42cceb22c", # Merge commit (https://github.com/Rdatatable/data.table/commit/0216983c51e03e3f61d5e6f08f4ba0c42cceb22c) of a PR (https://github.com/Rdatatable/data.table/pull/7401) which increased speed and memory usage of this test (https://github.com/Rdatatable/data.table/issues/7687) Before = "7a9eaf62ede487625200981018d8692be8c6f134", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/515de90a6068911a148e54343a3503043b8bb87c) in the PR (https://github.com/Rdatatable/data.table/pull/4164/commits) that introduced the regression Regression = "c152ced0e5799acee1589910c69c1a2c6586b95d", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/15f0598b9828d3af2eb8ddc9b38e0356f42afe4f) in the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression - Fixed = "f750448a2efcd258b3aba57136ee6a95ce56b302", # Second commit of the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression + Fixed = "ba32f3cba38ec270587e395f6e6c26a80be36be6", # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/4558) that fixes the regression expr = data.table:::`[.data.table`(d, , max(v1) - min(v2), by = id)), # Issue with sorting again when already sorted, as reported in https://github.com/Rdatatable/data.table/issues/4498 @@ -220,11 +339,11 @@ test.list <- atime::atime_test_list( L = as.data.table(as.character(rnorm(N, 1, 0.5))) setkey(L, V1) }, - ## New DT can safely retain key. 
- expr = data.table:::`[.data.table`(L, , .SD), - Fast = "353dc7a6b66563b61e44b2fa0d7b73a0f97ca461", # Close-to-last merge commit in the PR (https://github.com/Rdatatable/data.table/pull/4501/commits) that fixes the issue + Fast = "680b5e8e6d3f16a09dfb2f86ac7b2ce5ce70c3f1", # Merge commit in the PR (https://github.com/Rdatatable/data.table/pull/4501/commits) that fixes the issue Slow = "3ca83738d70d5597d9e168077f3768e32569c790", # Circa 2024 master parent of close-to-last merge commit (https://github.com/Rdatatable/data.table/commit/353dc7a6b66563b61e44b2fa0d7b73a0f97ca461) in the PR (https://github.com/Rdatatable/data.table/pull/4501/commits) that fixes the issue - Slower = "cacdc92df71b777369a217b6c902c687cf35a70d"), # Circa 2020 parent of the first commit (https://github.com/Rdatatable/data.table/commit/74636333d7da965a11dad04c322c752a409db098) in the PR (https://github.com/Rdatatable/data.table/pull/4501/commits) that fixes the issue + Slower = "cacdc92df71b777369a217b6c902c687cf35a70d", # Circa 2020 parent of the first commit (https://github.com/Rdatatable/data.table/commit/74636333d7da965a11dad04c322c752a409db098) in the PR (https://github.com/Rdatatable/data.table/pull/4501/commits) that fixes the issue + ## New DT can safely retain key. + expr = data.table:::`[.data.table`(L, , .SD)), # Test case adapted from https://github.com/Rdatatable/data.table/issues/6286#issue-2412141289 which is where the issue was reported. 
# Fixed in https://github.com/Rdatatable/data.table/pull/6296 @@ -233,9 +352,9 @@ test.list <- atime::atime_test_list( dt = data.table(a = 1:N) dt_mod <- copy(dt) }, - expr = data.table:::`[.data.table`(dt_mod, , 1, by = a, verbose = TRUE), Slow = "a01f00f7438daf4612280d6886e6929fa8c8f76e", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/fc0c1e76408c34a8482f16f7421d262c7f1bde32) in the PR (https://github.com/Rdatatable/data.table/pull/6296/commits) that fixes the issue - Fast = "f248bbe6d1204dfc8def62328788eaadcc8e17a1"), # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6296) that fixes the issue + Fast = "f248bbe6d1204dfc8def62328788eaadcc8e17a1", # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6296) that fixes the issue + expr = data.table:::`[.data.table`(dt_mod, , 1, by = a, verbose = TRUE)), # Test case adapted from https://github.com/Rdatatable/data.table/issues/5492#issue-1416598382 which is where the issue was reported, # and from https://github.com/Rdatatable/data.table/pull/5493#issue-1416656788 which is the fix PR. 
@@ -244,9 +363,9 @@ test.list <- atime::atime_test_list( df <- data.frame(x = runif(N)) dt <- as.data.table(df) }, - expr = data.table:::transform.data.table(dt, y = round(x)), Slow = "0895fa247afcf6b38044bd5f56c0d209691ddb31", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/93ce3ce1373bf733ebd2036e2883d2ffe377ab58) in the PR (https://github.com/Rdatatable/data.table/pull/5493/commits) that fixes the issue - Fast = "2d1a0575f87cc50e90f64825c30d7a6cb6b05dd7"), # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/5493) that fixes the issue + Fast = "2d1a0575f87cc50e90f64825c30d7a6cb6b05dd7", # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/5493) that fixes the issue + expr = data.table:::transform.data.table(dt, y = round(x))), # Test case created directly using the atime code below (not adapted from any other benchmark), based on the issue/fix PR https://github.com/Rdatatable/data.table/pull/5054#issue-930603663 "melt should be more efficient when there are missing input columns." "melt improved in #5054" = atime::atime_test( @@ -258,9 +377,9 @@ test.list <- atime::atime_test_list( x }) }, - expr = data.table:::melt(DT, measure.vars = measure.vars), Slow = "fd24a3105953f7785ea7414678ed8e04524e6955", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/ed72e398df76a0fcfd134a4ad92356690e4210ea) of the PR (https://github.com/Rdatatable/data.table/pull/5054) that fixes the issue - Fast = "ed72e398df76a0fcfd134a4ad92356690e4210ea"), # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/5054) that fixes the issue # Test case created directly using the atime code below (not adapted from any other benchmark), based on the issue/fix PR https://github.com/Rdatatable/data.table/pull/5054#issue-930603663 "melt should be more efficient when there are missing input columns." 
+ Fast = "ed72e398df76a0fcfd134a4ad92356690e4210ea", # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/5054) that fixes the issue + expr = data.table:::melt(DT, measure.vars = measure.vars)), # Test case created from @tdhock's comment https://github.com/Rdatatable/data.table/pull/6393#issuecomment-2327396833, in turn adapted from @philippechataignon's comment https://github.com/Rdatatable/data.table/pull/6393#issuecomment-2326714012 "fwrite refactored in #6393" = atime::atime_test( @@ -271,42 +390,43 @@ test.list <- atime::atime_test_list( L[, paste0("V", 1:NC) := replicate(NC, rnorm(N), simplify=FALSE)] out.csv <- tempfile() }, - expr = data.table::fwrite(L, out.csv, compress="gzip"), Before = "f339aa64c426a9cd7cf2fcb13d91fc4ed353cd31", # Parent of the first commit https://github.com/Rdatatable/data.table/commit/fcc10d73a20837d0f1ad3278ee9168473afa5ff1 in the PR https://github.com/Rdatatable/data.table/pull/6393/commits with major change to fwrite with gzip. - PR = "3630413ae493a5a61b06c50e80d166924d2ef89a"), # Close-to-last merge commit in the PR. 
+ PR = "e0abdfcd79ba30efcf679e33cbb8eba897a46f9c", # merge commit of PR6393 + expr = data.table::fwrite(L, out.csv, compress="gzip")), - # Test case created directly using the atime code below (not adapted from any other benchmark), based on the PR, Removes unnecessary data.table call from as.data.table.array https://github.com/Rdatatable/data.table/pull/7010 - "as.data.table.array improved in #7010" = atime::atime_test( + # Test case created directly using the atime code below (not adapted from any other benchmark), based on the PR, Removes unnecessary data.table call from as.data.table.array https://github.com/Rdatatable/data.table/pull/7019 + "as.data.table.array improved in #7019" = atime::atime_test( setup = { dims = c(N, 1, 1) arr = array(seq_len(prod(dims)), dim=dims) }, - expr = data.table:::as.data.table.array(arr, na.rm=FALSE), - Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc) - Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency + Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc) in the PR that improves efficiency + Fast = "2715663fcf0344c3f7c73241d391d8de347bdb9d", # Merge commit of the PR that improves efficiency + expr = data.table:::as.data.table.array(arr, na.rm=FALSE)), + # https://github.com/Rdatatable/data.table/pull/7144 added the speedup code and this performance test. 
"isoweek improved in #7144" = atime::atime_test( setup = { set.seed(349) x = sample(Sys.Date() - 0:5000, N, replace=TRUE) }, - expr = data.table::isoweek(x), - Slow = "548410d23dd74b625e8ea9aeb1a5d2e9dddd2927", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/548410d23dd74b625e8ea9aeb1a5d2e9dddd2927) - Fast = "c0b32a60466bed0e63420ec105bc75c34590865e"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7144/commits) that uses a much faster implementation + Slow = "038e7f8c2bed60f38c3faa2cc2c4e339c3570b94", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/c0b32a60466bed0e63420ec105bc75c34590865e) in the PR + Fast = "ed2df986da6d3a4ff35bec1b0f75db2b767e3eb2", # Merge commit of the PR that uses a much faster implementation + expr = data.table::isoweek(x)), - # Regression introduced in #7404 (grouped by factor). + # Regression introduced by https://github.com/Rdatatable/data.table/pull/6890 and discussed in https://github.com/Rdatatable/data.table/issues/7404 (grouped by factor). 
"DT[by] max regression fixed in #7480" = atime::atime_test( - N = as.integer(10^seq(3, 5, by=0.5)), setup = { dt = data.table( id = as.factor(rep(seq_len(N), each = 100L)), V1 = 1L ) }, - expr = data.table:::`[.data.table`(dt, , base::max(V1, na.rm = TRUE), by = id), - Before = "476de7e3", - Regression = "6f49bf1", - Fixed = "b6ad1a4", - seconds.limit = 1), + Before = "2cb03162a21328cc5f68a8c3b0e554f5edfcb5b9", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/4cc77c617435b46a0faac35c56e7fb7b81c629fc) in the regression PR (https://github.com/Rdatatable/data.table/pull/6890/commits) + Regression = "6f49bf1935a3009e85ea1e6f9752ff68ffa47d9b", # Merge commit of the regression PR https://github.com/Rdatatable/data.table/pull/6890 + Fixed = "b6ad1a4bc2e44d47f3e86c296c924a809a26bf58", # Merge commit of the fix PR (https://github.com/Rdatatable/data.table/pull/7480) + seconds.limit = 1, + expr = data.table:::`[.data.table`(dt, , base::max(V1, na.rm = TRUE), by = id)), + tests=extra.test.list) # nolint end: undesirable_operator_linter. diff --git a/NEWS.md b/NEWS.md index 5d9e163ad7..2b11f9ac09 100644 --- a/NEWS.md +++ b/NEWS.md @@ -30,6 +30,8 @@ 5. `tables()` can now optionally report `data.table` objects stored one level deep inside list objects when `depth=1L`, [#2606](https://github.com/Rdatatable/data.table/issues/2606). Thanks @MichaelChirico for the report and @manmita for the PR +6. `yearqtr()` and `yearmon()` now gain an optional format specifier [#7694](https://github.com/Rdatatable/data.table/issues/7694). 'numeric' is the default, which preserves the original behavior, but 'character' formats `yearqtr()` as YYYYQ# (e.g. 2025Q2) and `yearmon()` as YYYYM## (e.g. 2025M02, 2025M10). Thanks to @jan-swissre for the report and @LunaticSage218 for the implementation. + ### BUG FIXES 1. 
`fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix. @@ -40,17 +42,15 @@ 4. `rowwiseDT()` now provides a helpful error message when a complex object that is not a list (e.g., a function) is provided as a cell value, instructing the user to wrap it in `list()`, [#7219](https://github.com/Rdatatable/data.table/issues/7219). Thanks @kylebutts for the report and @venom1204 for the fix. -5. Non-equi joins combining an equality condition with two inequality conditions on the same column (e.g., `on = .(id == id, val >= lo, val <= hi)`) no longer error, [#7641](https://github.com/Rdatatable/data.table/issues/7641). The internal `chmatchdup` remapping of duplicate `rightcols` was overwriting the original column indices, causing downstream code to reference non-existent columns. Thanks @tarun-t for the report and fix, and @aitap for the diagnosis. - -6. By-reference sub-assignments of strings to factor columns now _actually_ match the levels in UTF-8 when required and now don't result in invalid factors being created, [#7648](https://github.com/Rdatatable/data.table/issues/7648), amending a previous incomplete fix to [#6886](https://github.com/Rdatatable/data.table/issues/6886) in v1.17.2. Thanks @BASS-JN for the report and @aitap for the fix. +5. `fread()` can now read from connections directly by spilling to a temporary file first, [#561](https://github.com/Rdatatable/data.table/issues/561). For the best throughput, point `tmpdir=` (or the global temp directory) to fast storage like an SSD or RAM. Thanks to Chris Neff for the report and @ben-schwen for the implementation. -7. `fread()` can now read from connections directly by spilling to a temporary file first, [#561](https://github.com/Rdatatable/data.table/issues/561). 
For the best throughput, point `tmpdir=` (or the global temp directory) to fast storage like an SSD or RAM. Thanks to Chris Neff for the report and @ben-schwen for the implementation. +6. `fread()` no longer replaces a literal header column name `"NA"` with an auto-generated `Vn` name when `na.strings` includes `"NA"`, [#5124](https://github.com/Rdatatable/data.table/issues/5124). Data rows still continue to parse `"NA"` as missing. Thanks @Mashin6 for the report and @shrektan for the fix. -8. `frollapply()` no longer produces output longer than the input when the window length is also longer than the input [#7646](https://github.com/Rdatatable/data.table/issues/7646). Thanks to @hadley-johnson for reporting and @jangorecki for the fix. +7. `fread()` would not give a warning when every second line of input was empty, [#3339](https://github.com/Rdatatable/data.table/issues/3339). Now, a warning message 'The rows in this file appear to be separated by blank lines.' is given and suggests to set `blank.lines.skip` to `TRUE`. Thanks to @Henrik-P for the report and @Asa-Henry for the fix. -9. `fread()` no longer replaces a literal header column name `"NA"` with an auto-generated `Vn` name when `na.strings` includes `"NA"`, [#5124](https://github.com/Rdatatable/data.table/issues/5124). Data rows still continue to parse `"NA"` as missing. Thanks @Mashin6 for the report and @shrektan for the fix. +8. `test()` now reports multiple expected warnings more clearly when `warning=` has length greater than 1L, instead of printing a collapsed or repeated mismatch summary after messages like `Test 1 produced 1 warnings but expected 2`, [#7092](https://github.com/Rdatatable/data.table/issues/7092). Expected and observed warnings are now printed on separate aligned lines, making small differences easier to spot. Thanks @MichaelChirico for the report, @ben-schwen for assistance, and @lucaslarson25, @tjdavis51, @D3VTHSTVR, and @car723 for the fix. -10. 
`fread()` no longer misreads dates with negative years, [#7704](https://github.com/Rdatatable/data.table/issues/7704). Thanks to @kevinushey for the report and @aitap for the fix. +9. `fread()` now correctly preserves empty trailing fields when `fill=TRUE` is used with irregular rows, [#7099](https://github.com/Rdatatable/data.table/issues/7099). Thanks to @aitap for the report and @skitsy24 for the fix. ### Notes @@ -66,6 +66,20 @@ 6. Enhanced tests for OpenMP support, detecting incompatibilities such as R-bundled runtime _vs._ newer Xcode and testing for a manually installed runtime from , [#6622](https://github.com/Rdatatable/data.table/issues/6622). Thanks to @dvg-p4 for initial report and testing, @twitched for the pointers, @tdhock and @aitap for the fix. +7. Verbose outputs from `frolladaptivefun()` and `frollfun()` are now clearer and more user friendly [#7021](https://github.com/Rdatatable/data.table/issues/7021). Thanks to @Omartech312, @aidengseay, @kkarissa, and @heb229 for the implementation, to @ben-schwen for the review, and to @jangorecki for the extensive guidance and review. + +## data.table [v1.18.4](https://github.com/Rdatatable/data.table/milestone/45) (6 May 2026) + +### BUG FIXES + +1. Non-equi joins combining an equality condition with two inequality conditions on the same column (e.g., `on = .(id == id, val >= lo, val <= hi)`) no longer error, [#7641](https://github.com/Rdatatable/data.table/issues/7641). The internal `chmatchdup` remapping of duplicate `rightcols` was overwriting the original column indices, causing downstream code to reference non-existent columns. Thanks @tarun-t for the report and fix, and @aitap for the diagnosis. + +2.
By-reference sub-assignments of strings to factor columns now _actually_ match the levels in UTF-8 when required and now don't result in invalid factors being created, [#7648](https://github.com/Rdatatable/data.table/issues/7648), amending a previous incomplete fix to [#6886](https://github.com/Rdatatable/data.table/issues/6886) in v1.17.2. Thanks @BASS-JN for the report and @aitap for the fix. + +3. `fread()` no longer misreads dates with negative years, [#7704](https://github.com/Rdatatable/data.table/issues/7704). Thanks to @kevinushey for the report and @aitap for the fix. + +4. `frollapply()` no longer produces output longer than the input when the window length is also longer than the input [#7646](https://github.com/Rdatatable/data.table/issues/7646). Thanks to @hadley-johnson for reporting and @jangorecki for the fix. + ## data.table [v1.18.2.1](https://github.com/Rdatatable/data.table/milestone/44?closed=1) (22 January 2026) ### BUG FIXES diff --git a/R/IDateTime.R b/R/IDateTime.R index 49fa5abda2..63588e7754 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -365,8 +365,30 @@ isoyear = function(x) as.integer(format(as.IDate(x), "%G")) month = function(x) convertDate(as.IDate(x), "month") quarter = function(x) convertDate(as.IDate(x), "quarter") year = function(x) convertDate(as.IDate(x), "year") -yearmon = function(x) convertDate(as.IDate(x), "yearmon") -yearqtr = function(x) convertDate(as.IDate(x), "yearqtr") +yearmon = function(x, format = c("numeric", "character")) { + format = match.arg(format) + x_as_idate = as.IDate(x) + ymon = convertDate(x_as_idate, "yearmon") + if (format == "numeric") return(ymon) + ans = rep(NA_character_, length(x_as_idate)) + ok = !is.na(x_as_idate) + yr = floor(ymon[ok]) + mon = round((ymon[ok] - yr) * 12) + 1L + ans[ok] = sprintf("%dM%02d", as.integer(yr), as.integer(mon)) + ans +} +yearqtr = function(x, format = c("numeric", "character")) { + format = match.arg(format) + x_as_idate = as.IDate(x) + yqtr = 
convertDate(x_as_idate, "yearqtr") + if (format == "numeric") return(yqtr) + ans = rep(NA_character_, length(x_as_idate)) + ok = !is.na(x_as_idate) + yr = floor(yqtr[ok]) + qtr = round((yqtr[ok] - yr) * 4) + 1L + ans[ok] = sprintf("%dQ%d", as.integer(yr), as.integer(qtr)) + ans +} convertDate = function(x, type) { type = match.arg(type, c("yday", "wday", "mday", "week", "month", "quarter", "year", "yearmon", "yearqtr")) diff --git a/R/print.data.table.R b/R/print.data.table.R index 88ff4ea505..37cd586f68 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -22,6 +22,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), stopifnot(isTRUEorFALSE(class)) if (col.names == "none" && class) warningf("Column classes will be suppressed when col.names is 'none'") + if (!shouldPrint(x)) { # := in [.data.table sets .global$print=address(x) to suppress the next print i.e., like <- does. See FAQ 2.22 and README item in v1.9.5 # The issue is distinguishing "> DT" (after a previous := in a function) from "> DT[,foo:=1]". To print.data.table(), there @@ -31,7 +32,26 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), # Other options investigated (could revisit): Cstack_info(), .Last.value gets set first before autoprint, history(), sys.status(), # topenv(), inspecting next statement in caller, using clock() at C level to timeout suppression after some number of cycles SYS = sys.calls() - if (identical(SYS[[1L]][[1L]], print) || # this is what auto-print looks like, i.e. '> DT' and '> DT[, a:=b]' in the terminal; see #3029. + + # TODO(R>=3.6): Remove this branch once the minimal supported R version is raised. No need for is_print_call. Just + # identical(SYS[[1L]][[1L]], print) + + # is_print_call detects whether print() was called either explicitly or through autoprint, + # is wrapped in a promise or not to account for R 3.4/3.5. 
+ is_print_call = FALSE + if (identical(SYS[[1L]][[1L]], print)) { + is_print_call = TRUE + } + # nocov start + else if (typeof(SYS[[1L]][[1L]]) == "promise") { + # in R 3.4 and R 3.5, auto-print uses a promise to reference base::print due to lazy loading + # safely evaluate promise to get the actual function + evaluated = tryCatch(eval(SYS[[1L]][[1L]]), error = function(e) NULL) + if (identical(evaluated, print)) { + is_print_call = TRUE + } + } # nocov end + if (is_print_call || # this is what auto-print looks like, i.e. '> DT' and '> DT[, a:=b]' in the terminal; see #3029. ( length(SYS) >= 3L && is.symbol(thisSYS <- SYS[[length(SYS)-2L]][[1L]]) && as.character(thisSYS) == 'source') ) { # suppress printing from source(echo = TRUE) calls, #2369 return(invisible(x)) diff --git a/R/test.data.table.R b/R/test.data.table.R index d37fba29b5..47e985c034 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -576,7 +576,16 @@ test = function(num, x, y=TRUE, } if (length(expected) != length(observed) && (!foreign || is.null(ignore.warning))) { # nocov start - catf("Test %s produced %d %ss but expected %d\n%s\n%s\n", numStr, length(observed), type, length(expected), paste("Expected:", expected), paste("Observed:", observed, collapse = "\n")) + align_messages = function(label, x) paste( + c( + paste0(label, x[1L]), + if (length(x) > 1L) paste0(strrep(" ", nchar(label)), x[-1L]) + ), + collapse = "\n" + ) + expected_text = align_messages("Expected: ", expected) + observed_text = align_messages("Observed: ", observed) + catf("Test %s produced %d %ss but expected %d\n%s\n%s\n", numStr, length(observed), type, length(expected), expected_text, observed_text) fail = TRUE # nocov end } else if (!foreign) { diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index 99c878d491..1d7f5dffd0 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -308,7 +308,7 @@ test(6000.0671, frollmean(c(1:2,NA,4:10), 4), c(rep(NA_real_, 6), 5.5, 6.5, 7.5, "frollfunR: 1:", 
"frollmeanFast: running for input length 10, window 4, hasnf 0, narm 0", "frollmeanFast: non-finite values are present in input, skip non-finite unaware attempt and run with extra care for NFs straighaway", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*" )) test(6000.0672, frollmean(c(1:2,NA,4:10), 4, has.nf=FALSE), c(rep(NA_real_, 6), 5.5, 6.5, 7.5, 8.5), output=c( @@ -317,7 +317,7 @@ test(6000.0672, frollmean(c(1:2,NA,4:10), 4, has.nf=FALSE), c(rep(NA_real_, 6), "frollfunR: 1:", "frollmeanFast: running for input length 10, window 4, hasnf -1, narm 0", "frollmeanFast: non-finite values are present in input, skip non-finite unaware attempt and run with extra care for NFs straighaway", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*" ), warning="has.nf=FALSE used but non-finite values are present in input, use default has.nf=NA to avoid this warning") test(6000.0673, frollmean(c(1:2,NA,4:10), 2, has.nf=FALSE), c(NA, 1.5, NA, NA, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5), output=c( @@ -326,7 +326,7 @@ test(6000.0673, frollmean(c(1:2,NA,4:10), 2, has.nf=FALSE), c(NA, 1.5, NA, NA, 4 "frollfunR: 1:", "frollmeanFast: running for input length 10, window 2, hasnf -1, narm 0", "frollmeanFast: non-finite values are present in input, re-running with extra care for NFs", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*" ), warning="has.nf=FALSE used but non-finite values are present in input, use default has.nf=NA to avoid this warning") test(6000.0674, frollmean(c(1:2,NA,4:10), 4, align="center"), c(rep(NA_real_, 4), 5.5, 6.5, 7.5, 8.5, NA, NA), output=c( @@ -335,7 +335,7 @@ test(6000.0674, frollmean(c(1:2,NA,4:10), 4, align="center"), c(rep(NA_real_, 4) "frollmeanFast: running for input length 10, window 4, hasnf 0, narm 0", "frollmeanFast: 
non-finite values are present in input, skip non-finite unaware attempt and run with extra care for NFs straighaway", "frollfun: align 0, shift answer by -2", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*" )) options(datatable.verbose=FALSE) @@ -436,7 +436,7 @@ test(6000.1196, frollmean(c(1:5,NA), 1:6, algo="exact", na.rm=TRUE, adaptive=TRU "frollfunR: 1:", "frolladaptivemeanExact: running in parallel for input length 6, hasnf 0, narm 1", "frolladaptivemeanExact: non-finite values are present in input, re-running with extra care for NFs", - "frolladaptivefun: processing fun 0 algo 1 took.*", + "frolladaptivefun: processing fun MEAN algo exact took.*", "frollfunR: processing.*took.*" )) #### exact na.rm=TRUE verbose=TRUE @@ -446,7 +446,7 @@ test(6000.1197, frollmean(c(1:5,NA), 2, algo="exact", na.rm=TRUE), output=c( "frollfunR: 1:", "frollmeanExact: running in parallel for input length 6, window 2, hasnf 0, narm 1", "frollmeanExact: non-finite values are present in input, re-running with extra care for NFs", - "frollfun: processing fun 0 algo 1 took.*", + "frollfun: processing fun MEAN algo exact took.*", "frollfunR: processing.*took.*" )) options(datatable.verbose=FALSE) @@ -675,50 +675,50 @@ test(6000.171, frollmean(x, n), output=c( "frollfunR: .*sequentially.*single rolling computation.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*")) test(6000.172, frollmean(list(x, x+1), n), output=c( "frollfunR: allocating memory for results 2x1", "frollfunR: .*in parallel.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: 2:", "frollmeanFast: running for input length 10, 
window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*")) test(6000.173, frollmean(x, c(n, n+1)), output=c( "frollfunR: allocating memory for results 1x2", "frollfunR: .*in parallel.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: 2:", "frollmeanFast: running for input length 10, window 4, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*")) test(6000.174, frollmean(list(x, x+1), c(n, n+1)), output=c( "frollfunR: allocating memory for results 2x2", "frollfunR: .*in parallel.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: 2:", "frollmeanFast: running for input length 10, window 4, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: 3:", "frollmeanFast: running for input length 10, window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: 4:", "frollmeanFast: running for input length 10, window 4, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*")) test(6000.175, frollmean(x, n, algo="exact"), output=c( "frollfunR: allocating memory for results 1x1", "frollfunR: .*algo='exact' is already parallelised.*", "frollfunR: 1:", "frollmeanExact: running in parallel for input length 10, window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 1 took.*", + "frollfun: processing fun MEAN algo exact took.*", "frollfunR: 
processing.*took.*")) test(6000.176, frollmean(x, n, align="center"), output=c( "frollfunR: allocating memory for results 1x1", @@ -726,7 +726,7 @@ test(6000.176, frollmean(x, n, align="center"), output=c( "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasnf 0, narm 0", "frollfun: align 0, shift answer by -1", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*")) test(6000.177, frollmean(x, n, align="left"), output=c( "frollfunR: allocating memory for results 1x1", @@ -734,7 +734,7 @@ test(6000.177, frollmean(x, n, align="left"), output=c( "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasnf 0, narm 0", "frollfun: align -1, shift answer by -2", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*")) nn = c(1:4,2:3,1:4) test(6000.178, frollmean(x, nn, adaptive=TRUE), output=c( @@ -742,14 +742,14 @@ test(6000.178, frollmean(x, nn, adaptive=TRUE), output=c( "frollfunR: .*sequentially because adaptive.*", "frollfunR: 1:", "frolladaptivemeanFast: running for input length 10, hasnf 0, narm 0", - "frolladaptivefun: processing fun 0 algo 0 took.*", + "frolladaptivefun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*")) test(6000.179, frollmean(x, nn, algo="exact", adaptive=TRUE), output=c( "frollfunR: allocating memory for results 1x1", "frollfunR: .*algo='exact' is already parallelised.*", "frollfunR: 1:", "frolladaptivemeanExact: running in parallel for input length 10, hasnf 0, narm 0", - "frolladaptivefun: processing fun 0 algo 1 took.*", + "frolladaptivefun: processing fun MEAN algo exact took.*", "frollfunR: processing.*took.*")) x[8] = NA @@ -759,7 +759,7 @@ test(6000.180, frollmean(x, n), output=c( "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasnf 0, narm 0", "frollmeanFast: non-finite values are present in input, 
re-running with extra care for NFs", - "frollfun: processing fun 0 algo 0 took.*", + "frollfun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*")) test(6000.181, frollmean(x, n, algo="exact"), output=c( "frollfunR: allocating memory for results 1x1", @@ -767,7 +767,7 @@ test(6000.181, frollmean(x, n, algo="exact"), output=c( "frollfunR: 1:", "frollmeanExact: running in parallel for input length 10, window 3, hasnf 0, narm 0", "frollmeanExact: non-finite values are present in input, na.rm=FALSE and algo='exact' propagates NFs properply, no need to re-run", - "frollfun: processing fun 0 algo 1 took.*", + "frollfun: processing fun MEAN algo exact took.*", "frollfunR: processing.*took.*")) test(6000.182, frollmean(x, nn, adaptive=TRUE), output=c( "frollfunR: allocating memory for results 1x1", @@ -775,7 +775,7 @@ test(6000.182, frollmean(x, nn, adaptive=TRUE), output=c( "frollfunR: 1:", "frolladaptivemeanFast: running for input length 10, hasnf 0, narm 0", "frolladaptivemeanFast: non-finite values are present in input, re-running with extra care for NFs", - "frolladaptivefun: processing fun 0 algo 0 took.*", + "frolladaptivefun: processing fun MEAN algo fast took.*", "frollfunR: processing.*took.*")) test(6000.183, frollmean(x, nn, algo="exact", adaptive=TRUE), output=c( "frollfunR: allocating memory for results 1x1", @@ -783,7 +783,7 @@ test(6000.183, frollmean(x, nn, algo="exact", adaptive=TRUE), output=c( "frollfunR: 1:", "frolladaptivemeanExact: running in parallel for input length 10, hasnf 0, narm 0", "frolladaptivemeanExact: non-finite values are present in input, na.rm=FALSE and algo='exact' propagates NFs properply, no need to re-run", - "frolladaptivefun: processing fun 0 algo 1 took.*", + "frolladaptivefun: processing fun MEAN algo exact took.*", "frollfunR: processing.*took.*")) d = as.data.table(list(1:10/2, 10:1/4)) @@ -792,7 +792,7 @@ test(6000.184, frollmean(d[,1], 3, algo="exact"), output=c( "frollfunR: .*algo='exact' is already 
parallelised.*", "frollfunR: 1:", "frollmeanExact: running in parallel for input length 10, window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 1 took.*", + "frollfun: processing fun MEAN algo exact took.*", "frollfunR: processing.*took.*" )) test(6000.185, frollmean(d, 3:4, algo="exact"), output=c( @@ -800,16 +800,16 @@ test(6000.185, frollmean(d, 3:4, algo="exact"), output=c( "frollfunR: .*sequentially.*algo='exact'.*already parallelised.*", "frollfunR: 1:", "frollmeanExact: running in parallel for input length 10, window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 1 took.*", + "frollfun: processing fun MEAN algo exact took.*", "frollfunR: 2:", "frollmeanExact: running in parallel for input length 10, window 4, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 1 took.*", + "frollfun: processing fun MEAN algo exact took.*", "frollfunR: 3:", "frollmeanExact: running in parallel for input length 10, window 3, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 1 took.*", + "frollfun: processing fun MEAN algo exact took.*", "frollfunR: 4:", "frollmeanExact: running in parallel for input length 10, window 4, hasnf 0, narm 0", - "frollfun: processing fun 0 algo 1 took.*", + "frollfun: processing fun MEAN algo exact took.*", "frollfunR: processing.*took.*" )) options(datatable.verbose=FALSE) @@ -861,9 +861,9 @@ options(datatable.verbose=FALSE) ## frollmax adaptive options(datatable.verbose=TRUE) ## adaptive frollmax no fast algo -test(6000.3, frollmax(1:4, c(2,2,2,2), adaptive=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1") -test(6000.3001, frollmax(1:4, c(2,2,2,2), algo="fast", adaptive=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1") -test(6000.3002, frollmax(1:4, c(2,2,2,2), algo="exact", adaptive=TRUE), notOutput="frolladaptivefun: algo 0 not implemented, fall back to 1") +test(6000.3, frollmax(1:4, c(2,2,2,2), adaptive=TRUE), output="frolladaptivefun: algo fast not implemented, fall back 
to exact") +test(6000.3001, frollmax(1:4, c(2,2,2,2), algo="fast", adaptive=TRUE), output="frolladaptivefun: algo fast not implemented, fall back to exact") +test(6000.3002, frollmax(1:4, c(2,2,2,2), algo="exact", adaptive=TRUE), notOutput="frolladaptivefun: algo fast not implemented, fall back to exact") options(datatable.verbose=FALSE) n = c(3,2,2,4,2,1,4,8) x = c(7,2,3,6,3,2,6,6) # no NA @@ -1022,9 +1022,9 @@ test(6000.564, frollapply(FUN=mean, 1:3, list(c(0,-1,1)), adaptive=TRUE), error= ## frollmin adaptive options(datatable.verbose=TRUE) ## adaptive frollmin no fast algo -test(6000.6, frollmin(1:4, c(2,2,2,2), adaptive=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1") -test(6000.6001, frollmin(1:4, c(2,2,2,2), algo="fast", adaptive=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1") -test(6000.6002, frollmin(1:4, c(2,2,2,2), algo="exact", adaptive=TRUE), notOutput="frolladaptivefun: algo 0 not implemented, fall back to 1") +test(6000.6, frollmin(1:4, c(2,2,2,2), adaptive=TRUE), output="frolladaptivefun: algo fast not implemented, fall back to exact") +test(6000.6001, frollmin(1:4, c(2,2,2,2), algo="fast", adaptive=TRUE), output="frolladaptivefun: algo fast not implemented, fall back to exact") +test(6000.6002, frollmin(1:4, c(2,2,2,2), algo="exact", adaptive=TRUE), notOutput="frolladaptivefun: algo fast not implemented, fall back to exact") options(datatable.verbose=FALSE) n = c(3,2,2,4,2,1,4,8) x = c(7,2,3,6,3,2,6,6) # no NA @@ -1200,7 +1200,7 @@ test(6000.931, frollprod(1:3, 2), c(NA, 2, 6), output="frollprodFast: running fo test(6000.932, frollprod(1:3, 2, align="left"), c(2, 6, NA), output="frollfun: align") test(6000.933, frollprod(c(1,2,NA), 2), c(NA, 2, NA), output="non-finite values are present in input, re-running with extra care for NFs") test(6000.934, frollprod(c(NA,2,3), 2), c(NA, NA, 6), output="non-finite values are present in input, skip non-finite inaware attempt and run with extra care for NFs 
straighaway") -test(6000.935, frollprod(1:3, c(2,2,2), adaptive=TRUE), c(NA, 2, 6), output="algo 0 not implemented, fall back to 1") +test(6000.935, frollprod(1:3, c(2,2,2), adaptive=TRUE), c(NA, 2, 6), output="algo fast not implemented, fall back to exact") test(6000.936, frollprod(c(NA,2,3), c(2,2,2), adaptive=TRUE), c(NA, NA, 6), output="non-finite values are present in input, na.rm=FALSE and algo='exact' propagates NFs properply, no need to re-run") options(datatable.verbose=FALSE) # floating point overflow @@ -1433,7 +1433,7 @@ test(6001.715, frollvar(1:3, 0, algo="exact"), c(NA_real_,NA_real_,NA_real_), op test(6001.716, frollvar(c(1:2,NA), 0, algo="exact"), c(NA_real_,NA_real_,NA_real_)) test(6001.717, frollvar(c(1:2,NA), 0, algo="exact", na.rm=TRUE), c(NA_real_,NA_real_,NA_real_)) test(6001.718, frollvar(c(1:2,NA), 2), c(NA,0.5,NA), options=c("datatable.verbose"=TRUE), output="redirecting to frollvarExact") -test(6001.721, frollvar(adaptive=TRUE, 1:3, c(2,0,2)), c(NA,NA,0.5), options=c("datatable.verbose"=TRUE), output="not implemented, fall back to") +test(6001.721, frollvar(adaptive=TRUE, 1:3, c(2,0,2)), c(NA,NA,0.5), options=c("datatable.verbose"=TRUE), output="algo fast not implemented, fall back to exact") test(6001.722, frollvar(adaptive=TRUE, 1:3, c(2,0,2), fill=99), c(99,NA,0.5)) test(6001.723, frollvar(adaptive=TRUE, c(1:2,NA), c(2,0,2)), c(NA_real_,NA_real_,NA_real_)) test(6001.724, frollvar(adaptive=TRUE, c(1:2,NA), c(2,0,2), na.rm=TRUE), c(NA_real_,NA_real_,NA_real_)) @@ -1492,7 +1492,7 @@ test(6001.8194, frollsd(c(NA,2:3), 2, has.nf=FALSE), c(NA,NA,sqrt(0.5)), warning test(6001.8195, frollsd(c(NA,2:3), 2), c(NA,NA,sqrt(0.5)), options=c("datatable.verbose"=TRUE), output="skip non-finite inaware attempt and run with extra care") test(6001.8196, frollsd(c(NA,2:3), 2, has.nf=FALSE, algo="exact"), c(NA,NA,sqrt(0.5)), warning="used but non-finite values are present in input") test(6001.8197, frollsd(c(NA,2:3), 2, algo="exact", na.rm=TRUE), 
c(NA,NA,sqrt(0.5)), options=c("datatable.verbose"=TRUE), output="re-running with extra care for NF") -test(6001.8201, frollsd(adaptive=TRUE, 1:3, c(2,2,2)), c(NA,sqrt(0.5),sqrt(0.5)), options=c("datatable.verbose"=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1") +test(6001.8201, frollsd(adaptive=TRUE, 1:3, c(2,2,2)), c(NA,sqrt(0.5),sqrt(0.5)), options=c("datatable.verbose"=TRUE), output="frolladaptivefun: algo fast not implemented, fall back to exact") test(6001.8202, frollsd(adaptive=TRUE, 1:3, c(2,2,2)), c(NA,sqrt(0.5),sqrt(0.5)), options=c("datatable.verbose"=TRUE), output="frolladaptivesdExact: calling sqrt(frolladaptivevarExact(...))") test(6001.821, frollsd(adaptive=TRUE, 1:3, c(2,0,2)), c(NA,NA,sqrt(0.5))) test(6001.822, frollsd(adaptive=TRUE, 1:3, c(2,0,2), fill=99), c(99,NA,sqrt(0.5))) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1b5ea162ae..7083636e56 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8059,20 +8059,22 @@ test(1577.3, levels(X$b), character(0)) # FR #530, skip blank lines input = "Header not 2 columns\n\n1,3\n2,4" -test(1578.1, fread(input), data.table(V1=1:2, V2=3:4)) +test(1578.01, fread(input), data.table(V1=1:2, V2=3:4)) input = "a,b\n\n1,3\n2,4" -test(1578.2, fread(input), data.table(V1=1:2, V2=3:4)) # the block of 2x2 dominates the one line with sep in auto-removed header section -test(1578.3, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) +test(1578.02, fread(input), data.table(V1=1:2, V2=3:4)) # the block of 2x2 dominates the one line with sep in auto-removed header section +test(1578.03, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) input = "a,b\n\n\n1,3\n2,4" -test(1578.4, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) +test(1578.04, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) input = "a,b\n\n\n1,3\n\n2,4\n\n" -test(1578.5, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) 
+test(1578.05, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) f = testDir("530_fread.txt") -test(1578.6, fread(f, skip=47L, verbose=TRUE), data.table(V1=1:2, V2=3:4), output="Positioned on line 48 starting: <>") -test(1578.7, fread(f, skip=49L), data.table(V1=1:2, V2=3:4)) -test(1578.8, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4)) -test(1578.9, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50 +test(1578.06, fread(f, skip=47L, verbose=TRUE), data.table(V1=1:2, V2=3:4), output="Positioned on line 48 starting: <>") +test(1578.07, fread(f, skip=49L), data.table(V1=1:2, V2=3:4)) +test(1578.08, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4)) +test(1578.09, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50 +input = "x y\n\n1 a\n\n2 b\n\n3 c" +test(1578.10, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. 
If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.") # test 1579 moved to optimize.Rraw @@ -18419,7 +18421,7 @@ test(2264.8, print(DT, show.indices=TRUE), output=ans) if (test_bit64) local({ DT = data.table(a = 'abc', b = as.integer64(1)) suppressPackageStartupMessages(unloadNamespace("bit64")) - on.exit(suppressPackageStartupMessages(library(bit64, pos="package:base"))) + on.exit(suppressWarnings(suppressPackageStartupMessages(library(bit64, pos="package:base")))) test(2265, DT, output="abc\\s*1$") }) @@ -21591,3 +21593,23 @@ x = fread("x\n-1-01-01")$x test(2368.1, year(x), -1L) test(2368.2, month(x), 1L) test(2368.3, mday(x), 1L) + +# yearqtr() and yearmon() could optionally output 2025Q4 format #7694 +x = c("1111-11-11", "2019-01-01", "2019-02-28", "2019-03-01", "2019-12-31", "2020-02-29", "2020-03-01", "2020-12-31", "2040-01-01", "2040-12-31", "2100-03-01", NA) +test(2369.1, yearqtr(x, format="numeric"), c(1111.75, 2019, 2019, 2019, 2019.75, 2020, 2020, 2020.75, 2040, 2040.75, 2100, NA)) +test(2369.2, yearqtr(x, format="numeric"), yearqtr(x)) # numeric is the default, preserves backwards compatibility +test(2369.3, yearqtr(x, format="character"), c("1111Q4", "2019Q1", "2019Q1", "2019Q1", "2019Q4", "2020Q1", "2020Q1", "2020Q4", "2040Q1", "2040Q4", "2100Q1", NA_character_)) +test(2369.4, yearqtr("2016-08-03 01:02:03.45", format="character"), "2016Q3") +test(2369.5, yearqtr(NA, format="character"), NA_character_) + +test(2370.1, yearmon(x, format="numeric"), c(1111+10/12, 2019, 2019+1/12, 2019+2/12, 2019+11/12, 2020+1/12, 2020+2/12, 2020+11/12, 2040, 2040+11/12, 2100+2/12, NA)) +test(2370.2, yearmon(x, format="numeric"), yearmon(x)) # numeric is the default, preserves backwards compatibility +test(2370.3, yearmon(x, format="character"), c("1111M11", "2019M01", "2019M02", "2019M03", "2019M12", "2020M02", "2020M03", "2020M12", "2040M01", "2040M12", "2100M03", NA_character_)) +test(2370.4, yearmon("2016-08-03 01:02:03.45", 
format="character"), "2016M08") +test(2370.5, yearmon(NA, format="character"), NA_character_) + +# multiple expected/observed warnings in test() are printed on aligned lines, #7092 +test(2371.1, test(0, {warning("a"); 2L}, 2L, warning=c("a", "b")), FALSE, + output="Test 0 produced 1 warnings but expected 2\nExpected: a\n b\nObserved: a") +test(2371.2, test(0, {warning("a"); warning("b"); 2L}, 2L, warning="a"), FALSE, + output="Test 0 produced 2 warnings but expected 1\nExpected: a\nObserved: a\n b") diff --git a/man/IDateTime.Rd b/man/IDateTime.Rd index cf762337e9..109db4ed35 100644 --- a/man/IDateTime.Rd +++ b/man/IDateTime.Rd @@ -97,8 +97,8 @@ isoyear(x) month(x) quarter(x) year(x) -yearmon(x) -yearqtr(x) +yearmon(x, format = c("numeric", "character")) +yearqtr(x, format = c("numeric", "character")) } @@ -115,6 +115,7 @@ yearqtr(x) the S3 generic.} \item{units}{one of the units listed for truncating. May be abbreviated.} \item{ms}{ For \code{as.ITime} methods, what should be done with sub-second fractions of input? Valid values are \code{'truncate'} (floor), \code{'nearest'} (round), and \code{'ceil'} (ceiling). See Details. } + \item{format}{For \code{yearmon} and \code{yearqtr}, either \code{"numeric"} (default) or \code{"character"}. \code{"character"} formats the result as \code{"2025M04"} for \code{yearmon} and \code{"2025Q2"} for \code{yearqtr}.} } \details{ \code{IDate} is a date class derived from \code{Date}. It has the same @@ -209,7 +210,11 @@ Similarly, \code{isoyear()} returns the ISO 8601 year corresponding to the ISO w for second, minute, hour, day of year, day of week, day of month, week, month, quarter, and year, respectively. \code{yearmon} and \code{yearqtr} return double values representing - respectively \code{year + (month-1) / 12} and \code{year + (quarter-1) / 4}. + respectively \code{year + (month-1) / 12} and \code{year + (quarter-1) / 4} + when \code{format = "numeric"} (the default). 
When \code{format = "character"}, + they return character vectors of the form \code{"YYYYM##"} (e.g. \code{"2025M04"}, + zero-padded for sortability) and \code{"YYYYQN"} (e.g. \code{"2025Q2"}) respectively, + with \code{NA} input returned as \code{NA_character_}. \code{second}, \code{minute}, \code{hour} are taken directly from the \code{POSIXlt} representation. @@ -296,6 +301,11 @@ year(d2) isoweek(d2) isoyear(d2) +# Character format for yearmon() and yearqtr() +d3 = as.IDate(c("2019-01-01", "2019-12-31")) +yearmon(d3, format = "character") # "2019M01" "2019M12" +yearqtr(d3, format = "character") # "2019Q1" "2019Q4" + } \keyword{utilities} diff --git a/src/data.table.h b/src/data.table.h index c561b17328..3ebd30791f 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -259,6 +259,7 @@ typedef enum { // adding rolling functions here and in frollfunR in frollR.c VAR = 6, SD = 7 } rollfun_t; +extern const char *const rfunNames[]; // Array of roll function names defined in froll.c // froll.c void frollfun(rollfun_t rfun, unsigned int algo, const double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasnf, bool verbose, bool par); void frollmeanFast(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose); diff --git a/src/fread.c b/src/fread.c index 70125ec323..a017c18f33 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1847,6 +1847,7 @@ int freadMain(freadMainArgs _args) int topNumFields = 1; // how many fields that was, to resolve ties enum quote_rule_t topQuoteRule = -1; // which quote rule that was int topSkip = 0; // how many rows to auto-skip + // #7707 'topSkip' accumulates as blank lines are encountered; can be used to differentiate between a file where the header and data are separated by a blank line and a file where block(s) of lines or each line is separated by a blank line const char *topStart = NULL; for (quoteRule = quote ? 
QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED : QUOTE_RULE_IGNORE_QUOTES; quoteRule < QUOTE_RULE_COUNT; quoteRule++) { // #loop_counter_not_local_scope_ok @@ -1950,6 +1951,10 @@ int freadMain(freadMainArgs _args) } } } + if (!prevStart && topSkip > 1 && !skipEmptyLines) + { + DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); + } if (!firstJumpEnd) { if (verbose) DTPRINT(_(" No sep and quote rule found a block of 2x2 or greater. Single column input.\n")); topNumFields = 1; diff --git a/src/froll.c b/src/froll.c index f8315c3ec4..97b37a08d8 100644 --- a/src/froll.c +++ b/src/froll.c @@ -1,5 +1,7 @@ #include "data.table.h" +const char *const rfunNames[] = { + "MEAN", "SUM", "MAX", "MIN", "PROD", "MEDIAN", "VAR", "SD"}; //constant definition of froll functions /* OpenMP is used here to parallelize the loops in most of the implementations of rolling functions. @@ -104,7 +106,7 @@ void frollfun(rollfun_t rfun, unsigned int algo, const double *x, uint64_t nx, a } } if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: processing fun %d algo %u took %.3fs\n"), __func__, rfun, algo, omp_get_wtime()-tic); + snprintf(end(ans->message[0]), 500, _("%s: processing fun %s algo %s took %.3fs\n"), __func__, rfunNames[rfun], (algo == 0) ? 
"fast" : "exact", omp_get_wtime()-tic); } #undef SUM_WINDOW_STEP_FRONT diff --git a/src/frolladaptive.c b/src/frolladaptive.c index 41faf1f30c..edcd205ef8 100644 --- a/src/frolladaptive.c +++ b/src/frolladaptive.c @@ -29,42 +29,50 @@ void frolladaptivefun(rollfun_t rfun, unsigned int algo, const double *x, uint64 case MAX : if (algo==0 && verbose) { //frolladaptivemaxFast(x, nx, ans, k, fill, narm, hasnf, verbose); // frolladaptivemaxFast does not exists as of now - snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1); + snprintf(end(ans->message[0]), 500, _("%s: algo fast not implemented, fall back to exact\n"), __func__); + // set algo to 1 so the "algo %s" field of the last snprintf in this function prints exact instead of fast + algo = 1; } frolladaptivemaxExact(x, nx, ans, k, fill, narm, hasnf, verbose); break; case MIN : if (algo==0 && verbose) { //frolladaptiveminFast(x, nx, ans, k, fill, narm, hasnf, verbose); // frolladaptiveminFast does not exists as of now - snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1); + snprintf(end(ans->message[0]), 500, _("%s: algo fast not implemented, fall back to exact\n"), __func__); + // set algo to 1 so the "algo %s" field of the last snprintf in this function prints exact instead of fast + algo = 1; } frolladaptiveminExact(x, nx, ans, k, fill, narm, hasnf, verbose); break; case PROD : if (algo==0 && verbose) { //frolladaptiveprodFast(x, nx, ans, k, fill, narm, hasnf, verbose); // frolladaptiveprodFast does not exists as of now - snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1); + snprintf(end(ans->message[0]), 500, _("%s: algo fast not implemented, fall back to exact\n"), __func__); + algo = 1; } frolladaptiveprodExact(x, nx, ans, k, fill, narm, hasnf, verbose); break; case MEDIAN : if (algo==0 && verbose) { 
//frolladaptivemedianFast(x, nx, ans, k, fill, narm, hasnf, verbose); // frolladaptivemedianFast does not exists as of now - snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1); + snprintf(end(ans->message[0]), 500, _("%s: algo fast not implemented, fall back to exact\n"), __func__); + algo = 1; } frolladaptivemedianExact(x, nx, ans, k, fill, narm, hasnf, verbose); break; case VAR : if (algo==0 && verbose) { //frolladaptivevarFast(x, nx, ans, k, fill, narm, hasnf, verbose); // frolladaptivevarFast does not exists as of now - snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1); + snprintf(end(ans->message[0]), 500, _("%s: algo fast not implemented, fall back to exact\n"), __func__); + algo = 1; } frolladaptivevarExact(x, nx, ans, k, fill, narm, hasnf, verbose); break; case SD : if (algo==0 && verbose) { //frolladaptivesdFast(x, nx, ans, k, fill, narm, hasnf, verbose); // frolladaptivesdFast does not exists as of now - snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1); + snprintf(end(ans->message[0]), 500, _("%s: algo fast not implemented, fall back to exact\n"), __func__); + algo = 1; } frolladaptivesdExact(x, nx, ans, k, fill, narm, hasnf, verbose); break; @@ -72,7 +80,7 @@ void frolladaptivefun(rollfun_t rfun, unsigned int algo, const double *x, uint64 internal_error(__func__, "Unknown rfun value in frolladaptive: %d", rfun); // # nocov } if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: processing fun %d algo %u took %.3fs\n"), __func__, rfun, algo, omp_get_wtime()-tic); + snprintf(end(ans->message[0]), 500, _("%s: processing fun %s algo %s took %.3fs\n"), __func__, rfunNames[rfun], (algo == 0) ? "fast" : "exact", omp_get_wtime()-tic); } #undef MEAN_WINDOW_STEP_VALUE