From 6ffa4e2e425d646f287770b7a20ed776ab7125c5 Mon Sep 17 00:00:00 2001 From: abigailkeller Date: Fri, 1 May 2026 11:11:51 +0000 Subject: [PATCH] working on generic disc function --- R/GenericDiscrFunction.R | 91 ++++++++++++ R/registeredDiscrepancies.R | 119 ++++++++++++++++ tests/testthat/test-GenericDiscrFunction.R | 158 +++++++++++++++++++++ 3 files changed, 368 insertions(+) create mode 100644 R/GenericDiscrFunction.R create mode 100644 R/registeredDiscrepancies.R create mode 100644 tests/testthat/test-GenericDiscrFunction.R diff --git a/R/GenericDiscrFunction.R b/R/GenericDiscrFunction.R new file mode 100644 index 0000000..8db1660 --- /dev/null +++ b/R/GenericDiscrFunction.R @@ -0,0 +1,91 @@ +calcDiscrepancies <- nimbleFunction( + setup = function(control) { + + # get model + model <- control$model + + # get dimensions and data + nDisc <- length(control$discType) + discTypes <- control$discType + + # get node names + dataNodes <- control$dataNodes + paramNodes <- control$paramNodes + expectedNodes <- control$expectedNodes + + # get parameter and data dependencies + paramDependencies <- model$getDependencies(paramNodes) + dataDependencies <- model$getDependencies(dataNodes) + + # get node indices + paramNodeIndices <- match(paramNodes, colnames(MCMCSamples)) + + # get nodes to simulate + simNodes <- model$getDependencies(paramNodes, self = FALSE) + simNodes <- model$topologicallySortNodes(simNodes) + + # create empty disc function list + discrepancyFunctionList <- nimbleFunctionList(discrepancyFunction_BASE) + + # use look-up table to add disc functions + for (i in seq_along(discTypes)) { + + if (discTypes[i] %in% names(discLookup)) { + # grab the function from the lookup and initialize it + discrepancyFunctionList[[i]] <- discLookup[[discTypes[i]]](model, + control) + } else { + stop(paste0("discType '", discTypes[i], + "' is invalid. Must be: mean, variance, deviance, ", + "chisquared, or freemantukey")) + } + + } + + }, + run = function(MCMCSamples = double(2)) { + + origDataValues <- values(model, dataNodes) + origParamValues <- values(model, paramNodes) + nSamples <- dim(MCMCSamples)[1] + + ## results[j, i, k]: + ## j = discrepancy index, + ## i = MCMC iteration, + ## k = 1 for observed or 2 for simulated + results <- array(dim = c(nDisc, nSamples, 2) ) + + for (i in 1:nSamples) { + ## put MCMC values from sampled iteration in the model + values(model, paramNodes) <<- MCMCSamples[i, paramNodeIndices] + + ## calculate + model$calculate(paramDependencies) + + ## calculate observed discrepancies + for (j in 1:nDisc) { + obsDiscrepancy <- discrepancyFunctionList[[j]]$run() + results[j, i, 1] <- obsDiscrepancy + } + + ## simulate from posterior predictive + model$simulate(simNodes, includeData = TRUE) + model$calculate(dataDependencies) + + for (j in 1:nDisc) { + simDiscrepancy <- discrepancyFunctionList[[j]]$run() + results[j, i, 2] <- simDiscrepancy + } + + ## Careful here with latent states! + ## Return the model to original state (i.e. state upon entry to this function) + values(model, dataNodes) <<- origDataValues + model$calculate(dataDependencies) + } + values(model, paramNodes) <<- origParamValues + model$calculate(paramDependencies) + + returnType(double(3)) + return(results) + } +) diff --git a/R/registeredDiscrepancies.R b/R/registeredDiscrepancies.R new file mode 100644 index 0000000..2b9a505 --- /dev/null +++ b/R/registeredDiscrepancies.R @@ -0,0 +1,119 @@ +##-------------------------------------------------------## +## This file may contain some already implemented discrepancies +##-------------------------------------------------------## + +## discrepancy base class +discrepancyFunction_BASE <- nimbleFunctionVirtual( + run = function() returnType(double()) +) + +# mean discrepancy function +MeanDiscFunction <- nimbleFunction( + contains = discrepancyFunction_BASE, + setup = function(model, control) { + + data <- control$dataNodes + + }, + run = function() { + + disc <- mean(values(model, data)) + + returnType(double(0)) + return(disc) + } +) + +# variance discrepancy function +VarDiscFunction <- nimbleFunction( + contains = discrepancyFunction_BASE, + setup = function(model, control) { + + data <- control$dataNodes + + }, + run = function() { + + disc <- var(values(model, data)) + + returnType(double(0)) + return(disc) + } +) + +# deviance discrepancy function +DevianceDiscFunction <- nimbleFunction( + contains = discrepancyFunction_BASE, + setup = function(model, control) { + + data <- control$dataNodes + + }, + run = function() { + + disc <- 2 * model$calculate(data) + + returnType(double(0)) + return(disc) + } +) + +# chi-squared discrepancy function +ChiSqDiscFunction <- nimbleFunction( + contains = discrepancyFunction_BASE, + setup = function(model, control) { + + dataNodes <- control$dataNodes + expectedNodes <- control$expectedNodes + + }, + run = function() { + + data_val <- values(model, dataNodes) + + ## AK: check that this works for multiple expected nodes + data_exp <- values(model, expectedNodes) + + disc <- sum( + (data_val - data_exp) ^ 2 / + (data_exp + 1e-6) + ) + + returnType(double(0)) + return(disc) + } +) + +# Freeman-Tukey discrepancy function +FTukeyFunction <- nimbleFunction( + contains = discrepancyFunction_BASE, + setup = function(model, control) { + + dataNodes <- control$dataNodes + expectedNodes <- control$expectedNodes + + }, + run = function() { + + data_val <- values(model, dataNodes) + + ## AK: check that this works for multiple expected nodes + data_exp <- values(model, expectedNodes) + + disc <- sum( + (sqrt(data_val) - sqrt(data_exp)) ^ 2 + ) + + returnType(double(0)) + return(disc) + } +) + +# discrepancy look-up map +discLookup <- list( + mean = MeanDiscFunction, + variance = VarDiscFunction, + deviance = DevianceDiscFunction, + chisquared = ChiSqDiscFunction, + freemantukey = FTukeyFunction +) diff --git a/tests/testthat/test-GenericDiscrFunction.R b/tests/testthat/test-GenericDiscrFunction.R new file mode 100644 index 0000000..657a250 --- /dev/null +++ b/tests/testthat/test-GenericDiscrFunction.R @@ -0,0 +1,158 @@ + +######################################## +# create sample model and MCMC samples # +######################################## + +# occupancy model +occu_model <- nimbleCode({ + + psi ~ dbeta(1, 1) + p ~ dbeta(1, 1) + + for (i in 1:nSites) { + + z[i] ~ dbern(psi) + + # observed data + y[i] ~ dbinom(z[i] * p, nVisits) + + # expected data + y_exp[i] <- z[i] * p * nVisits + + } + +}) + +# function for simulating data +simulate_occu <- function(params, nSites, nVisits) { + + y <- rep(NA, nSites) + + z <- rbinom(nSites, 1, params$psi) + + for (i in 1:nSites) { + + y[i] <- rbinom(1, nVisits, z[i] * params$p) + + } + + return(y) +} + +# model components +nVisits <- 6 +nSites <- 20 +params <- list(p = 0.3, psi = 0.4) +constants <- list(nSites = nSites, nVisits = nVisits) +observedData <- simulate_occu(params = list(p = 0.3, psi = 0.4), + nSites = 20, nVisits = 6) + +# uncompiled model +model_uncompiled <- nimbleModel(occu_model, constants = constants, + data = list(y = observedData)) + +# compiled model +model <- compileNimble(model_uncompiled) + +# get data, param, and expected nodes +dataNames <- "y" +dataNodes <- model$expandNodeNames(dataNames) + +paramNames <- c("p", "psi", "z") +paramNodes <- model$expandNodeNames(paramNames) + +expectedNames <- "y_exp" +expectedNodes <- model$expandNodeNames(expectedNames) + +# get MCMC samples +# MCMCcontrolMain = list(niter = 5000, nburnin = 1000, thin = 1) +MCMCcontrolMain = list(niter = 500, nburnin = 100, thin = 1) + +mcmcConfFun <- function(model) { + configureMCMC(model, monitors = paramNodes, print = FALSE) +} +mcmcConf <- mcmcConfFun(model) +mcmcUncompiled <- buildMCMC(mcmcConf) +cmcmc <- compileNimble(mcmcUncompiled, project = model, + resetFunctions = TRUE) + +obsMCMC <- runMCMC( + cmcmc, + niter = MCMCcontrolMain$niter, + nburnin = MCMCcontrolMain$nburnin, + thin = MCMCcontrolMain$thin +) +MCMCSamples <- as.matrix(obsMCMC) + +# test discrepancy calculation output +test_that("GenericDiscrFunction works for basic cases", { + + # get original data and param values + original_data <- values(model, dataNodes) + original_param <- values(model, paramNodes) + + # choose discType + discType <- c("mean", "variance", "deviance", "chisquared", "freemantukey") + + # disc control + discControl = list( + model = model_uncompiled, + dataNames = dataNames, + dataNodes = dataNodes, + paramNames = paramNames, + paramNodes = paramNodes, + expectedNames = expectedNames, + expectedNodes = expectedNodes, + discType = discType + ) + + # create instance of discrepancy function + calc_disc <- calcDiscrepancies(discControl) + + # compile + Ccalc_disc <- compileNimble(calc_disc, project = model_uncompiled) + + # run + out <- Ccalc_disc$run(MCMCSamples) + + # test dimensions of output + expect_equal(dim(out), c(5, 400, 2)) + + # test that no values are NA + expect_true(all(!is.na(out))) + + # test that original data values have been returned to model + expect_equal(model$y, original_data) + expect_equal(model_uncompiled$y, original_data) + + # test that original parameter values have been returned to model + expect_equal(values(model, paramNames), original_param) + +}) + +# test error trapping +test_that("GenericDiscrFunction error trap works", { + + # choose discType + discType <- c("mean", "deviance", "chisq") + + # disc control + discControl = list( + model = model_uncompiled, + dataNames = dataNames, + dataNodes = dataNodes, + paramNames = paramNames, + paramNodes = paramNodes, + expectedNames = expectedNames, + expectedNodes = expectedNodes, + discType = discType + ) + + # create instance of discrepancy function + expect_error( + calcDiscrepancies(discControl), + paste0("discType 'chisq' is invalid. Must be: mean, variance, deviance, ", + "chisquared, or freemantukey") + ) + +})