Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

232 functions used in formula= argument in defdata not available for functions defined out of global namespace #233

7 changes: 5 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ message: 'To cite package "simstudy" in publications use:'
type: software
license: GPL-3.0-only
title: 'simstudy: Simulation of Study Data'
version: 0.7.1.9000
version: 0.8.0.9000
doi: 10.21105/joss.02763
abstract: Simulates data sets in order to explore modeling techniques or better understand
data generating processes. The user specifies a set of relationships between covariates,
Expand Down Expand Up @@ -49,7 +49,7 @@ preferred-citation:
repository: https://CRAN.R-project.org/package=simstudy
repository-code: https://github.com/kgoldfeld/simstudy
url: https://kgoldfeld.github.io/simstudy/
date-released: '2024-05-10'
date-released: '2024-07-19'
contact:
- family-names: Goldfeld
given-names: Keith
Expand Down Expand Up @@ -170,6 +170,9 @@ references:
given-names: Michel
email: [email protected]
orcid: https://orcid.org/0000-0001-9754-0393
- family-names: Murdoch
given-names: Duncan
email: [email protected]
- name: R Core Team
year: '2024'
- type: software
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Type: Package
Package: simstudy
Title: Simulation of Study Data
Version: 0.8.0.9000
Date: 2024-05-14
Date: 2024-07-19
Authors@R:
c(person(given = "Keith",
family = "Goldfeld",
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

## Minor fix
* `addCorGen` no longer requires all clusters to have the same size when using the *rho* and *corstr* arguments to define the correlation.
* Fixed an issue that prevented functions defined outside the global namespace from being referenced in `defData`.

# simstudy 0.8.0

Expand Down
11 changes: 6 additions & 5 deletions R/generate_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,7 @@ genSpline <- function(dt, newvar, predictor, theta,
#' @param keepEvents Indicator to retain original "events" columns. Defaults
#' to FALSE.
#' @param idName Name of id field in existing data set.
#' @param envir Optional environment, defaults to current calling environment.
#' @return Original data table with survival time
#' @examples
#' # Baseline data definitions
Expand Down Expand Up @@ -996,7 +997,7 @@ genSpline <- function(dt, newvar, predictor, theta,
#
genSurv <- function(dtName, survDefs, digits = 3,
timeName = NULL, censorName = NULL, eventName = "event",
typeName = "type", keepEvents = FALSE, idName = "id") {
typeName = "type", keepEvents = FALSE, idName = "id", envir = parent.frame()) {

# For double-dot notation

Expand Down Expand Up @@ -1042,13 +1043,13 @@ genSurv <- function(dtName, survDefs, digits = 3,
subDef <- survDefs[varname == events[i]]

formshape <- subDef[1, shape]
shape <- as.vector(.evalWith(formshape, .parseDotVars(formshape, envir = parent.frame()), dtSurv))
shape <- as.vector(.evalWith(formshape, .parseDotVars(formshape, envir = parent.frame()), dtSurv, envir = envir))

formscale <- subDef[1, scale]
scale <- as.vector(.evalWith(formscale, .parseDotVars(formscale, envir = parent.frame()), dtSurv))
scale <- as.vector(.evalWith(formscale, .parseDotVars(formscale, envir = parent.frame()), dtSurv, envir = envir))

formulas <- subDef[, formula]
form1 <- as.vector(.evalWith(formulas[1], .parseDotVars(formulas[1], envir = parent.frame()), dtSurv))
form1 <- as.vector(.evalWith(formulas[1], .parseDotVars(formulas[1], envir = parent.frame()), dtSurv, envir = envir))

if (nrow(subDef) > 1) {

Expand All @@ -1063,7 +1064,7 @@ genSurv <- function(dtName, survDefs, digits = 3,
transition <- subDef[2, transition]
t_adj <- transition ^ (1/shape)

form2 <- as.vector(.evalWith(formulas[2], .parseDotVars(formulas[2], envir = parent.frame()), dtSurv))
form2 <- as.vector(.evalWith(formulas[2], .parseDotVars(formulas[2], envir = parent.frame()), dtSurv, envir = envir))

threshold <- exp(form1) * t_adj
period <- 1*(nlogu < threshold) + 2*(nlogu >= threshold)
Expand Down
49 changes: 28 additions & 21 deletions R/generate_dist.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@

.getBetaMean <- function(dtSim, formula, link, n = nrow(dtSim),
envir = parent.frame()) {
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n)
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n, envir)
if (link == "logit") {
mean <- 1 / (1 + exp(-mean))
}
Expand All @@ -228,7 +228,7 @@
.genbeta <- function(n, formula, precision, link = "identity", dtSim, envir) {
mean <- .getBetaMean(dtSim, formula, link, n, envir)

d <- .evalWith(precision, .parseDotVars(precision, envir), dtSim, n)
d <- .evalWith(precision, .parseDotVars(precision, envir), dtSim, n, envir)

sr <- betaGetShapes(mean = mean, precision = d)
new <- stats::rbeta(n, shape = sr$shape1, shape2 = sr$shape2)
Expand All @@ -250,8 +250,8 @@
link,
n = nrow(dtSim),
envir = parent.frame()) {
size <- .evalWith(size, .parseDotVars(size, envir), dtSim, n)
p <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n)
size <- .evalWith(size, .parseDotVars(size, envir), dtSim, n, envir)
p <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n, envir)

if (link == "log") {
p <- exp(p)
Expand Down Expand Up @@ -300,7 +300,7 @@
}

parsedProbs <-
.evalWith(formulas, .parseDotVars(formulas, envir), dtSim, n)
.evalWith(formulas, .parseDotVars(formulas, envir), dtSim, n, envir)

if (link == "logit") {
parsedProbs <- exp(parsedProbs)
Expand Down Expand Up @@ -340,7 +340,7 @@

.gendeterm <- function(n, formula, link, dtSim, envir) {

new <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n)
new <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n, envir)

if (link == "log") {
new <- exp(new)
Expand All @@ -365,7 +365,7 @@

var_vec <- unlist(lapply(arg_l, function(a) a[[1]]))
arg_list <- lapply(arg_l,
function(a) .evalWith(a[[2]], .parseDotVars( a[[2]], envir ), dtSim, n)
function(a) .evalWith(a[[2]], .parseDotVars( a[[2]], envir ), dtSim, n, envir)
)
names(arg_list) <- var_vec
assertNotInVector("n", names(arg_list))
Expand All @@ -381,7 +381,7 @@
# @param formula String that specifies the mean (lambda)
# @return A data.frame column with the updated simulated data
.genexp <- function(n, formula, link = "identity", dtSim, envir) {
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n)
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n, envir)
if (link == "log") {
mean <- exp(mean)
}
Expand All @@ -398,7 +398,7 @@
# @return A data.frame column with the updated simulated data

.getGammaMean <- function(dtSim, formula, link, n = nrow(dtSim), envir) {
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n)
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n, envir)
if (link == "log") {
mean <- exp(mean)
}
Expand All @@ -408,7 +408,7 @@

.gengamma <- function(n, formula, dispersion, link = "identity", dtSim, envir) {
mean <- .getGammaMean(dtSim, formula, link, n, envir)
d <- .evalWith(dispersion, .parseDotVars(dispersion, envir), dtSim, n)
d <- .evalWith(dispersion, .parseDotVars(dispersion, envir), dtSim, n, envir)

sr <- gammaGetShapeRate(mean = mean, dispersion = d)
new <- stats::rgamma(n, shape = sr$shape, rate = sr$rate)
Expand All @@ -417,13 +417,18 @@
}

.genmixture <- function(n, formula, dtSim, envir) {

origFormula <- formula
formula <- .rmWS(formula)
var_pr <- strsplit(formula, "+", fixed = T)
var_dt <- strsplit(var_pr[[1]], "|", fixed = T)
formDT <- as.data.table(do.call(rbind, var_dt))
ps <-
cumsum(.evalWith(unlist(formDT[, 2]), .parseDotVars(formDT[, 2], envir)))

ps <- cumsum(.evalWith(
formula = unlist(formDT[, 2]),
extVars = .parseDotVars(formDT[, 2]),
envir = envir
))

if (!isTRUE(all.equal(max(ps), 1))) {
valueError(origFormula,
Expand All @@ -444,7 +449,7 @@
u <- stats::runif(n)
dvars$interval <- findInterval(u, ps, rightmost.closed = TRUE) + 1

.evalWith(interval_formula, dvars, dtSim, n)
.evalWith(interval_formula, dvars, dtSim, n, envir)
}

# Internal function called by .generate - returns negative binomial data
Expand All @@ -455,7 +460,7 @@

.getNBmean <- function(dtSim, formula, link, n = nrow(dtSim),
envir = parent.frame()) {
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n)
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n, envir)
if (link == "log") {
mean <- exp(mean)
}
kgoldfeld marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -486,12 +491,14 @@
formula,
n = nrow(dtSim),
envir = parent.frame()) {
.evalWith(formula, .parseDotVars(formula, envir), dtSim, n)

.evalWith(formula, .parseDotVars(formula, envir), dtSim, n, envir)
}

.gennorm <- function(n, formula, variance, link, dtSim, envir) {
mean <- .getNormalMean(dtSim, formula, n, envir)
v <- .evalWith(variance, .parseDotVars(variance, envir), dtSim, n)
v <- .evalWith(variance, .parseDotVars(variance, envir), dtSim, n, envir)
# added envir 20240718

return(stats::rnorm(n, mean, sqrt(v)))
}
Expand All @@ -505,7 +512,7 @@
# @return A data.frame column with the updated simulated data

.getPoissonMean <- function(dtSim, formula, link, n = nrow(dtSim), envir = parent.frame()) {
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n)
mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n, envir)

if (link == "log") {
mean <- exp(mean)
Expand Down Expand Up @@ -570,7 +577,7 @@
)
}

parsedRange <- .evalWith(range, .parseDotVars(range, envir), dtSim, n)
parsedRange <- .evalWith(range, .parseDotVars(range, envir), dtSim, n, envir)

r_min <- parsedRange[, 1]
r_max <- parsedRange[, 2]
Expand Down Expand Up @@ -644,7 +651,7 @@

# Internal function called by .generate - returns cluster size data

.genclustsize <- function(n, formula, variance = 0, envir) {
.genclustsize <- function(n, formula, variance = 0, envir = parent.frame()) {

if (!requireNamespace("dirmult", quietly = TRUE)) {
stop(
Expand All @@ -654,8 +661,8 @@
)
}

formula <- .evalWith(formula, .parseDotVars(formula, envir))[1]
variance <- .evalWith(variance, .parseDotVars(variance, envir))[1]
formula <- .evalWith(formula, .parseDotVars(formula, envir), envir = envir)[1]
variance <- .evalWith(variance, .parseDotVars(variance, envir), envir = envir)[1]

assertInteger(formula = formula)
assertAtLeast(formula = formula, minVal = 1)
Expand Down
33 changes: 18 additions & 15 deletions R/internal_utility.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,36 +50,37 @@
.evalWith <- function(formula,
extVars,
dtSim = data.table(),
n = nrow(dtSim)) {

if (missing(dtSim) && missing(n)) {
n <- 1
.n = nrow(dtSim),
envir = parent.frame()) {

if (missing(dtSim) && missing(.n)) {
.n <- 1
}

if (!missing(dtSim) && !is.null(dtSim) && n != nrow(dtSim)) {
if (!missing(dtSim) && !is.null(dtSim) && .n != nrow(dtSim)) {
stop(glue(
"Both 'dtSim' and 'n' are set but are of different length: ",
"{nrow(dtSim)} != {n}"
"{nrow(dtSim)} != {.n}"
))
}

e <- list2env(extVars)

if (!missing(dtSim) && !is.null(dtSim)) {
e$dtSim <- as.data.table(dtSim)
# e$def_id <- names(dtSim)[[1]] # original, but incorrect
e$def_id <- key(dtSim)
}

}
if (missing(dtSim) || is.null(dtSim)) {
e$dtSim <- genData(n)
e$dtSim <- genData(.n)
e$def_id <- "id"
}

if (!is.null(e$formula2parse)) {
stop("'formula2parse' is a reserved variable name!")
}

evalFormula <- function(formula) {
e$formula2parse <- formula

Expand All @@ -105,14 +106,16 @@
}

if (length(res) == 1) {
rep(res, n)
rep(res, times = .n)
} else {
res
}
}


list2env(as.list(envir), envir = environment()) # added 20240718

parsedValues <- sapply(formula, evalFormula)

# If only a single formula with 1 rep is eval'ed output would be not be
# matrix, so transpose for uniform output.
if (!is.matrix(parsedValues)) {
Expand Down
Loading
Loading