# Update history: # Posted online on: 8/26/2015 require(mgcv) require(refund) ################################### # Function to find the solution of a probability equation # Arguments: # guess.root = temporary root for guessing an initial interval # step.size = step size to create an interval # prob.eqn = probability equation # delta = increment/decrement size for finding a temporary interval # grid.size = size of the grid where the search interval is partitioned # # values: # solution of the probability equation solve.prob.eqn <- function (guess.root, prob.eqn, step.size = .2, delta = .1, grid.size = .001){ search.int <- c(guess.root - step.size, guess.root + step.size) tt <- double(2) while (prob.eqn(search.int[1]) > 0 || prob.eqn(search.int[2]) < 0){ if(prob.eqn(search.int[1]) < 0) { tt[1] <- search.int[1] } else tt[1] <- search.int[1] - delta if(prob.eqn(search.int[2]) > 0){ tt[2] <- search.int[2] } else tt[2] <- search.int[2] + delta search.int <- tt } grid <- seq(search.int[1], search.int[2], grid.size) prob.grid <- sapply(grid, prob.eqn) return(grid[min(which(prob.grid >= 0))]) } ################################### # # Function for estimate parameters (mu and sigma squared and G squared and tau squared) # Arguments: # Y = data # I = number of subjects # D = number of points in the union grid # nbasis = number of basis functions in GAM model # # Values: # mean hat = estimated mean # sigma.sq.hat = estimated sigma squared # pve = proportion of variance explained by the extracted principal components # # Note: This function is a modified version of ccb.fpc function in # the refund package param.estimate <- function(Y, I, D, argvals, nbasis = 10, pve = 0.99){ library(mgcv) d.vec <- rep(argvals, each = I) #estimate mean using gam model gam0 <- gam((as.vector(Y) ~ s(d.vec, k = nbasis)), method="REML") mu <- predict(gam0, newdata = data.frame(d.vec = argvals)) Y.tilde <- Y - matrix(mu, I, D, byrow = TRUE) #Find a raw estimate for G cov.sum <- cov.count <- cov.mean <- matrix(0, D, D) for (i in 1:I) { obs.points <- which(!is.na(Y[i, ])) cov.count[obs.points, obs.points] <- cov.count[obs.points, obs.points] + 1 cov.sum[obs.points, obs.points] <- cov.sum[obs.points, obs.points] + tcrossprod(Y.tilde[i, obs.points]) } G.0 <- ifelse(cov.count == 0, NA, cov.sum / cov.count) diag.G0 <- diag(G.0) diag(G.0) <- NA #smooth G row.vec <- rep(argvals, each = D) col.vec <- rep(argvals, D) npc.0 <- matrix(predict(gam(as.vector(G.0) ~ te(row.vec, col.vec, k = nbasis), method="REML", weights = as.vector(cov.count)), newdata = data.frame(row.vec = row.vec, col.vec = col.vec)), D, D) #predict the covariance using fitted model# npc.0 <- (npc.0 + t(npc.0))/2 evalues <- eigen(npc.0, symmetric = TRUE, only.values = TRUE)$values evalues <- replace(evalues, which(evalues <= 0), 0) ##compute eigen values# npc <- min(which(cumsum(evalues) / sum(evalues) > pve))# npc=prespecified value for the number of principal components # efunctions <- matrix(eigen(npc.0, symmetric = TRUE)$vectors[, seq(len = npc)], nrow = D, ncol = npc) #extract the number of eigen functions# evalues <- eigen(npc.0, symmetric = TRUE, only.values = TRUE)$values[1:npc] #extract eigen values# cov.hat <- efunctions %*% tcrossprod(diag(evalues, nrow = npc, ncol = npc), efunctions) DIAG <- (diag.G0 - diag(cov.hat))[floor(D * 0.2):ceiling(D * 0.8)] tau.sq <- max(mean(DIAG, na.rm = TRUE), 0) sigma.sq <- diag(cov.hat) + tau.sq return.obj <- list("mean.hat" = mu, "sigma.sq.hat" = sigma.sq, "G.sq.hat" = diag(cov.hat)) return(return.obj) } ################################### # # Function for finding bootstrap critical points # Arguments: # y = Sample from original data # y.mean = mean of the sample # y.sd = sd of the sample # root.int = Interval for root finding # simul = TRUE (if simultaneous tolerance factor need to be computed) # # Values: # list of point wise critical points and simultaneous critical points for onesided upper TI # boot.tolfactor <- function(n.boot, y, y.mean, y.sigma, y.G, root.int, prob, alpha, simul, I, D, argvals){ #bootstrap samples ystar <- replicate(n.boot, param.estimate(y[sample(1:I, I, replace = T),], I = I, D = D, argvals = argvals), simplify = T) # n x n.boot matrix #bootsrap estimates ystar.mean <- matrix(unlist(ystar[1,]), ncol = D, nrow = n.boot, byrow = T) # n.boot x D ystar.sigma.sq <- matrix(unlist(ystar[2,]), ncol = D, nrow = n.boot, byrow = T) # n.boot x D ystar.G <- sqrt(matrix(unlist(ystar[3,]), ncol = D, nrow = n.boot, byrow = T)) # n.boot x D ystar.sigma <- sqrt(ystar.sigma.sq) # n.boot x D #onesided bootstrap tolerance factors k.boot.upper <- onesided.tolfactor(prob = prob, alpha = alpha, y.mean = y.mean, y.sigma = y.sigma, y.G = y.G, ystar.mean = ystar.mean, ystar.sigma = ystar.sigma, ystar.G = ystar.G, root.int = root.int, simul = simul, I, D, n.boot = n.boot) #twosided bootstrap tolerance factors k.boot.twosided <- twosided.tolfactor(prob = prob, alpha = alpha, y.mean = y.mean, y.sigma = y.sigma, y.G = y.G, ystar.mean = ystar.mean, ystar.sigma = ystar.sigma, ystar.G = ystar.G, root.int = root.int, simul = simul, I, D, n.boot = n.boot) return(list(k.boot.upper, k.boot.twosided)) } ################################### # # Function to calculate critical points for onesided Tbs # Arguments: # y.mean = mean of the sample # y.sd = sd of the sample # ystar.mean = bootstrap mean # ystar.sigma = bootsrap variance # y.sd.corrected = corrected variance # root.int = Interval for root finding # simul = TRUE (if simultaneous tolerance factor is computed) # # Values: # list of pointwise critical points and simultaneous critical points for onesided upper TB # onesided.tolfactor <- function(prob, alpha, y.mean, y.sigma, y.G, ystar.mean, ystar.sigma, ystar.G, root.int, simul, I, D, n.boot){ pwise.obs <- double(D) sim.obs <- NULL pwise.true <- double(D) sim.true <- NULL #calculate pointwise tolerance factors for obseved and true curves for (i in 1:D){ prob.eqn.pwise.obs <- function(cval) { ubstar <- ystar.mean[,i] + cval * ystar.sigma[,i] prob.diff <- mean(pnorm(ubstar, y.mean[i], y.sigma[i]) >= prob) - (1 - alpha) return(prob.diff) } root.temp <- uniroot( prob.eqn.pwise.obs, root.int)$root pwise.obs[i] <- solve.prob.eqn(root.temp, prob.eqn.pwise.obs) prob.eqn.pwise.true <- function(cval) { ubstar <- ystar.mean[,i] + cval * ystar.G[,i] prob.diff <- mean(pnorm(ubstar, y.mean[i], y.G[i]) >= prob) - (1 - alpha) return(prob.diff) } root.temp <- uniroot( prob.eqn.pwise.true, root.int)$root pwise.true[i] <- solve.prob.eqn(root.temp, prob.eqn.pwise.true) } # calculate simulataneous tolerance factor for observed and true curves if(simul == T) { prob.eqn.sim.obs <- function(cval) { ubstar <- ystar.mean + cval * ystar.sigma prob.matrix <- t(sapply (1:n.boot, function(x) {pnorm(ubstar[x,], y.mean, y.sigma)})) prob.diff <- mean(apply(prob.matrix, MAR=1, FUN=min) >= prob) - (1 - alpha) return(prob.diff) } root.temp <- uniroot(prob.eqn.sim.obs, root.int)$root sim.obs <- solve.prob.eqn(root.temp, prob.eqn.sim.obs) prob.eqn.sim.true <- function(cval) { ubstar <- ystar.mean + cval * ystar.G prob.matrix <- t(sapply (1:n.boot, function(x) {pnorm(ubstar[x,], y.mean, y.G)})) prob.diff <- mean(apply(prob.matrix, MAR=1, FUN=min) >= prob) - (1 - alpha) return(prob.diff) } root.temp <- uniroot(prob.eqn.sim.true, root.int)$root sim.true <- solve.prob.eqn(root.temp, prob.eqn.sim.true) } return(list(pwise.obs = pwise.obs, sim.obs = sim.obs, pwise.true = pwise.true, sim.true = sim.true)) } ################################### # # Function to calculate critical points for twosided TBs # Arguments: # y.mean = mean of the sample # y.sigma = sd of the sample # ystar.mean = bootstrap mean # ystar.var = bootsrap variance # root.int = Interval for root finding # simul = TRUE (if simultaneous tolerance factor is computed) # # Values: # list of pointwise critical points and simultaneous critical points for onesided upper TB # twosided.tolfactor <- function(prob, alpha, y.mean, y.sigma, y.G, ystar.mean, ystar.sigma, ystar.G, root.int, simul, I, D, n.boot){ pwise.obs <- double(D) sim.obs <- NULL pwise.true <- double(D) sim.true <- NULL #calculate pointwise tolerance factors for obseved and true curves for (h in 1:D){ prob.eqn.pwise.obs <- function(cval) { ubstar <- ystar.mean[,h] + cval * ystar.sigma[,h] lbstar <- ystar.mean[,h] - cval * ystar.sigma[,h] prob.diff <- mean((pnorm(ubstar, y.mean[h], y.sigma[h]) - pnorm(lbstar, y.mean[h], y.sigma[h])) >= prob) - (1 - alpha) return(prob.diff) } root.temp <- uniroot(prob.eqn.pwise.obs, root.int)$root pwise.obs[h] <- solve.prob.eqn(root.temp, prob.eqn.pwise.obs) prob.eqn.pwise.true <- function(cval) { ubstar <- ystar.mean[,h] + cval * ystar.G[,h] lbstar <- ystar.mean[,h] - cval * ystar.G[,h] prob.diff <- mean((pnorm(ubstar, y.mean[h], y.G[h]) - pnorm(lbstar, y.mean[h], y.G[h])) >= prob) - (1 - alpha) return(prob.diff) } root.temp <- uniroot( prob.eqn.pwise.true, root.int)$root pwise.true[h] <- solve.prob.eqn(root.temp, prob.eqn.pwise.true) } #calculate simultaneous tolerance factors for obseved and true curves if(simul == T) { prob.eqn.sim.obs <- function(cval) { ubstar <- ystar.mean + cval * ystar.sigma lbstar <- ystar.mean - cval * ystar.sigma prob.matrix<-t(sapply(1:n.boot, function(x){pnorm(ubstar[x,], y.mean, y.sigma) - pnorm(lbstar[x,], y.mean, y.sigma)})) prob.diff<- mean(apply(prob.matrix, MAR=1, FUN=min) >= prob) - (1 - alpha) return(prob.diff) } root.temp <- uniroot(prob.eqn.sim.obs, root.int)$root sim.obs <- solve.prob.eqn(root.temp, prob.eqn.sim.obs) prob.eqn.sim.true <- function(cval) { ubstar <- ystar.mean + cval * ystar.G lbstar <- ystar.mean - cval * ystar.G prob.matrix <- t(sapply(1:n.boot, function(x){pnorm(ubstar[x,], y.mean, y.G) - pnorm(lbstar[x,], y.mean, y.G)})) prob.diff <- mean(apply(prob.matrix, MAR=1, FUN=min) >= prob) - (1 - alpha) return(prob.diff) } root.temp <- uniroot(prob.eqn.sim.true, root.int)$root sim.true <- solve.prob.eqn(root.temp, prob.eqn.sim.true) } return(list(pwise.obs = pwise.obs, sim.obs = sim.obs, pwise.true = pwise.true, sim.true = sim.true)) } ################################### # # Function to compute tolerance bands # Arguments: # y = functional data that we need to construct tolerance bands for. # alpha = parameter for confidence level such that 1-alpha = confidence level (default = .05). # prob = content probability. # simul = logical argument to indicate simultaneous tolerance bands is constructed or not. # n.boot = size of the bootstrap sample to compute bootstrap tolerance factors (default = 500). # # Values: # A list of constructed simultaneous tolerance bands and/or pointwise tolerance bands and estimated parameters. # TB.calc <- function(y, alpha = .05, prob, simul = TRUE, n.boot = 500){ I <- dim(y)[1] #number of subjects D <- dim(y)[2] #number of observations for the subjects (both obseved and missing) col.names <- as.integer(colnames(y)) arg <- seq(col.names[1], col.names[length(col.names)], 1) root.int <- c(-20,20) set.seed(1234) require(mgcv) # estimate sample mean and sd y.param <- param.estimate(Y = y, I = I, D = D, argvals = arg) y.mean <- y.param$mean.hat y.sigma <- sqrt(y.param$sigma.sq.hat) y.G <- sqrt(y.param$G.sq.hat) # compute bootstrap tolerance factors k.boot <- boot.tolfactor(n.boot = n.boot, y = y, y.mean = y.mean, y.sigma = y.sigma, y.G = y.G, root.int = root.int, prob = prob, alpha = alpha, simul = simul, I = I, D = D, argvals = arg) # the two upper TIs onesided.boot.obs.u <- y.mean + k.boot[[1]]$pwise.obs * y.sigma onesided.boot.true.u <- y.mean + k.boot[[1]]$pwise.true * y.G #dataframe containing indicator for onesided upper tolerance interval >prob onesided.pwise <- cbind(onesided.boot.obs.u, onesided.boot.true.u) if (simul == T){ #the two simultaneous TIs onesided.boot.obs.sim <- y.mean + k.boot[[1]]$sim.obs* y.sigma onesided.boot.true.sim <- y.mean + k.boot[[1]]$sim.true * y.G onesided.sim <- cbind(onesided.boot.obs.sim, onesided.boot.true.sim) } # the two sided pointwise TIs twosided.boot.obs.u <- y.mean + k.boot[[2]]$pwise.obs * y.sigma twosided.boot.true.u <- y.mean + k.boot[[2]]$pwise.true * y.G twosided.boot.obs.l <- y.mean - k.boot[[2]]$pwise.obs * y.sigma twosided.boot.true.l <- y.mean - k.boot[[2]]$pwise.true * y.G twosided.pwise <- cbind(twosided.boot.obs.l, twosided.boot.obs.u , twosided.boot.true.l, twosided.boot.true.u) #the twosided bootstrap simultaneous TIs if (simul == T){ twosided.boot.obs.sim.u <- y.mean + k.boot[[2]]$sim.obs * y.sigma twosided.boot.obs.sim.l <- y.mean - k.boot[[2]]$sim.obs * y.sigma twosided.boot.true.sim.u <- y.mean + k.boot[[2]]$sim.true * y.G twosided.boot.true.sim.l <- y.mean - k.boot[[2]]$sim.true * y.G #dataframe containing indicator function for simultaneous TI twosided.sim <- cbind(twosided.boot.obs.sim.l, twosided.boot.obs.sim.u, twosided.boot.true.sim.l, twosided.boot.true.sim.u) } if (simul == T){ result <- list(onesided.pwise, onesided.sim, twosided.pwise, twosided.sim, cbind(y.mean, y.sigma, y.G)) names(result) <- c("onesided.pwise TB", "onesided.sim TB", "twosided.pwise TB", "twosided.sim TB", "estimated parameters") }else { result <- list(onesided.pwise, twosided.pwise, cbind(y.mean, y.sigma, y.G)) names(result) <- c("onesided.pwise TB", "twosided.pwise TB", "estimated parameters") } return(result) }