# Update history:
# Posted online on: 8/26/2015

require(mgcv)
require(refund)

###################################
# Function to find the solution of a probability equation 
# Arguments:
# guess.root = temporary root for guessing an initial interval
# step.size = step size to create an interval 
# prob.eqn = probability equation
# delta = increment/decrement size for finding a temporary interval
# grid.size = size of the grid where the search interval is partitioned 
#
# values:
# solution of the probability equation

solve.prob.eqn <- function (guess.root, prob.eqn, step.size = .2, delta = .1, grid.size = .001){
  
  search.int <- c(guess.root - step.size, guess.root + step.size)
  tt <- double(2)
  
  while (prob.eqn(search.int[1]) > 0 || prob.eqn(search.int[2]) < 0){
    
    if(prob.eqn(search.int[1]) < 0) {
      tt[1] <- search.int[1] } else tt[1] <- search.int[1] - delta
    
    if(prob.eqn(search.int[2]) > 0){
      tt[2] <- search.int[2] } else tt[2] <- search.int[2] + delta
    
    search.int <- tt
  }
  
  grid <- seq(search.int[1], search.int[2], grid.size)
  prob.grid <- sapply(grid, prob.eqn)
  
  return(grid[min(which(prob.grid >= 0))])
  
}


###################################
#
# Function for estimate parameters (mu and sigma squared and G squared and tau squared)
# Arguments:
# Y = data 
# I = number of subjects
# D = number of points in the union grid
# nbasis = number of basis functions in GAM model
#
# Values:
# mean hat = estimated mean 
# sigma.sq.hat  = estimated sigma squared
# pve = proportion of variance explained by the extracted principal components
#
# Note: This function is a modified version of ccb.fpc function in 
# the refund package

param.estimate <- function(Y, I, D, argvals, nbasis = 10, pve = 0.99){

  library(mgcv)
  d.vec <- rep(argvals, each = I)
  
  #estimate mean using gam model
  gam0 <- gam((as.vector(Y) ~ s(d.vec, k = nbasis)), method="REML")
  mu <- predict(gam0, newdata = data.frame(d.vec = argvals))
  Y.tilde <- Y - matrix(mu, I, D, byrow = TRUE)
    
  #Find a raw estimate for G 
  cov.sum <- cov.count <- cov.mean <- matrix(0, D, D)

  for (i in 1:I) {
    obs.points <- which(!is.na(Y[i, ]))
    cov.count[obs.points, obs.points] <- cov.count[obs.points, obs.points] + 1
    cov.sum[obs.points, obs.points] <- cov.sum[obs.points, obs.points] + tcrossprod(Y.tilde[i, obs.points])
  }

  G.0 <- ifelse(cov.count == 0, NA, cov.sum / cov.count)  
  diag.G0 <- diag(G.0)
  diag(G.0) <- NA
  
#smooth G
     
  row.vec <- rep(argvals, each = D)
  col.vec <- rep(argvals, D)
  
  npc.0 <- matrix(predict(gam(as.vector(G.0) ~ te(row.vec, col.vec, k = nbasis), 
                              method="REML", weights = as.vector(cov.count)), 
                         newdata = data.frame(row.vec = row.vec, col.vec = col.vec)), 
                 D, D) #predict the covariance  using fitted model#
  npc.0 <- (npc.0 + t(npc.0))/2

  evalues <- eigen(npc.0, symmetric = TRUE, only.values = TRUE)$values
  evalues <- replace(evalues, which(evalues <= 0), 0) ##compute eigen values#
  npc <- min(which(cumsum(evalues) / sum(evalues) > pve))# npc=prespecified value for the number of principal components #
  efunctions <- matrix(eigen(npc.0, symmetric = TRUE)$vectors[, 
                                                           seq(len = npc)], nrow = D, ncol = npc) #extract the number of eigen functions#
  evalues <- eigen(npc.0, symmetric = TRUE, only.values = TRUE)$values[1:npc] #extract eigen values#
  cov.hat <- efunctions %*% tcrossprod(diag(evalues, nrow = npc, ncol = npc), efunctions)
  DIAG <- (diag.G0 - diag(cov.hat))[floor(D * 0.2):ceiling(D * 0.8)]

  tau.sq <- max(mean(DIAG, na.rm = TRUE), 0)
  sigma.sq <- diag(cov.hat) + tau.sq

  return.obj <- list("mean.hat" = mu, "sigma.sq.hat" = sigma.sq, "G.sq.hat" = diag(cov.hat))
  return(return.obj)

}


###################################
#
# Function for finding bootstrap critical points
# Arguments:
# y = Sample from original data
# y.mean = mean of the sample
# y.sd = sd of the sample
# root.int = Interval for root finding
# simul = TRUE (if simultaneous tolerance factor need to be computed)
#
# Values:
# list of point wise critical points and simultaneous critical points for onesided upper TI 
# 

boot.tolfactor <- function(n.boot, y, y.mean, y.sigma, y.G, root.int, prob, alpha, simul, I, D, argvals){
  

  
  #bootstrap samples   
  ystar <- replicate(n.boot, param.estimate(y[sample(1:I, I, replace = T),], I = I, D = D, argvals = argvals), simplify = T)  # n x n.boot matrix
  
  #bootsrap estimates
  ystar.mean <- matrix(unlist(ystar[1,]), ncol = D, nrow = n.boot, byrow = T) # n.boot x D
  ystar.sigma.sq <- matrix(unlist(ystar[2,]), ncol = D, nrow = n.boot, byrow = T) # n.boot x D
  ystar.G <- sqrt(matrix(unlist(ystar[3,]), ncol = D, nrow = n.boot, byrow = T)) # n.boot x D
  ystar.sigma <- sqrt(ystar.sigma.sq) # n.boot x D
  
  #onesided bootstrap tolerance factors
  k.boot.upper <- onesided.tolfactor(prob = prob, alpha = alpha, y.mean = y.mean, y.sigma = y.sigma, y.G = y.G, ystar.mean = ystar.mean, ystar.sigma = ystar.sigma, ystar.G = ystar.G, root.int = root.int, simul = simul, I, D, n.boot = n.boot)
  #twosided bootstrap tolerance factors
  k.boot.twosided <- twosided.tolfactor(prob = prob, alpha = alpha, y.mean = y.mean, y.sigma = y.sigma, y.G = y.G, ystar.mean = ystar.mean, ystar.sigma = ystar.sigma, ystar.G = ystar.G, root.int = root.int, simul = simul, I, D, n.boot = n.boot)
  
  return(list(k.boot.upper, k.boot.twosided))
  
}

###################################
#
# Function to calculate critical points for onesided Tbs
# Arguments:
# y.mean = mean of the sample
# y.sd = sd of the sample
# ystar.mean  = bootstrap mean
# ystar.sigma = bootsrap variance
# y.sd.corrected = corrected variance 
# root.int = Interval for root finding
# simul = TRUE (if simultaneous tolerance factor is computed)
#
# Values:
# list of pointwise critical points and simultaneous critical points for onesided upper TB 
# 


onesided.tolfactor <- function(prob, alpha, y.mean, y.sigma, y.G, ystar.mean, ystar.sigma, ystar.G, root.int, simul, I, D, n.boot){ 
  
  pwise.obs <- double(D)
  sim.obs <- NULL
  pwise.true <- double(D)
  sim.true <- NULL
  
  #calculate pointwise tolerance factors for obseved and true curves
  for (i in 1:D){
    
    prob.eqn.pwise.obs <- function(cval) { 
      
      ubstar <- ystar.mean[,i] + cval *  ystar.sigma[,i]
      prob.diff <- mean(pnorm(ubstar, y.mean[i], y.sigma[i]) >= prob) - (1 - alpha)
      return(prob.diff)
      
    }
    
    root.temp <- uniroot( prob.eqn.pwise.obs, root.int)$root
    pwise.obs[i] <- solve.prob.eqn(root.temp, prob.eqn.pwise.obs)
    
    prob.eqn.pwise.true <- function(cval) { 
      
      ubstar <- ystar.mean[,i] + cval * ystar.G[,i]
      prob.diff <- mean(pnorm(ubstar, y.mean[i], y.G[i]) >= prob) - (1 - alpha)
      return(prob.diff)
      
    }
    
    root.temp <- uniroot( prob.eqn.pwise.true, root.int)$root
    pwise.true[i] <- solve.prob.eqn(root.temp, prob.eqn.pwise.true)
    
  } 
  
  # calculate simulataneous tolerance factor for observed and true curves
  if(simul == T)
  
  {
    prob.eqn.sim.obs <- function(cval) { 
      
      ubstar <- ystar.mean + cval *  ystar.sigma
      prob.matrix <- t(sapply (1:n.boot, function(x) {pnorm(ubstar[x,], y.mean,  y.sigma)}))
      prob.diff <- mean(apply(prob.matrix, MAR=1, FUN=min) >= prob) - (1 - alpha)
      
      return(prob.diff)
      
    }
    
    root.temp <- uniroot(prob.eqn.sim.obs, root.int)$root
    sim.obs <- solve.prob.eqn(root.temp, prob.eqn.sim.obs)
    
    prob.eqn.sim.true <- function(cval) { 
      
      ubstar <- ystar.mean + cval * ystar.G
      prob.matrix <- t(sapply (1:n.boot, function(x) {pnorm(ubstar[x,], y.mean, y.G)}))
      prob.diff <- mean(apply(prob.matrix, MAR=1, FUN=min) >= prob) - (1 - alpha)
      
      return(prob.diff)
      
    }
    
    root.temp <- uniroot(prob.eqn.sim.true, root.int)$root
    sim.true <- solve.prob.eqn(root.temp, prob.eqn.sim.true)
    
  }
  
  return(list(pwise.obs = pwise.obs,
              sim.obs = sim.obs,
              pwise.true = pwise.true,
              sim.true = sim.true))
  
}


###################################
#
# Function to calculate critical points for twosided TBs
# Arguments:
# y.mean = mean of the sample
# y.sigma = sd of the sample
# ystar.mean  = bootstrap mean
# ystar.var = bootsrap variance
# root.int = Interval for root finding
# simul = TRUE (if simultaneous tolerance factor is computed)
#
# Values:
# list of pointwise critical points and simultaneous critical points for onesided upper TB 
# 


twosided.tolfactor <- function(prob, alpha, y.mean,  y.sigma, y.G, ystar.mean,  ystar.sigma, ystar.G, root.int, simul, 
                             I, D, n.boot){
  
  pwise.obs <- double(D)
  sim.obs <- NULL
  pwise.true <- double(D)
  sim.true <- NULL
  
  #calculate pointwise tolerance factors for obseved and true curves
  for (h in 1:D){
    
    prob.eqn.pwise.obs <- function(cval) {
      
      ubstar <- ystar.mean[,h] + cval *  ystar.sigma[,h]
      lbstar <- ystar.mean[,h] - cval *  ystar.sigma[,h]
      prob.diff <- mean((pnorm(ubstar, y.mean[h],  y.sigma[h]) - pnorm(lbstar, y.mean[h],  y.sigma[h])) >= prob) - (1 - alpha)
      
      return(prob.diff)
      
    }
    
    root.temp <- uniroot(prob.eqn.pwise.obs, root.int)$root
    pwise.obs[h] <- solve.prob.eqn(root.temp, prob.eqn.pwise.obs)
    
    prob.eqn.pwise.true <- function(cval) { 
      
      ubstar <- ystar.mean[,h] + cval * ystar.G[,h]
      lbstar <- ystar.mean[,h] - cval * ystar.G[,h]
      prob.diff <- mean((pnorm(ubstar, y.mean[h], y.G[h]) - pnorm(lbstar, y.mean[h], y.G[h])) >= prob) - (1 - alpha)
      
      return(prob.diff)
      
    }
    
    root.temp <- uniroot( prob.eqn.pwise.true, root.int)$root
    pwise.true[h] <- solve.prob.eqn(root.temp, prob.eqn.pwise.true)
    
    
  }
  
  #calculate simultaneous tolerance factors for obseved and true curves
  if(simul == T)
    
  {
    
    prob.eqn.sim.obs <- function(cval) { 
      
      ubstar <- ystar.mean + cval *  ystar.sigma
      lbstar <- ystar.mean - cval *  ystar.sigma
      
      prob.matrix<-t(sapply(1:n.boot, function(x){pnorm(ubstar[x,], y.mean,  y.sigma) - pnorm(lbstar[x,], y.mean,  y.sigma)}))
      prob.diff<- mean(apply(prob.matrix, MAR=1, FUN=min) >= prob) - (1 - alpha)
      
      return(prob.diff)
      
    }
    
    root.temp <- uniroot(prob.eqn.sim.obs, root.int)$root
    sim.obs <- solve.prob.eqn(root.temp, prob.eqn.sim.obs)
    
    prob.eqn.sim.true <- function(cval) { 
      
      ubstar <- ystar.mean + cval * ystar.G
      lbstar <- ystar.mean - cval * ystar.G
      
      prob.matrix <- t(sapply(1:n.boot, function(x){pnorm(ubstar[x,], y.mean, y.G) - pnorm(lbstar[x,], y.mean, y.G)}))
      prob.diff <- mean(apply(prob.matrix, MAR=1, FUN=min) >= prob) - (1 - alpha)
      
      return(prob.diff)
      
    }
    
    root.temp <- uniroot(prob.eqn.sim.true, root.int)$root
    sim.true <- solve.prob.eqn(root.temp, prob.eqn.sim.true)

  } 
  
  return(list(pwise.obs = pwise.obs,
              sim.obs = sim.obs,
              pwise.true = pwise.true,
              sim.true = sim.true))
  
}

###################################
#
# Function to compute tolerance bands
# Arguments:
# y = functional data that we need to construct tolerance bands for.
# alpha = parameter for confidence level such that 1-alpha = confidence level (default = .05).
# prob = content probability.
# simul = logical argument to indicate simultaneous tolerance bands is constructed or not. 
# n.boot = size of the bootstrap sample to compute bootstrap tolerance factors (default = 500).
#
# Values:
# A list of constructed simultaneous tolerance bands and/or pointwise tolerance bands and estimated parameters. 
# 


TB.calc <- function(y, alpha = .05, prob, simul = TRUE, n.boot = 500){
  
  I <- dim(y)[1] #number of subjects
  D <- dim(y)[2] #number of observations for the subjects (both obseved and missing)
  
  col.names <- as.integer(colnames(y))
  arg <- seq(col.names[1], col.names[length(col.names)], 1) 
 
  root.int <- c(-20,20)
  
  
  set.seed(1234)
  
  require(mgcv)
  # estimate sample mean and sd
  y.param <- param.estimate(Y = y, I = I, D = D, argvals = arg)
  y.mean <- y.param$mean.hat
  y.sigma <- sqrt(y.param$sigma.sq.hat)
  y.G <- sqrt(y.param$G.sq.hat)
 

 
  # compute bootstrap tolerance factors 
  k.boot <- boot.tolfactor(n.boot = n.boot, y = y, y.mean = y.mean, y.sigma = y.sigma, y.G = y.G, root.int = root.int, prob = prob, alpha = alpha, simul = simul, I = I, D = D, argvals = arg)
  

  # the two upper TIs
  
  onesided.boot.obs.u <- y.mean + k.boot[[1]]$pwise.obs * y.sigma
  onesided.boot.true.u <- y.mean + k.boot[[1]]$pwise.true * y.G
  
  #dataframe containing indicator for onesided upper tolerance interval >prob
  onesided.pwise <- cbind(onesided.boot.obs.u, onesided.boot.true.u)
  
  if (simul == T){
    
    #the two simultaneous TIs
    onesided.boot.obs.sim <- y.mean + k.boot[[1]]$sim.obs* y.sigma
    onesided.boot.true.sim <- y.mean + k.boot[[1]]$sim.true * y.G  
    
    onesided.sim <- cbind(onesided.boot.obs.sim, onesided.boot.true.sim)
    
  }
  
  # the two sided pointwise TIs
  twosided.boot.obs.u <- y.mean + k.boot[[2]]$pwise.obs * y.sigma
  twosided.boot.true.u <- y.mean + k.boot[[2]]$pwise.true * y.G
  
  twosided.boot.obs.l <- y.mean - k.boot[[2]]$pwise.obs * y.sigma
  twosided.boot.true.l <- y.mean - k.boot[[2]]$pwise.true * y.G
  
  twosided.pwise <- cbind(twosided.boot.obs.l, twosided.boot.obs.u , twosided.boot.true.l, twosided.boot.true.u)
  
  #the twosided bootstrap simultaneous TIs
  
  if (simul == T){
    
    twosided.boot.obs.sim.u <- y.mean + k.boot[[2]]$sim.obs * y.sigma
    twosided.boot.obs.sim.l <- y.mean - k.boot[[2]]$sim.obs * y.sigma
    
    twosided.boot.true.sim.u <- y.mean + k.boot[[2]]$sim.true * y.G
    twosided.boot.true.sim.l <- y.mean - k.boot[[2]]$sim.true * y.G
    
    #dataframe containing indicator function for simultaneous TI
    twosided.sim <- cbind(twosided.boot.obs.sim.l, twosided.boot.obs.sim.u,
                          twosided.boot.true.sim.l, twosided.boot.true.sim.u)
    
  }  
  
  if (simul == T){
    
    result <- list(onesided.pwise, onesided.sim, twosided.pwise, twosided.sim, cbind(y.mean, y.sigma, y.G))
    names(result) <- c("onesided.pwise TB", "onesided.sim TB", "twosided.pwise TB", "twosided.sim TB", "estimated parameters")
    
  }else {
    
    result <- list(onesided.pwise, twosided.pwise, cbind(y.mean, y.sigma, y.G))
    names(result) <- c("onesided.pwise TB", "twosided.pwise TB", "estimated parameters")
    
  }
  
  return(result)
  
}