library(viridis); library(akima)
library(fields); library(MASS)     ;     library(rgl)       ; 
library(plot3D); library(spatstat) ;     library(ggplot2)   ;
library(nloptr); library(lhs)      ;     library(DiceDesign);
library(GPfit); library(miscTools)

#### This file contains code to run:
#### sim_initial_XY: generate maximin LHD and data for given function
#### GPfit: fitting GP using MCMC
#### MCMCsummary: needed when generating predictions and AEI values for summary of MCMC iterations
#### yhatstd:  generate predictions at one or multiple points
#### fhat: implements yhatstd but for one point, allows fixing variables so yhatstd becomes a function of fewer variables
#### GPderiv:  calculates derivative of GP at one or more points
#### fhat.prime: implements GPderiv at one point, allows fixing variables
#### fhat.opt: optimizes GP with potentially fixed variables, uses fhat and fhat.prime and line search
#### makepts: generates Qt candidate points for local variable importance
#### LVS: performs local variable selection algorithm
#### search.space: generates AEI search spaces
#### AEI: calculates AEI at one or more points
#### fAEI: implements AEI at one point, allows fixing variables
#### AEIderiv: calculates derivative of AEI at one or more points
#### fAEI.prime: implements AEIderiv at one point, allows fixing variables
#### LocalAEI.opt: performs localized AEI optimization
#### SOLID: performs SOLID algorithm for Nadd points
#### GVS: performs GVS algorithm for Nadd points
#### None: performs None algorithm for Nadd points.  Also used for Oracle but with subset of variables.

####################################
sim_initial_XY <- function( name , N, P, tau , debugg = F){
  # initial design for simulation 
  
  if(name != 'robot'){# P = 8
    XX = maximinLHS( n = N, k = P)
    XX = maximinSA_LHS( design = XX, it = 50, Imax = 50)$design
    
  } else {
    if(!debugg) these = sample( 1:nrow(XXX), 3000 ) else these = 
        sample( 1:nrow(XXX), N*2 )
    small = XXX[these ,];
    try <- cover.design( small ,   N
                         , num.nn=  40 )$best.id
    
    XX = small[try,]
  }  
  
  YY = camel(  XX, name = name, P = P ) + rnorm(N, 0, tau)
  
  return(list(XX = XX, YY = YY ))
}# end initial XY



  
colMedians <- function( x4 ){
  p = ncol(x4) ; if(is.null(p)) p = 1
  
  if( p > 1)  med = apply( x4, MARGIN = 2, FUN = 'median') else
    med = median(x4)
  return( med )
} #end function




#Fast compuation of log multivariate normal likelihood
mvn_fast <- function(Y, mu, eta, U){ 
  # U is the cholesky decomposition (upper triangular)
  l = .5*( 
    length(Y) * log(2*pi) + 
      2*sum( log( diag(U) ) ) - length(Y)*log(eta) +
      eta*
      c(crossprod(
        forwardsolve( U, Y - mu , upper = T, transpose = T)
      ))
  )
  
  return(-l)
}


#Returns Winv and cholesky of W
inv <- function( X ,gamma,ratio, giveInv = TRUE){
  stopifnot( is.numeric(ratio)) 
  K<-exp(-rdist(sweep(X,2,sqrt(gamma),"*"))^2)
  W<-ratio*K+(1-ratio)*diag(nrow(X))
  
  C    <- chol( W )
  PREC <- NULL ; if(giveInv) PREC <- chol2inv( C )
  
  return(list(
    PREC = PREC, 
    C    = C
  ))
} # end inv function 


#Updates bs terms
var_active <- function( X, Y, mu, eta, gamma, us, bs, ratio, theta , j){
  
  ###---- calculates the probability of variables being globally active
  ###---- j^th covariate
  
  cang    = gamma
  
  #-- first calculating log likelihood under gamma_j = 0 (i.e. bs[j] = 0)
  #-- the X matrix can still be full, the gamma[j] = 0 means X[,j] ignored
  cang[j] = 0 
  Uzero   = inv( X, cang, ratio, giveInv = FALSE)$C
  loglik0 = mvn_fast(Y, mu, eta, Uzero )
  
  
  #-- then calculating log likelihood under gamma_j = u_j (i.e. bs[j] = 1)
  cang[j] =  us[j]  
  Uone    =  inv(X, cang, ratio, giveInv = FALSE)$C
  loglik1 =  mvn_fast( Y, mu, eta, Uone )
  
  
  p0.p1 = exp( loglik0 + log(1-theta) - 
                 (loglik1 + log(  theta)  ) )
  
  bk1   = 1 / ( 1 + p0.p1 )
  
  
  ## updating bs[j] and gamma[j]
  bs[j]    <- rbinom( 1 , 1 ,  bk1     )
  gamma[j] <- abs(    us[j] * bs[j]    )  
  
  
  #-- updating U matrix (cholesky)
  if(bs[j] == 1 )  U = Uone
  if(bs[j] == 0 )  U = Uzero
  
  
  return(list( U = U, gamma = gamma, bs = bs, bk1 = bk1 ))
  
}#end var active


#Updates theta parameter
calc_theta <- function( bs, theta_a = 1, theta_b = 1){
  ###---- updates global probability of any variable being active
  
  p = length(bs)
  theta <- rbeta(1,
                 theta_a     + sum(bs) ,
                 theta_b + p - sum(bs) )
  
  return(theta)  
}# end calculate theta


#Update eta parameter
new_calc_eta <- function( U, Y, mu, eta_a = .1, eta_b  = .1){
  ###---- updates eta value, given sum of squares and gamma conjugate
  ###---- U is the cholesky of Xgamma
  n = length(Y)
  xprod <- crossprod( backsolve( U, Y - rep(mu, n )
                                 , upper = T, transpose =T )) 
  eta   <- rgamma(1,
                  shape= n/2 + eta_a ,
                  rate= eta_b + 0.5 * xprod 
  )
  return(eta)
}# end new calc eta


#Updates r parameter
new_sample_r <- function( Y, X, gamma, eta, mu, ratio , ra = 10, rb = 1){
  
  ##--- updates 'ratio' using metropolis sampling
  
  U <- inv(X , gamma , ratio , giveInv = FALSE)$C
  
  ### propose
  ## sampling from beta(ra, rb), does not depend on previous values
  can.ratio <- rbeta(1, 10, 1 ) 
  if(can.ratio > .99995) can.ratio = .99995
  
  #Have to calc U (cholesky) under candidate ratio
  can.U <- inv(X,gamma,can.ratio, giveInv = FALSE)$C
  
  Accept.prob <- min(1,
                     exp(
                       (mvn_fast(Y, mu, eta, can.U )-
                          mvn_fast(Y, mu, eta, U     )
                       )+ #likelihood
                         (
                           dbeta(can.ratio,ra,rb,log=TRUE)-
                             dbeta(ratio    ,ra,rb,log=TRUE) )+ # prior
                         (
                           dbeta(ratio    ,10,1,log=TRUE)-
                             dbeta(can.ratio,10,1,log=TRUE))  # candidate
                     )
  )
  
  Accept.ratio <- rbinom(1,1, prob = Accept.prob)
  
  if(Accept.ratio){
    ratio  <-  can.ratio
    U      <-  inv(X,gamma,ratio, giveInv = FALSE)$C
    
  }  #end accept 
  
  return( list( U = U, ratio = ratio ))
}# end function


#Updates beta (just mu for this application)
new_calc_beta <- function( U , Y, eta, mu_sd = 100 ){
  ##--- updates beta (mu) term, given Gibbs sampling, normal conjugate
  
  Uinv<-solve(U)
  Winv<-Uinv%*%t(Uinv)
  w   <- sum(Winv)
  
  sywi <- sum(Y%*%Winv)
  
  mu <- rnorm(1 , mean = (eta * sywi)/(mu_sd^(-2)+eta*w)
              ,   sd = sqrt(1/(mu_sd^(-2)+eta*w) ) )
  
  return(mu)
}# end new_calc_beta


#Proposal distribution for u's
propose_us_range <- function( us){
  #' creates proposal range for us values
  
  
  #Needs a minimum threshold for computation
  low<-exp(log(us)-1)
  upp<-exp(log(us)+0.25)
  
  #Prevents us going to 0
  if(log(low) < -25){
    low<-1e-10
    upp<-1
  }
  
  return( list( low =  low, upp = upp ))  
}# end propose_us_range

new_sample_us <- function( Y, X, mu, eta, bs, us, gamma, ratio, U
                           , ua = 1, ub = .1, j = 1
                           #, mul = 50  thresh = 30, maxx = 5
                           ){
  #' @param Y vector of observations
  #' @param X matrix of inputs
  #' @param mu mean term
  #' @param eta related to variance
  #' @param bs vector of 1s/0s indicating globally active/inactive
  #' @param us gamma[j] = us[j] * bs[j]
  #' @param gamma correlation parameter 
  #' @param ratio value between 0 and 1, related to noise
  #' @param U cholesky decomposition of X %*% gamma 
  #' @param ua if inactive, gamma sampled from gamma(ua, ub)
  #' @param thresh if us < thresh, then sampling from Uniform(0, maxx)
  #' @param mul if us >= thresh, then sampling from Uniform( us - mul, us + mul)
 
  ####-- proposes new 'us' values based on metropolis sampling
  ####-- uses the 'propose_us_range' to come up with lower/upper bounds
  
      rr      = propose_us_range( us[j] )
      
      can.u.low = rr$low ;  
      can.u.upp = rr$upp ;  
      
      # candidate u
      can.u <- runif(1, can.u.low , can.u.upp )
      
      # Need bounds for proposal probability of old | can.u
        rr.can<-propose_us_range(can.u)
        prop.can.low<-rr.can$low
        prop.can.upp<-rr.can$upp
        
        #Check that us[j] is in these bound, otherwise its proposal prob=0
        prop.check<- (us[j] >= prop.can.low & us[j] <= prop.can.upp)
        #prop.check=1
        
      if(prop.check){
        # Calculate W for proposal
        cang       <- gamma
        cang[j]    <- bs[j] * can.u
        Ucan       <- inv(X,cang,ratio, giveInv = FALSE)$C
        
        Accept.prob<-min(1,
                         exp(
                           #likelihood  
                           (mvn_fast(Y, mu, eta, Ucan )-mvn_fast(Y, mu, eta, U))
                             #prior
                             +(dgamma( can.u , ua, ub , log = TRUE)-dgamma( us[j] , ua, ub , log = TRUE))
                           #proposal (divides proposal prob of old|candidate by candidate|old)
                           #only works for uniform distribution
                           +(log(1/(prop.can.upp-prop.can.low))-log(1/(can.u.upp-can.u.low)))
                         )
        )
      }else{
       Accept.prob=0
      }  
      
        #For debugging
      #if(is.na(Accept.prob)){
      #  Accept.prob=0
      #  print("Accept Prob = NA")
      #  print(paste("us[",j,"]=",us[j]))
      #  print("Prop Check =",prop.check)
      #  print(paste("Prop Low=",prop.can.low))
      #  print(paste("Prop High=",prop.can.upp))
      #}
      
      Accept.ratio<- rbinom( 1, 1 , prob = Accept.prob )
      
      if(Accept.ratio){ 
        
        us[j]    <-  can.u
        gamma[j] <-  bs[j]*us[j]
        
        #Have to update U again!
        U <-  inv(X,gamma,ratio, giveInv = FALSE)$C
        
      }# end accept
      
          
          
  return(list( 
        U = U, gamma = gamma, us = us
      , can.u = can.u , Accept.prob = Accept.prob
      ))
  
}# end new sample us


#Initial MLE estimates, assumes no nugget effect
new_mle <- function( Y, X ){
  ##-- 
  ##-- code to estimate MLE terms in beginning of problem
  
  require(mlegp)
  #Starts with no nugget model
  test  <- mlegp(X,Y, nugget = NULL,verbose = 0)
  mu    <- test$mu
  gamma <- test$beta
  bs    <- as.numeric( gamma > 0 ) #Should always be 1
  us    <- gamma
  eta   <- 1/test$sig2
  ratio <- 0.99
  
  
  theta <- 0.25 #as recommended in Linkletter
  
  return(list( gamma = gamma, bs = bs, us = us, eta = eta
               , ratio = ratio, theta = theta , mu = mu ))
  
}#end new_mle


####################################################
#### GPFIT assumes constant mean
####################################################
### this function estimates the parameters of the Gaussian Process
GPfit <- function(Y,X, mu_sd = 100 
                  , eta_a      = 0.1, eta_b = 0.1## eta prior distribution
                  , theta_a    = 1, theta_b = 1  ## theta prior distribution
                  , ra         = 20,  rb    = 1  ## ratio prior distribution
                  , ua         = 1, ub = .02     ## us prior distribution
                  , iters  = 400, thin   = 5 
                  , MLE  = TRUE ## initial max for gamma_k estimates 
                  , usepast  = NULL ### usepast <- GPfit(X,Y)
                  , initial =  NULL ### set to TRUE if you want to supple own initial values in a list
                  ,varselect = TRUE ## variable selection
){
  require(fields); require(MASS); require(mlegp)
  # Bookkeeping
  #Forces X to be a matrix
  if(!is.matrix(X)) X<-as.matrix(X)
  n     <- length(Y); 
  p     <- ncol(X)
  
  ##################################
  #  Initial Values
  ##################################

  ### use past values
  if(!is.null(usepast)){
    # usepast = full
    last_iter <- usepast$iters
    ## matrices
    mu    <- usepast$mu[   last_iter, ]
    gamma <- usepast$gamma[last_iter, ] 
    bs    <- usepast$bs[   last_iter, ]  
    us    <- usepast$us[   last_iter, ]  
    # vectors ... 
    beta  <- usepast$beta[ last_iter]
    sigma <- usepast$sigma[last_iter  ]
    eta   <- usepast$eta[  last_iter  ]
    ratio <- usepast$ratio[last_iter  ] 
    theta <- usepast$theta[last_iter  ]
    
    
  } else{
    # Initial values
    
    if(!is.null(initial)){
      gamma  = initial$gamma
      bs     = initial$bs
      us     = initial$gamma
      eta    = initial$eta
      ratio  = initial$ratio
      theta  = initial$theta
      mu     = initial$mu
    }else{
    
    #Use MLEGP function
    #-----------------------------
    mle    = new_mle(Y = Y, X = X)
    #-----------------------------
    gamma  = mle$gamma
    bs     = mle$bs
    us     = mle$gamma
    eta    = mle$eta
    ratio  = mle$ratio
    theta  = 0.90
    mu     = mle$mu
    }
  } ### end continue 
  
  
  ##################################
  ## Storage
  ##################################
  keep.beta  <- matrix(0,iters, 1)
  keep.mu    <- matrix(0,iters, n)
  keep.eta   <- rep(0,iters)
  keep.gamma <- matrix(0,iters,p)
  keep.us    <- matrix(0,iters,p)
  keep.bs    <- matrix(0,iters,p)
  keep.ratio <- rep(0,iters)
  keep.theta <- rep(0,iters)
  keep.llik  <- rep(0,iters)
  tick       <- proc.time()[3]
  
  
  ################################
  ### Calculate U = chol(W)
  ################################
  U<-inv(X,gamma,ratio,giveInv=TRUE)$C
  
  
  ##################################
  # MCMC!        
  ##################################
  
  #First iteration values are the initial values
  iter              <- 1
  keep.beta[iter,]  <- mean(mu)
  keep.mu[iter,  ]  <- mu
  keep.eta[iter]    <- eta
  keep.gamma[iter,] <- gamma
  keep.us[iter,]    <- us
  keep.bs[iter,]    <- bs
  keep.ratio[iter]  <- ratio
  keep.theta[iter]  <- theta
  keep.llik[iter]   <- mvn_fast(Y,mu,eta, U) 
  
  for(iter in 2:iters){
    for(thn in 0:thin  ){
      
      ##################################
      # Sample bs  (Gibbs)
      ##################################
      if(varselect){ #otherwise bs=1 always so gamma=us
        
        for(j in sample(1:p)){# j = 1
          
          #---------------
          rr = var_active(X, Y, mu, eta, gamma, us, bs, ratio, theta, j = j)  
          #---------------
          U     = rr$U
          gamma = rr$gamma
          bs    = rr$bs  # new active status
          bk1   = rr$bk1 # probability of being active
          #print(c(round(j),round(log(initial$gamma[j]),4),round(bk1,3)))
        }# end covariate loop
        
      }# end variable selection
      
      ##################################
      # Sample us      
      ##################################    
      
      for(j in sample(1:p) ){
        
        #----------------------
        rr = new_sample_us( Y, X, mu, eta, bs, us, gamma
                            ,ratio , U = U, ua = ua, ub = ub, j = j
                            # ,thresh = thresh, maxx = maxx , mul = mul
        )
        #----------------------
        U     = rr$U  
        gamma = rr$gamma
        us    = rr$us 
        can.u = rr$can.u ## just FYI 
        Accept.prob = rr$Accept.prob ## just FYI
        
      }# loop covariates
      
      ##################################
      # Sample theta (Gibbs)
      ##################################    
      
      theta = calc_theta( bs, theta_a = theta_a, theta_b = theta_b)
      
      
      ##################################
      # Sample r (Metropolis Hastings)
      ##################################
   
      rr    = new_sample_r( Y, X, gamma, eta, mu, ratio, ra = ra, rb = rb)
      ratio = rr$ratio
      U     = rr$U 
      
      
      ##################################
      # Sample mu (Gibbs)
      ##################################
      
      mu = new_calc_beta( U = U, Y = Y, eta, mu_sd = mu_sd )
      
      ##################################
      # Sample eta (Gibbs)
      ##################################
      
      eta = new_calc_eta( U, Y, mu , eta_a = eta_a, eta_b = eta_b  )
      
      ############################
      # End thinning
      ############################
    }# end thin
    
    ################################
    # Store everything    
    ################################

    keep.beta[iter,]  <- mean(mu)
    keep.mu[iter,  ]  <- mu
    keep.eta[iter]    <- eta
    keep.gamma[iter,] <- gamma
    keep.us[iter,]    <- us
    keep.bs[iter,]    <- bs
    keep.ratio[iter]  <- ratio
    keep.theta[iter]  <- theta
    keep.llik[iter]   <- mvn_fast(Y,mu,eta, U) 
    
    ################################
    # Update Message
    ################################    
    if(iter %% 1500 == 0 ) print(
      paste("Iteration", iter,";", 100*round(iter/iters,2),"%",";",
            round((proc.time()[3] - tick)/60,2)," minutes" ))
    
    ##################################
  } # END MCMC
  ##################################
  
  
  tock <- proc.time()[3]
  
  ################################
  # Output
  ################################   
  
  list(      mu    = keep.mu,
             eta   = keep.eta,
             gamma = keep.gamma,
             iters = iters,
             minutes= (tock-tick)/60,
             us    = keep.us,
             bs    = keep.bs,
             ratio = keep.ratio,
             theta = keep.theta,
             llik  = keep.llik,
             sigma = keep.ratio / keep.eta, 
             beta  = keep.beta
  ) 
} ## end function




###################################
## MCMC Summary - to prevent repeated summary calculations
###################################
#If you supply ms then it gives vmat for each row
#summary= iteration #, "median", or "mean"
MCMCsummary<-function(Y,X,full,summary,burn=0,ms=NULL){
  n = length(Y) ; p = length(X) / n
  
  if(p == 1 ){ 
    X = matrix( X , ncol = 1 )
  }# end p = 1  
  
  if(burn > 0){
    burn<-1:burn
    full$gamma<-as.matrix(full$gamma[-burn,])
    full$eta  <-full$eta[-burn]
    full$sigma<-full$sigma[-burn]
    full$ratio<-full$ratio[-burn]
    full$mu   <-full$mu[-burn,]
  }
  
  ################## 
  ## Terms and Covariance Structure
  #################
    
    #Grabs values for specific MCMC iteration
    if(is.numeric(summary)){
      gamma = full$gamma[summary,]
      eta   = full$eta[summary]
      sig2  = full$sigma[summary]
      r     = full$ratio[summary]
      mu    = full$mu[summary,1]
      tau2  = (1-r)/eta 
    }
    
    #Grabs median values across MCMC iteration
    if(summary=="median"){
      
      if(p > 1 ){     gamma = colMedians(full$gamma)  } else
        if(p == 1){   gamma = median(full$gamma)   }
      
      eta  = median(full$eta,na.rm=T)
      r    = median(full$ratio,  na.rm = T )  
      sig2 = r/eta
      mu   = median( full$mu[,1] )
      tau2 = (1-r)/eta #need to do it this way
    }
    
    if(summary=="mean"){
      if(p > 1 ){     gamma = colMeans(full$gamma)  } else
        if(p == 1){   gamma =     mean(full$gamma)  }
      
      eta   = mean(full$eta,na.rm=T)
      r     = mean(full$ratio,  na.rm = T )
      sig2  = r/eta
      mu    = mean( full$mu[,1] )
      tau2  = (1-r)/eta #need to do it this way
    }
    
    #if ms=NULL then it just does X
    if(p>1){
      Xp = sweep( rbind(X,ms), MARGIN = 2, sqrt(gamma)  , "*")  
    }else{
      Xp = rbind(X,ms)*sqrt(gamma)
    }
    
    
    #Correlation Matrix of points in X and ms (if ms!=NULL)
    K<-exp(-rdist(Xp)^2)
    V<-1/eta*(r*K[1:n,1:n]+(1-r)*diag(n))
    Vinv<-chol2inv(chol(V))
    if(is.null(ms)){
      return(list("gamma"=gamma,"eta"=eta,"sig2"=sig2,"r"=r,"mu"=mu,"tau2"=tau2,"Vinv"=Vinv))
    }else{
      vmat<-c(sig2*K[-(1:n),1:n])
      return(list("gamma"=gamma,"eta"=eta,"sig2"=sig2,"r"=r,"mu"=mu,"tau2"=tau2,"Vinv"=Vinv,"vmat"=vmat))
    }
    
}


###################################################
### Predition and standard error variances at single point "par"
### Note: a true prediction variance adds tau^2 to the standard error variance
###################################################
### This is a funciton of variables in X that are not "fixed".  This is necessary to do localized optimization
### Let pstar=length(par) so ncols(X) = p = pstar + qstar where qstar = length(fix[[1]])
### Y, X 


###################################
### If we want to use yhatstd for localized optimization we need it to be a function of only the active variables
### This is done by defining par for only the active variables and adding
### fix  = list of [[1]] indices correspond to columns of X that we want to fix 
###               and  [[2]] the values you want to fix them to
### par could be a single point or multiple points
yhatstd <- function(par,
                    fix=NULL,
                    Y,
                    X,
                    parms, #if anything is supplied after this, performs MCMCsummary inside function
                    full=NULL, 
                    summary=NULL,
                    burn=0){
  
   n = length(Y) ; p = length(X) / n ;  
   
   #Appends fixed values to par to match number of columns of X
   #Needed to make yhatstd a function of only locally active variables
   
   if(p==1){
     X<-matrix(X,nrow=n,ncol=1,byrow=T)
   }
   
   
   if(!is.null(fix)){
     active<-setdiff(1:p,fix[[1]])
     paractive<-par
     m<-length(par)/length(active)
     #Single point prediction?
     if(m==1){
       par<-rep(NA,p)
       par[active]<-paractive
       par[fix[[1]]]<-fix[[2]] 
     }else{
       #Multiple point prediciton
       par<-matrix(NA,nrow=m,ncol=p)
       par[,active]<-paractive
       par[,fix[[1]]]<-matrix(rep(fix[[2]],m),nrow=m,ncol=length(fix[[1]]),byrow=TRUE)
     }
   }else{
     m<-length(par)/p
   }
   
  #Checks if MCMC parameter values have been supplied, otherwise it runs MCMCsummary inside this function
  if(is.null(parms)){
    #vmat is generated here
    parms<-MCMCsummary(Y,X,full,summary,burn,par)
    attach(parms)
  }else{
    attach(parms)
    #Need to calculate vmat for par
    if(p>1){
      Xp = sweep( rbind(X,par), MARGIN = 2, sqrt(gamma)  , "*")
    }else{
      Xp<-rbind(X,par)*sqrt(gamma)
    }
    
    
    #Correlation Matrix of points in X and ms (if ms!=NULL)
    if(m==1){
      K<-exp(-rdist(Xp[1:n,],t(as.matrix(Xp[-c(1:n),])))^2)  
    }else{
      K<-exp(-rdist(Xp[1:n,],Xp[-c(1:n),])^2)  
    }
    
    vmat<-sig2*K
  }
  
  pred<-c(mu+t(vmat)%*%Vinv%*%(Y-mu))
  std <-c(sig2-diag(t(vmat)%*%Vinv%*%vmat))   
  
  detach(parms)
  
  return(list("pred"=pred,"std"=std))
  
}# end yhatstd function


###################################################
## Same yhatstd but just uses one input
## Needed to run optim function
###################################################
fhat <- function(par,fix=NULL,Y,X,parms,full=NULL,summary=NULL,burn=0){
  if(!is.null(parms)){
    yhatstd(par=par,fix=fix,Y=Y,X=X,parms=parms)$pred  
  }else{
    if(!is.null(summary)){
      yhatstd(par=par,fix=fix,Y=Y,X=X,parms=NULL,full=full,summary=summary,burn=burn)$pred  
    }else{
      iters<-(1:full$iters)
      if(burn>0){
        iters<-iters[-c(1:burn)]  
      }
      #Thin to produce at most 100 iterations (for computational speed)
      iters<-floor(seq(min(iters),max(iters),length.out=min(100,length(iters))))
      preds<-lapply(iters,function(x){
        yhatstd(par=par,fix=fix,Y=Y,X=X,parms=NULL,full=full,summary=x,burn=0)$pred  
      })
      mean(unlist(preds))
    }
  }
  
}
###################################################


#################################
#################################
#### This function gives the first derivative of marginal GP (averaged over iters) at one point par
###      : requires yhatstd( ... ) formula
###  assumes constant mean and squared exponential correlation function
###  includes nugget term but only applies if design has replicated points
###  if using fix!=NULL then par only involves active factors in ascending order
GPderiv <- function(par,
                    fix=NULL,
                    Y,
                    X,
                    parms, #if anything is supplied after this, performs MCMCsummary inside function
                    full=NULL, 
                    summary=NULL,
                    burn=0){
  n = length(Y) ; p = length(X) / n
  
  if(p == 1 ){ 
    X = matrix( X , ncol = 1 )  ; 
  }# end p = 1  
  
  if(!is.null(fix)){
    active<-setdiff(1:p,fix[[1]])
    paractive<-par
    m<-length(par)/length(active)
    #Single point prediction?
    if(m==1){
      par<-rep(NA,p)
      par[active]<-paractive
      par[fix[[1]]]<-fix[[2]] 
    }else{
      #Multiple point prediciton
      par<-matrix(NA,nrow=m,ncol=p)
      par[,active]<-paractive
      par[,fix[[1]]]<-matrix(rep(fix[[2]],m),nrow=m,ncol=length(fix[[1]]),byrow=TRUE)
    }
  }else{
    m<-length(par)/p
  }
  
  
  #Checks if MCMC parameter values have been supplied, otherwise it runs MCMCsummary inside this function
  if(is.null(parms)){
    #vmat is generated here
    parms<-MCMCsummary(Y,X,full,summary,burn,par)
    attach(parms)
  }else{
    attach(parms)
    #Need to calculate vmat for par
    if(p>1){
      Xp = sweep( rbind(X,par), MARGIN = 2, sqrt(gamma)  , "*")
    }else{
      Xp=rbind(X,par)*sqrt(gamma)
    }
    
    
    #Correlation Matrix of points in X and ms (if ms!=NULL)
    if(m==1){
      K<-exp(-rdist(Xp[1:n,],t(as.matrix(Xp[-c(1:n),])))^2)  
    }else{
      K<-exp(-rdist(Xp[1:n,],Xp[-c(1:n),])^2)  
    }
    
    vmat<-sig2*K
  }
    if(m==1){
      if(p>1){
        deriv<- -2*diag(gamma)%*%apply(X,1,function(x){par-x})%*%(vmat*Vinv%*%(Y-mu))    
      }else{
        deriv<- -2*gamma*t(par-X)%*%(vmat*Vinv%*%(Y-mu))    
      }
    }else{
      deriv<-lapply(1:m,function(y){
        if(p>1){
          -2*diag(gamma)%*%apply(X,1,function(x){par[y,]-x})%*%(vmat[,y]*Vinv%*%(Y-mu))    
        }else{
          -2*gamma*t(par[y,]-X)%*%(vmat[,y]*Vinv%*%(Y-mu))    
        }
        
      })
      deriv<-matrix(unlist(deriv),nrow=p,ncol=m)
    }
    
    if(!is.null(fix)){
      if(m==1){
        deriv<-deriv[active] #active defined earlier in code  
      }else{
        deriv<-deriv[active,]
      }
      
    }
    
    detach(parms)
    
    return(list("deriv"=deriv))
  
} # end function

#Needed for optim, just grabs deriv from GPderiv
#If !is.null(fix) then par only involves the active factors
fhat.prime<-function(par,fix=NULL,Y,X,parms,full=NULL,summary=NULL,burn=0){
  if(!is.null(parms)){
    GPderiv(par,
            fix=fix,
            Y,
            X,
            parms)$deriv  
  }else{
    if(!is.null(summary)){
      GPderiv(par=par,fix=fix,Y=Y,X=X,parms=NULL,full=full,summary=summary,burn=burn)$deriv
    }else{
      iters<-(1:full$iters)
      if(burn>0){
        iters<-iters[-c(1:burn)]  
      }
      #Thin to produce 100 iterations (for computational speed)
      iters<-floor(seq(min(iters),max(iters),length.out=min(100,length(iters))))
      derivs<-lapply(iters,function(x){
        GPderiv(par,
                fix=fix,
                Y,
                X,
                parms=NULL,
                full=full,
                summary=x,
                burn=0)$deriv  
      })
      
        derivs<-matrix(unlist(derivs),nrow=length(iters),ncol=length(derivs[[1]]),byrow=TRUE)
        colMeans(derivs)  
      
    }
  }
  
  
  
}


#Max estimation with line search of 5 largest observed values
#summary can be t, "median", "mean", or NULL
#NULL gives the max of fhat as defined in the paper

fhat.opt<-function(fix,Y,X,parms,full=NULL,summary=NULL,burn=0,lb=0,ub=1){
  
  if(!is.matrix(X)) X<-as.matrix(X)
  n     <- length(Y); 
  p     <- ncol(X)
  
  if(p>1){
    starts<-X[sort(Y,decreasing=TRUE,index.return=TRUE)$ix,][1:5,]  
  }else{
    starts<-X[sort(Y,decreasing=TRUE,index.return=TRUE)$ix][1:5]  
  }
  
  if(!is.null(fix)){
    starts=starts[,-fix[[1]]]
  }
  
  if(p>1){
    maxs<-apply(starts,1,function(x){
      maxloc<-optim(par=x,fhat,gr=fhat.prime,
                    method="L-BFGS-B",lower=lb,upper=ub,
                    control=list(fnscale=-1),
                    fix=fix,
                    Y=Y,
                    X=X,
                    parms=parms,
                    full=full,
                    summary=summary,
                    burn=burn)
      return(list(maxloc$par,maxloc$value))
    }
    )
    
    #Pulls max values from line searches and finds max of them all
    #Note: if there are many maximum it will pick the first it encounters
    #which will equal the observation with the largest observed value
    maxval<-which.max(unlist(lapply(maxs,function(x){x[[2]]})))
    maxloc<-maxs[[maxval]]
    if(!is.null(fix)){
      expand.max<-rep(NA,p)
      expand.max[fix[[1]]]=fix[[2]]
      expand.max[-fix[[1]]]=maxloc[[1]]
      maxloc[[1]]=expand.max
    }
    
  }else{
    maxs<-sapply(starts,function(x){
      maxloc<-optim(par=x,fhat,gr=fhat.prime,
                    method="L-BFGS-B",lower=lb,upper=ub,
                    control=list(fnscale=-1),
                    fix=fix,
                    Y=Y,
                    X=X,
                    parms=parms,
                    full=full,
                    summary=summary,
                    burn=burn)
      return(list(maxloc$par,maxloc$value))
    }
    )
    
    #Pulls max values from line searches and finds max of them all
    #Note: if there are many maximum it will pick the first it encounters
    #which will equal the observation with the largest observed value
    maxval<-which.max(unlist(maxs[2,]))
    maxloc<-unlist(maxs[1,1])
    
  }
  
  
  
  return(maxloc)
}



#### function creates mm^2 prediction points on
### grid with specified x and y value ranges
### and adds runif points for specified dimension, p
# 4/06/17: even if p = 1 
# 3/24/17: including an option for normally distributed around MAXLOC 
# 2/02/17: random seed 
# 1/31/17: handles p > 2 dimensions
makepts <- function( maxloc = NULL , ep = .10, p,
                     mm = 30, normal = FALSE){
  require(truncnorm)
  #Option 1: Randomly generated
  if(!normal & is.null(maxloc)){
    pts<-matrix(runif(mm*p),nrow=mm,ncol=p)
  }#End Option 1
  
  #Option 2: Randomly generated about maxloc in -/+ ep rectangle
  if(!normal & !is.null(maxloc)){
        #does first coordinate
        pts<-matrix(runif(mm,
                          max(0,maxloc[1]-ep), #lower
                          min(1,maxloc[1]+ep)  #upper
                         ),nrow=mm,ncol=1
                  )
        #does others if available
        if(p>1){
          for(j in 2:p){
            pts<-cbind(pts,runif(mm,
                                 max(0,maxloc[j]-ep),
                                 min(1,maxloc[j]+ep)
                                 )
                       )  
          }
        }
    
  } #End Option 2
  
  #Option 3: Truncated normal about maxloc by ep
  
  if(normal & !is.null(maxloc)){
   pts<-matrix(rtruncnorm(mm*p,
                      mean=maxloc, 
                      sd=ep,       
                      a = 0,
                      b = 1
                ),nrow=mm,ncol=p,byrow=TRUE
    )
  }#End Option 3
  
    return( pts )
  
} # end function


#Step 7 of Algorithm 2

LVS<-function(Y,X,full,delta,rho,m,mm,burn=0){
  #delta = localized range
  #rho   = local importance cutoff
  #m     = # posterior samples to look at (after burn in)
  n<-length(Y)
  p<-length(X)/n
  
  #Step 1: choose MCMC samples by thinning, not random sample
      iters<-full$iters-burn
      if(m > iters){m=iters}
      thin<-floor(iters/m)
      test<-seq(1,iters,by=thin)
      m<-length(test)
      
  #Steps 2 - 7, calculate Rkt^2
      
    R2s<-lapply(1:m,function(x){
        t<-test[x]
        #Calculate parms
        parms<-MCMCsummary(Y,X,full,summary=t,burn=burn)
        # Step 3: Estimate max with line search
          maxloc<-fhat.opt(fix=NULL,Y=Y,X=X,parms=parms)[[1]]
          
          # Step 4: Construct prediction points
          
          Qt<-makepts(maxloc=maxloc,ep=delta,p=p,mm=mm,normal=TRUE)
          
          # Step 5: Baseline predictions
          base<-yhatstd(Qt,fix=NULL,Y=Y,X=X,parms)$pred
                  
          # Step 6-7: Alternative predictions
          alt<-matrix(NA,nrow=length(base),ncol=p)
          for(j in 1:p){
            zeroparms<-parms
            zeroparms$gamma[j]=0
            alt[,j]<-yhatstd(Qt,fix=NULL,Y=Y,X=X,parms=zeroparms)$pred
          }
          
          R2<-c(cor(alt,base))^2
          return(cbind(R2,maxloc))
        }
        )
    #Each element of R2t has one row for R2 values and another for max loc
    Stackem<-matrix(unlist(R2s),nrow=2*m,ncol=p,byrow=TRUE)
    
    #Odd rows are R2
    R2t<-Stackem[1+2*seq(0,(m-1)),]
    
    #Even rows are maxdraws
    maxdraws<-Stackem[2*seq(1,m),]
  
  #Step 8: Calculate L_k
    
    L<-1-apply(R2t,2,mean,na.rm=TRUE)
  
  #Step 9: Find set A of locally important
  
    A<-which(L>rho)
    
  #Calculate restricted search spaces
    
    
  return(list("maxdraws"=maxdraws,"R2t"=R2t,"L"=L,"A"=A))
  
}

#A2, Step 8: Search spaces
search.space<-function(maxdraws,delta,A,maxloc){
      p<-dim(maxdraws)[[2]]
      
      #For inactive variables fix to maxloc value
      inactive<-setdiff(1:p,A)
      
      #Rdelta search space (restriced in locally active)
      #first column is lower bound, second is upper bound
      Rdelta<-matrix(NA,nrow=p,ncol=2)
        #Inactive variables fixed
        Rdelta[inactive,]<-maxloc[inactive]
        #Active variables lower bound
        Rdelta[A,1]<-apply(maxdraws[,A],2,function(x){max(min(x)-delta,0)})
        #Active variables upper bound
        Rdelta[A,2]<-apply(maxdraws[,A],2,function(x){min(max(x)+delta,1)})
        
      #RA search space (unrestricted in locally active)
      RA<-Rdelta #will overwrite active variables
        RA[A,]<-cbind(rep(0,length(A)),rep(1,length(A)))
      
      return(list("Rdelta"=Rdelta,"RA"=RA))
    }


#AEI function (allow localized evaluation)
#Note: Huang formula gives minimization problem, we flip it so we take (fhat-fhat.star)

AEI <- function(par,fix=NULL, Y, X, parms, full=NULL, summary=NULL,burn=0,nu=1,fhatstar=NULL){
  #nu is the factor in determining xopt
  

  n = length(Y) ; p = length(X) / n ;  

  if(p == 1 ){ 
    X = matrix( X , ncol = 1 )  ; 
  }# end p = 1  
  
    
  if(!is.null(fix)){
    active<-setdiff(1:p,fix[[1]])
    paractive<-par
    m<-length(par)/length(active)
    #Single point prediction?
    if(m==1){
      par<-rep(NA,p)
      par[active]<-paractive
      par[fix[[1]]]<-fix[[2]] 
    }else{
      #Multiple point prediciton
      par<-matrix(NA,nrow=m,ncol=p)
      par[,active]<-paractive
      par[,fix[[1]]]<-matrix(rep(fix[[2]],m),nrow=m,ncol=length(fix[[1]]),byrow=TRUE)
    }
  }else{
    
    m<-length(par)/p
  }
  
  
  #Checks if MCMC parameter values have been supplied, otherwise it runs MCMCsummary inside this function
  if(is.null(parms)){
    #vmat is generated here
    parms<-MCMCsummary(Y,X,full,summary,burn)
  }
  
  #Find x** (page 450 of Huang et al 2006)
  # Recommend only optimizing over observed points
  # No localization here, even if fix is set so we set fix=NULL always here
  
  PredObs<-yhatstd(par=X,fix=NULL,Y=Y,X=X,parms=parms,full=full,summary=summary,burn=burn)
  xstar.index<-which.max(PredObs$pred-nu*sqrt(PredObs$std))
  xstar<-X[xstar.index,]
  f.xstar<-PredObs$pred[xstar.index]
  
  #Localization here, go back to paractive
  if(!is.null(fix)){
    Pred.par<-yhatstd(paractive,fix=fix,Y=Y,X=X,parms=parms)  
  }else{
    Pred.par<-yhatstd(par,fix=NULL,Y=Y,X=X,parms=parms)  
  }
  
  f.xstar.fx<-Pred.par$pred-f.xstar
  f.xstar.fx.std<-f.xstar.fx/sqrt(Pred.par$std)
  
  Term1<-f.xstar.fx*
               pnorm(f.xstar.fx.std)+
                 sqrt(Pred.par$std)*dnorm(f.xstar.fx.std)
  Term2<- 1-sqrt(parms$tau2)/sqrt(Pred.par$std+parms$tau2)
  
  return(list("AEI"=Term1*Term2,"fhatstar"=f.xstar))
  
}

#For optim  
fAEI<-function(par,fix=NULL, Y, X, parms, full=NULL, summary=NULL,burn=0,nu=1,fhatstar=NULL){
  AEI(par,fix=fix,Y=Y,X=X,parms=parms,full=full,summary=summary,burn=burn,nu=nu,fhatstar=fhatstar)$AEI
}

#AEI gradient
    #Only use for a single par point
AEIderiv <- function(par,fix=NULL,Y,X,parms,full=NULL,summary=NULL,burn=0,fhatstar){
   
  n = length(Y) ; p = length(X) / n ;  
  
  if(p == 1 ){ 
    X = matrix( X , ncol = 1 )  ; 
  }# end p = 1  
  
  #Checks if MCMC parameter values have been supplied, otherwise it runs MCMCsummary inside this function
  if(is.null(parms)){
    #vmat is generated here
    parms<-MCMCsummary(Y,X,full,summary,burn,par)
    attach(parms)
  }else{
    attach(parms)
    #Need to calculate vmat for par
    if(!is.null(fix)){
      active<-setdiff(1:p,fix[[1]])
      par.all<-rep(NA,p)
      par.all[active]=par
      par.all[fix[[1]]]<-fix[[2]]
    
      Xp = sweep( rbind(X,par.all), MARGIN = 2, sqrt(gamma)  , "*")
    }else{
      if(p>1){
        Xp = sweep( rbind(X,par), MARGIN = 2, sqrt(gamma)  , "*")  
      }else{
        Xp = rbind(X,par)*sqrt(gamma)
      }
      
    }
    
    #Correlation Matrix of points in X and ms (if ms!=NULL)
    K<-exp(-rdist(Xp[1:n,],t(as.matrix(Xp[(n+1),])))^2)
    vmat<-c(sig2*K)
    detach(parms)
  }
  
  ###################
  #Calc f, s2, s
  ###################
  stuff<-yhatstd(par,fix=fix,Y,X,parms=parms)
  
  f   <-stuff$pred
  s2  <-stuff$std
  s   <-sqrt(s2)
  
  ###################
  #Calc s.prime, Term 2 is derivative of s2(par)
  ###################
  attach(parms)
  if(!is.null(fix)){
    active<-setdiff(1:p,fix[[1]])
    par.all<-rep(NA,p)
    par.all[active]=par
    par.all[fix[[1]]]<-fix[[2]]
    
    s2.prime<-4*diag(gamma)%*%apply(X,1,function(x){(par.all-x)})%*%diag(vmat)%*%
    Vinv%*%vmat
    s2.prime<-s2.prime[active,]
  }else{
    if(p>1){
      s2.prime<-4*diag(gamma)%*%apply(X,1,function(x){(par-x)})%*%diag(vmat)%*%
        Vinv%*%vmat
    }else{
      s2.prime<-4*gamma%*%t(par-X)%*%diag(vmat)%*%
        Vinv%*%vmat
    }
    
  }
  detach(parms)
  s.prime<-1/2*(s2)^(-1/2)*s2.prime
  
  ###################
  #Calc u=(fhat-fhatstar)/s and du
  ###################
  u<-(f-fhatstar)/s
  fprime<-fhat.prime(par,fix=fix,Y=Y,X=X,parms)
  du<-(sqrt(s2)*fprime-(f-fhatstar)*s.prime)/s2
  
  ###################
  #Calc (1-tau2/(sqrt(s2+tau2))) and its derivative
  ###################
  attach(parms)
  AEI.Term2<-1-sqrt(tau2)/sqrt(s2+tau2)
  AEI.Term2.prime<-sqrt(tau2)/(s2+tau2) * (1/2*(s2+tau2)^(-1/2)) * s2.prime
  detach(parms)
  
  ###################
  ## Final Calculation is like product rule
  ## uses ddnorm() function
  ###################
  AEI.Term1<-(f-fhatstar)*pnorm(u)+s*dnorm(u)
  AEI.Term1.prime<-(f-fhatstar)*dnorm(u)*du+pnorm(u)*fprime+
                   s*ddnorm(u)*du+dnorm(u)*s.prime
  deriv<-AEI.Term1*(AEI.Term2.prime)+(AEI.Term2)*AEI.Term1.prime
  
  
  return(list("deriv"=deriv))
}    

fAEI.prime<-function(par,fix=NULL,Y,X,parms,full=NULL,summary=NULL,burn=0,fhatstar){
  AEIderiv(par,fix=fix,Y=Y,X=X,parms=parms,full=full,summary=summary,burn=burn,fhatstar=fhatstar)$deriv
}
    
#Localized AEI optimization
LocalAEI.opt<-function(LVSresults,delta=delta,search.pts,fix=NULL,Y,X,parms,
                       full=NULL,summary=NULL,burn=0,maxloc){
  
  if(!is.matrix(X)) X<-as.matrix(X)
  
  n<-length(Y); p<-length(X)/n
  
  
  
  #Search spaces at maxloc, depends on whether A is empty or not
  if(length(LVSresults$A)>0){
    search<-search.space(LVSresults$maxdraws,delta=delta,A=LVSresults$A,maxloc=maxloc)
    CA    <-matrix(runif(search.pts*length(LVSresults$A),0,1),nrow=search.pts,ncol=length(LVSresults$A))
    Cdelta<-apply(search$Rdelta[LVSresults$A,],1,function(x){runif(search.pts,x[1],x[2])})
    
    #Evaluates AEI at Cdelta and CA
    AEI.CA<-AEI(par=CA,fix=fix,Y=Y,X=X,parms=parms)
    max.CA<-max(AEI.CA$AEI)
    AEI.Cdelta<-AEI(par=Cdelta,fix=fix,Y=Y,X=X,parms=parms)
    max.Cdelta<-max(AEI.Cdelta$AEI)
    
    #Choose set with largest AEI value
    if(max.CA>max.Cdelta){C<-CA;AEI.choice<-AEI.CA$AEI;R<-search$RA}else{C<-Cdelta;AEI.choice<-AEI.Cdelta$AEI;R<-search$Rdelta}   
  }else{ #if all inactive, search over all variables
    CA    <-matrix(runif(search.pts*p,0,1),nrow=search.pts,ncol=p)
    AEI.CA<-AEI(par=CA,fix=fix,Y=Y,X=X,parms=parms)
    AEI.choice<-AEI.CA$AEI
    R    <-matrix(rep(0:1,p),nrow=p,ncol=2,byrow=T)
    C    <-CA
  }
  
  #Localized AEI optimization using previous set
  
    #Line search optimization in either Cdelta or CA
    #Top 5 AEI values
    if(p>1){
      starts<-C[sort(AEI.choice,decreasing=TRUE,index.return=TRUE)$ix,][1:5,]
    }else{
      starts<-C[sort(AEI.choice,decreasing=TRUE,index.return=TRUE)$ix][1:5]
    }
    
    fhatstar<-AEI.CA$fhatstar
    
    if(p>1){
      maxs<-apply(starts,1,function(x){
        maxloc<-optim(par=x,fAEI,gr=fAEI.prime,
                      method="L-BFGS-B",lower=c(R[LVSresults$A,1]),upper=c(R[LVSresults$A,2]),
                      control=list(fnscale=-1),
                      fix=fix,
                      Y=Y,
                      X=X,
                      parms=parms,
                      full=full,
                      summary=summary,
                      burn=burn,
                      fhatstar=fhatstar)
        return(list(maxloc$par,maxloc$value))
      }
      )
    }else{
      maxs<-sapply(starts,function(x){
        maxloc<-optim(par=x,fAEI,gr=fAEI.prime,
                      method="L-BFGS-B",lower=c(R[LVSresults$A,1]),upper=c(R[LVSresults$A,2]),
                      control=list(fnscale=-1),
                      fix=fix,
                      Y=Y,
                      X=X,
                      parms=parms,
                      full=full,
                      summary=summary,
                      burn=burn,
                      fhatstar=fhatstar)
        return(list(maxloc$par,maxloc$value))
      }
      )
    }
    
  
  #Pulls max values from line searches and finds max of them all
  #Note: if there are many maximum it will pick the first it encounters
  #which will equal the observation with the largest observed value
  maxval<-which.max(unlist(lapply(maxs,function(x){x[[2]]})))
  maxloc<-maxs[[maxval]]
  if(!is.null(fix)){
    expand.max<-rep(NA,p)
    expand.max[fix[[1]]]=fix[[2]]
    expand.max[-fix[[1]]]=maxloc[[1]]
    maxloc[[1]]=expand.max
  }
  
  return(list("maxloc"=maxloc[[1]],"maxval"=maxloc[[2]]))
}



#SOLID Algorithm for one iteration!
#Assumes initial Y and X already generated
#m = MCMC draws to use in LVS
SOLID<-function(Y,X,delta,rho,m,
                MCMC=list("iters"=1000,initial=NULL,"thin"=5,"burn"=100,"theta.a"=1,"theta.b"=1,
                          "ua"=1,"ub"=0.1,"eta.a"=0.1,"eta.b"=0.1,"ra"=20,"rb"=.5)
                ){
  if(!is.matrix(X)) X=as.matrix(X)
  P<-c(1:dim(X)[2])
  Pold<-c(1:dim(X)[2])
  Pnew<-NULL
  Xold<-X
  
  attach(MCMC)
  
  #Set up storage
  Results<-list()
   #--> List of globally variable selection performed
   #--> The last element is the final fit we use to maximize AEI
   #   --->Each element in this sublist includes MCMC results, current active variable list, and
   #       maxloc.  The last element also has LVSresults and AEI results.
    
    
    GVS.Stage<-1
    stop=0
    while(stop==0){
        fullfit = GPfit(Y,Xold, iters  = iters,thin=thin,
                        initial=initial
                        , varselect = TRUE ## variable selection
                        ,theta_a=theta.a,theta_b=theta.b
                        ,ua=ua,ub=ub,
                        eta_a=eta.a,eta_b=eta.b,
                        ra=ra,rb=rb
        )
        
        #Estimate maximum using multiple MCMC
        maxloc<-fhat.opt(fix=NULL,Y=Y,X=Xold,parms=NULL,full=fullfit,summary=NULL,burn=burn)[[1]]
        
        if(length(maxloc)<max(P)){ #never happens on first iteration
          maxloc.fix[Pold]<-maxloc
          maxloc<-maxloc.fix
        }
        
        Results[[paste("Stage",GVS.Stage,sep="")]]<-list("MCMC"=fullfit,"maxloc"=maxloc)
        
        #Remove variables that don't meet criterion
        #Pnew<-Pold[which(colMeans(fullfit$bs[-c(1:burn),])>c(quantile(fullfit$theta[-c(1:burn)],0.01)))]
        if(length(Pold)>1){
          Pnew<-Pold[which(colMeans(fullfit$bs[-c(1:burn),])>0.05)]  
        }else{
          Pnew=Pold
        }
        Results[[paste("Stage",GVS.Stage,sep="")]][["Pnew"]]=Pnew
        
        if(length(setdiff(Pold,Pnew))==0){
          stop=1
        }else{
          maxloc.fix<-rep(NA,max(P))
          maxloc.fix[-Pnew]=Results[[paste("Stage",GVS.Stage,sep="")]]$maxloc[-Pnew]
          GVS.Stage<-GVS.Stage+1
          Pold<-Pnew
          Xold<-X[,Pnew]
          print("Kept:")
          print(Pnew)
        }
        
        
    }#End while loop
    
    #Find locally active dimensions
    if(length(Pnew)>1){
      LVSresults<-LVS(Y,Xold,full=fullfit,delta=delta,rho=rho,m=m,mm=100,burn=burn)  
    }else{
      LVSresults<-list()
      LVSresults$A<-Pold
    }
    
    Results[[paste("Stage",GVS.Stage,sep="")]][["LVSresults"]]=LVSresults
    
    #Fix values that are not locally active, unless everything is locally inactive
    #Fix values are in terms of index of Pnew
    #First set fix to NULL, change otherwise
    fix<-NULL
    if(length(LVSresults$A)>0 & length(setdiff(1:length(Pnew),LVSresults$A))>0){
    fix<-list()
    fix[[1]]<-setdiff(1:length(Pnew),LVSresults$A)
    fix[[2]]<-maxloc[Pnew[fix[[1]]]]
    }
    
    Results[[paste("Stage",GVS.Stage,sep="")]][["fix"]]=fix
    
    #Localized optimization
    if(!is.null(fix)){
      maxloc[Pold]<-fhat.opt(fix=fix,Y=Y,X=Xold,parms=NULL,full=fullfit,summary=NULL,burn=burn)[[1]]  
    }
    
    Results[[paste("Stage",GVS.Stage,sep="")]][["maxloc"]]=maxloc
    
    #Search spaces at maxloc using median MCMC values
    parms<-MCMCsummary(Y=Y,X=Xold,full=fullfit,summary="median",burn=burn)
    if(length(Pnew)>1){
      AEI.opt<-LocalAEI.opt(LVSresults,delta=delta,search.pts=10000,fix=fix,Y=Y,X=Xold,parms=parms,maxloc=maxloc)  
    }else{
      search.pts<-10000
      CA    <-matrix(runif(search.pts*length(LVSresults$A),0,1),nrow=search.pts,ncol=length(LVSresults$A))
      AEI.CA<-AEI(par=CA,fix=fix,Y=Y,X=X,parms=parms)
      max.CA<-max(AEI.CA$AEI)
      AEI.opt<-list()
      AEI.opt$maxloc<-CA[which.max(AEI.CA$AEI)]
      AEI.opt$maxval<-max.CA
    }
    
    Results[[paste("Stage",GVS.Stage,sep="")]][["AEI.opt"]]=AEI.opt
    
    #Need original points in maxloc
    xnew<-rep(NA,max(P))
    xnew[Pnew]<-AEI.opt$maxloc
    xnew[setdiff(P,Pnew)]<-maxloc[setdiff(P,Pnew)]
    Results[[paste("Stage",GVS.Stage,sep="")]][["xnew"]]=xnew
  
    detach(MCMC)
    
    return(Results)
} #End SOLID function


GVS<-function(Y,X,
                MCMC=list("iters"=1000,initial=NULL,"thin"=5,"burn"=100,"theta.a"=1,"theta.b"=1,
                          "ua"=1,"ub"=0.1,"eta.a"=0.1,"eta.b"=0.1,"ra"=20,"rb"=.5)
){
  P<-c(1:dim(X)[2])
  Pold<-c(1:dim(X)[2])
  Pnew<-NULL
  Xold<-X
  
  attach(MCMC)
  
  #Set up storage
  Results<-list()
  #--> List of globally variable selection performed
  #--> The last element is the final fit we use to maximize AEI
  #   --->Each element in this sublist includes MCMC results, current active variable list, and
  #       maxloc.  The last element also has LVSresults and AEI results.
  
  
  GVS.Stage<-1
  stop=0
  while(stop==0){
    fullfit = GPfit(Y,Xold, iters  = iters,thin=thin,
                    initial=initial
                    , varselect = TRUE ## variable selection
                    ,theta_a=theta.a,theta_b=theta.b
                    ,ua=ua,ub=ub,
                    eta_a=eta.a,eta_b=eta.b,
                    ra=ra,rb=rb
    )
    
    #Estimate maximum using multiple MCMC
    maxloc<-fhat.opt(fix=NULL,Y=Y,X=Xold,parms=NULL,full=fullfit,summary=NULL,burn=burn)[[1]]
    
    if(length(maxloc)<max(P)){ #never happens on first iteration
      maxloc.fix[Pold]<-maxloc
      maxloc<-maxloc.fix
    }
    
    Results[[paste("Stage",GVS.Stage,sep="")]]<-list("MCMC"=fullfit,"maxloc"=maxloc)
    
    #Remove variables that don't meet criterion
    #Pnew<-Pold[which(colMeans(fullfit$bs[-c(1:burn),])>c(quantile(fullfit$theta[-c(1:burn)],0.01)))]
    Pnew<-Pold[which(colMeans(fullfit$bs[-c(1:burn),])>0.05)]
    Results[[paste("Stage",GVS.Stage,sep="")]][["Pnew"]]=Pnew
    
    if(length(setdiff(Pold,Pnew))==0){
      stop=1
    }else{
      maxloc.fix<-rep(NA,max(P))
      maxloc.fix[-Pnew]=Results[[paste("Stage",GVS.Stage,sep="")]]$maxloc[-Pnew]
      GVS.Stage<-GVS.Stage+1
      Pold<-Pnew
      Xold<-X[,Pnew]
      print("Kept:")
      print(Pnew)
    }
    
    
  }#End while loop
  
  
  #Line search optimization
  #Top 5 AEI values
  C<-matrix(runif(10000*length(Pnew)),nrow=10000,ncol=length(Pnew))
  parms<-MCMCsummary(Y=Y,X=Xold,full=fullfit,summary="median",burn=burn)
  AEIvals<-AEI(par=C,fix=NULL,Y,Xold,parms)
  fhatstar<-AEIvals$fhatstar
  
  starts<-C[sort(AEIvals$AEI,decreasing=TRUE,index.return=TRUE)$ix,][1:5,]
  
  maxs<-apply(starts,1,function(x){
    maxloc<-optim(par=x,fAEI,gr=fAEI.prime,
                  method="L-BFGS-B",lower=0,upper=1,
                  control=list(fnscale=-1),
                  fix=NULL,
                  Y=Y,
                  X=Xold,
                  parms=parms,
                  fhatstar=fhatstar)
    return(list(maxloc$par,maxloc$value))
  }
  )
  
  #Pulls max values from line searches and finds max of them all
  #Note: if there are many maximum it will pick the first it encounters
  #which will equal the observation with the largest observed value
  maxval<-which.max(unlist(lapply(maxs,function(x){x[[2]]})))
  AEI.opt<-maxs[[maxval]]
  
  Results[[paste("Stage",GVS.Stage,sep="")]][["AEI.opt"]]=AEI.opt
  
  #Need original points in maxloc1
  xnew<-rep(NA,max(P))
  xnew[Pnew]<-AEI.opt[[1]]
  xnew[setdiff(P,Pnew)]<-maxloc[setdiff(P,Pnew)]
  Results[[paste("Stage",GVS.Stage,sep="")]][["xnew"]]=xnew
  
  detach(MCMC)
  
  return(Results)
} #End GVS function


#Can use below with Oracle too, just change X
None<-function(Y,X,MCMC=list("iters"=1000,initial=NULL,"thin"=5,"burn"=100,"theta.a"=1,"theta.b"=1,
                             "ua"=1,"ub"=0.1,"eta.a"=0.1,"eta.b"=0.1,"ra"=20,"rb"=.5)){
    
    P<-1:dim(X)[2]  
  
    attach(MCMC)
    
    #Set up storage
    Results<-list()
    #--> List of globally variable selection performed
    #--> The last element is the final fit we use to maximize AEI
    #   --->Each element in this sublist includes MCMC results, current active variable list, and
    #       maxloc.  The last element also has LVSresults and AEI results.
    
    fullfit = GPfit(Y,X, iters  = iters,thin=thin,
                      initial=initial
                      , varselect = FALSE ## variable selection
                      ,theta_a=theta.a,theta_b=theta.b
                      ,ua=ua,ub=ub,
                      eta_a=eta.a,eta_b=eta.b,
                      ra=ra,rb=rb
    )
      
      #Estimate maximum using multiple MCMC
      maxloc<-fhat.opt(fix=NULL,Y=Y,X=X,parms=NULL,full=fullfit,summary=NULL,burn=burn)[[1]]
      
      Results[[paste("Stage",1,sep="")]]<-list("MCMC"=fullfit,"maxloc"=maxloc)
      
      Results[[paste("Stage",1,sep="")]][["Pnew"]]=P

      parms<-MCMCsummary(Y=Y,X=X,full=fullfit,summary="median",burn=burn)
      
      #Line search optimization
      #Top 5 AEI values
      C<-matrix(runif(10000*max(P)),nrow=10000,ncol=max(P))
      AEIvals<-AEI(par=C,fix=NULL,Y,X,parms)
      fhatstar<-AEIvals$fhatstar
      
      starts<-C[sort(AEIvals$AEI,decreasing=TRUE,index.return=TRUE)$ix,][1:5,]
      
      maxs<-apply(starts,1,function(x){
        maxloc<-optim(par=x,fAEI,gr=fAEI.prime,
                      method="L-BFGS-B",lower=0,upper=1,
                      control=list(fnscale=-1),
                      fix=NULL,
                      Y=Y,
                      X=X,
                      parms=parms,
                      fhatstar=fhatstar)
        return(list(maxloc$par,maxloc$value))
      }
      )
      
      #Pulls max values from line searches and finds max of them all
      #Note: if there are many maximum it will pick the first it encounters
      #which will equal the observation with the largest observed value
      maxval<-which.max(unlist(lapply(maxs,function(x){x[[2]]})))
      AEI.opt<-maxs[[maxval]]

    
    Results[[paste("Stage",1,sep="")]][["AEI.opt"]]=AEI.opt
    
    
    xnew<-AEI.opt[[1]]
    Results[[paste("Stage",1,sep="")]][["xnew"]]=xnew
    
    detach(MCMC)
    
    return(Results)
} #End None function














