#Packages
require(MASS)
require(fda)
require(splines)
require(caret)
require(fields)
require(nlme)
require(gglasso)
require(refund)
require(sfsmisc) 
require(plotly)
require(orthogonalsplinebasis)
require(oem)
require(rqPen)
require(parallel)


#####################################################################################
#Functions to restructure time series EMG data into functional data structure
#See section 4 in the main article, and section A of the Supplementary Materials PDF
#####################################################################################

  #Turns time series vector into matrix with rows that are overlapping windows
  Hist.cov <- function(X, delta){
    #X is vector of EMG values for one sensor
    #delta = lag
    #Returns matrix whose rows a recent past historical curves
    # observed at delta+1 time points
    obs <- length(X)-delta
    U <- matrix(NA, nrow = obs, ncol = delta+1)
    for (ii in 1 : obs){
      U[ii,] <-  X[ii : (ii + delta)] #total of delta+1 points
    }
    return(U)
  }
  
  
  
  #Standardize X matrix per column
  STND_SCALE <- function(X,sd=TRUE){
    #X is a vector, returns scaled vector
    #To be used following Hist.cov for the delta+1 time points
    if(sd){r <- (X - mean(X)) / sd(X)}
    if(sd==FALSE){r <- (X - mean(X))}
    return(r)
  }
  
  
  
  #Uses Hist.cov and STND_SCALE on matrix of k time series signals
  #Returns list with 2 elements
    #Xnew   = list of k elements, each equal to matrix of overlapping signals
    #Xmeans = list of k elements, each element is a vector of column means prior to standardization
    #         Is needed to backtransform results and predict on other data sets
  Xprep <- function(X,delta,sd=FALSE){
    #X is matrix of EMG signals
    Xnew <-list()
    K <- ncol(X)
    for(k in 1:K){
      Xnew[[k]] <- Hist.cov(X[,k],delta)  
    }
    Xmeans<-lapply(Xnew,function(x){apply(x,2,mean)})
    Xnew <- lapply(Xnew,function(x){apply(x,2,STND_SCALE,sd=sd)})
    return(list(Xnew,Xmeans))
  }
  

  
#####################################################################################
#Functions to smooth position data to generate velocity and acceleration estimates
#See section A of the Supplementary Materials PDF
#####################################################################################  
  
  #Input Y vector of original position data (referred to as zi in paper)
  #nbasis and norder are number of basis functions and order
  #lambdaFDA is the sequence of lambda values (base 10) for penalized estimation
  #sd1 and sd2 are desired derivatives to output, e.g., sd1=0 and sd2=1 returns smoothed position and velocity estimates
  #plot gives GCV plot if desired

  #Returns two smoothed vectors for sd1 and sd2, respectively
    
  Y.s <- function(Y, nbasis, norder, lambdaFDA, sd1 = 0, sd2 = 1,plot=FALSE){
    #Smooths response Y and outputs position and velocity
    
    argvals <- seq(0, 1, len = length(Y))
    
      # functional basis object
        basisobjY <- create.bspline.basis(c(0, 1), nbasis, norder = norder)                        
      # specification of basis function system
      #Penalizes 4th order derivative with lambdaFDA
        fdParobj <- fdPar(fdobj = basisobjY, Lfdobj = sd2+2)                        
      
      #fit above basis object to raw data Y
      #Finds smallest value (still works if just 1 number given)
      gcv.store<-sapply(lambdaFDA,FUN=function(x){fdParobj$lambda=10^(x);smooth.basis(argvals,Y,fdParobj)$gcv})
      if(plot){
        plot(x=lambdaFDA,y=gcv.store,type="l")  
      }
      fdParobj <- fdPar(fdobj = basisobjY, Lfdobj = sd2+2,lambda=10^lambdaFDA[which.min(gcv.store)])
      #Final smooth
      Y.smooth <- smooth.basis(argvals = argvals, y = Y, fdParobj = fdParobj)  
      
      
      #smooth function with sd1-th derivative
      Y.s1 <- t(as.matrix(eval.fd(evalarg = argvals, fdobj = Y.smooth$fd, Lfdobj = sd1)))       
      #smooth function with sd2-th derivative
      Y.s2 <- t(as.matrix(eval.fd(evalarg = argvals, fdobj = Y.smooth$fd, Lfdobj= sd2)))
    
    return(list(Y.s1 = Y.s1, Y.s2 = Y.s2))
  }
  
  
  
  #Wrapper for Y.s that also chops off the first delta values due to windowing approach
  #Outputs DY = sd2 smooth, Y = sd1 smooth, mDY = mean DY prior to centering, sDY = STD DY prior to scaling
  Yprep <- function(Y, nbasis, norder, lambdaFDA, sd1 = 0, sd2 = 1, delta,plot=FALSE){
    ## Y.prep gives velocity, positions, & its indices to be used in modeling
    
    #Smooth response
    Ys <- Y.s(Y, nbasis, norder, lambdaFDA, sd1 = sd1, sd2 = sd2,plot=plot)
    DY <- Ys$Y.s2[-c(1 : delta)]
    Y  <- Ys$Y.s1[-c(1 : delta)]
    
    #Standardize DY
    mDY <-mean(DY)
    sDY <-sd(DY)
    DY <-STND_SCALE(DY)
    
    return(list(DY = DY, Y = Y,mDY,sDY))
  }
  

  
  
  
#####################################################################################
#Functions to set up basis functions for functional predictors
#See section 4.1
#####################################################################################  
  
  Pmat.Stage1 <- function(knot1,knot2){
    #Goal: Produce basis function values and terms in penalty matrix Q
    #Generate basis functions to calculate tilde(X) (section 4) using Riemann sum
    #Calculate Omega.s, Omega.z for penalty using Riemann sum
    
    #Input:
    #s1, s2 are sequences for S and Z direction to approx integral
    #knot1 and knot2 generate desired bases
    
    #Output:
    #B1 = orthogonal spline function for s direction
    #B2 = orthogonal spline function for z direction
    #Omega1 = approximated penalty for second derivative wrt s
    #Omega2 = approximed penalty for second derivative wrt z
    
    B1 <- OrthogonalSplineBasis(knots=knot1)
    
    B2 <- OrthogonalSplineBasis(knots=knot2)
    
    Omega1 <-OuterProdSecondDerivative(B1)
    
    Omega2 = OuterProdSecondDerivative(B2)
    
    return(list("B1" = B1, "B2" = B2, "Omega1" = Omega1, "Omega2" = Omega2))
  }
  
  #Takes kronecker product to construct Xtilde matrices
  kron.pair <-function(X,Y){
    Xtilde<-do.call(rbind, lapply(1:nrow(X), function(i) kronecker(X[i,], Y[i,])))
    return(Xtilde)
  }

#####################################################################################
# Wrapper function to set up analysis for a given movement data set
# movement="H" or "W" and nm=1,2,3,4,5,6 where 1,2,3 (4,5,6) correspond to fixed (random) movement
# Corresponds to how data are stored in data_Hand16.Rdata and data_Wrist16.Rdata
#####################################################################################    

data.process<-function(movement,
                       set=1,
                       delta=40,
                       sd=FALSE,
                       thin,
                       L,
                       M,
                       plot=TRUE,
                       sd1=0,
                       sd2=1,
                       nbasis=2000,
                       norder=6,
                       lambdaFDA=10^-12){
  
  if(movement=="H"){load("data_hand16.Rdata")}
  if(movement=="W"){load("data_wrist16.Rdata")}
  
  #Movement data
    #Column 1 in Angle0 is time which we ignore
    W <- Angle0[[set]][,2]  #Wrist joint angles     
    H <- Angle0[[set]][,3]  #Hand joint angles
  
  #EMG data
    EMG00 <- EMG0[[set]][, -1] #-1 removes the time column
  
  #If interested in exploring concurrent correlations of EMG signals
  #See Figure 4 in article
    Lag0cors<-cor(EMG00)
  
  #Creates list of the K EMG signals,uncentered
    Xprep <- Xprep(EMG00,delta,sd)
    X.all <- Xprep[[1]]
    Xmeans <- Xprep[[2]]
  
  #Prepare response
  YY <- Yprep(Y = eval(parse(text=movement)), nbasis = nbasis, norder = norder, lambdaFDA = lambdaFDA, sd1 = sd1, sd2 = sd2, delta=delta,plot=plot)
    Y.all <- YY$DY  #Velocity(sd2=1)/Acceleration(sd2=2) is response of interest
    Z.all <- YY$Y   #Position is covariate of interest 
    mDY   <- YY[[3]]
    sDY   <- YY[[4]]
  
  if(plot){
    plot(x=Z.all,y=eval(parse(text=movement))[-c(1:(delta))],
         xlab="Est Position",ylab="True Position")
    plot(Y.all,type="l",col=1,ylab="Velocity (Black)")
    lines(STND_SCALE(eval(parse(text=movement))[-c(1:(delta+1))]),col=2)
    lines(STND_SCALE(Z.all),col=4)
  }
  
  #Subset data if interested, otherwise set thin=1
    subseq<-seq(1,length(Y.all),by=thin)
    X <- lapply(X.all,function(X){X[subseq,]})
    Y <- Y.all[subseq]
    Z <- Z.all[subseq]

  if(plot){points(x=subseq,y=Y,col=3,pch=1)}
  
  if(plot){
    par(mfrow=c(1,1))
    plot.seq<-(1:300)+80
    matplot(EMG00[plot.seq,],type="l",
            ylab="Normalized EMG",
            xlab="Second",
            axes=FALSE,ylim=c(0,1),
            col=rep(1:4,4),
            lty=c(1:4,c(2,3,4,1),c(3,4,1,2),c(4,1,2,3)),
            lwd=2)
    axis(1,at=seq(0,300,by=60),labels=c("0.0","0.5","1.0","1.5","2.0","2.5"))
    axis(2,at=seq(0,1.0,by=0.2))
    legend("top",paste("EMG",1:16),lwd=2,col=rep(1:4,4),
           lty=c(1:4,c(2,3,4,1),c(3,4,1,2),c(4,1,2,3)),ncol=8,
           cex=0.5)
  }
  
  #Basis functions for functional covariates
    knot1<- c(0,0,0,seq(0,1,length=L-2),1,1,1) 
    knot2<- c(0,0,0,seq(0,1,length=M-2),1,1,1) 
  
  #The above knots give orthogonality for magnitude penalty matrix
    P1  <- Pmat.Stage1(knot1=knot1,knot2=knot2)
    B1  <- evaluate(P1$B1,seq(0,1,length=delta+1))
    B2  <- evaluate(P1$B2,(Z-min(Z))/(max(Z)-min(Z)))
    Omega1<- P1$Omega1
    Omega2<- P1$Omega2
  
  #X transformations
    Xomega<-lapply(lapply(X,t),crossprod,y=(1/delta)*B1)
    Xtilde<-lapply(Xomega,kron.pair,Y=B2)
  
  #return
  return(list(X=X,Y=Y,Z=Z,P1=P1,B1=B1,B2=B2,
              Omega1=Omega1,Omega2=Omega2,Xomega=Xomega,Xtilde=Xtilde
              ,Xmeans=Xmeans,mDY=mDY,sDY=sDY,Lag0cors=Lag0cors
              ))
}
  

#################################################################
# Smoothness only fitting for final stage of SAFE(z)
# or potentially a way to generate initial weights
# For this application, we do not recommend Ridge initial weights
#################################################################
  
  #X=Xtilde or Xomega
  #Y=smoothed velocity or acceleration response
  #lambda = penalty on squared magnitude if desirable (not recommended), keep at 0
  #phi1   = penalty for s direction
  #phi2   = penalty for z direction (set to 0 if using Xomega)
  #nblocks = number of CV folds for block cross validation
  #pos.effect = The input for pos.effect is a matrix of basis functions, e.g. B2 output from Pmat.Stage1
  
  RidgeReg.CV <- function(X,Y,Omega1,Omega2,lambda=0,phi1=1,phi2=1,nblocks=1,pos.effect=NA){
    
    L <- nrow(Omega1)
    IL <- diag(1,L)
    M<-1;IM<-1
    if(min(phi2)>0){
      M <- nrow(Omega2)
      IM <- diag(1,M)  
    }
    
    if(length(X)>1){
      W<-do.call(cbind,X); K<-length(X)
    }else{
      W<-X[[1]]; K<-1
    }
    I<-diag(rep(1,L*M*K))
    alls <- kronecker(diag(1,K),kronecker(Omega1,IM))
    #alls<-cbind(0,rbind(0,alls))
    if(min(phi2)>0){ 
      allz <- kronecker(diag(1,K),kronecker(IL,Omega2))
      #allz<-cbind(0,rbind(0,allz))
    }
    
    if(length(pos.effect)>1){
      W<-cbind(W,pos.effect)
      I<-bdiag(I,diag(1,dim(Omega2)[1]))
      alls <- bdiag(alls,Omega2)
      if(min(phi2)>0){
        allz <- bdiag(allz,Omega2*0)  
      }
    }
    
    #################
    ##   Block CV  ##
    #################
    b.size <- floor(length(Y)/nblocks)
    blocks <- list()
    for(i in 1:nblocks){blocks[[i]]=(1+(i-1)*b.size):(i*b.size)}
    
    #Make predictions
    if(min(phi2)>0){
      phis<-expand.grid(lambda,phi1,phi2)
      pred<-matrix(NA,nrow=dim(phis)[1],ncol=nblocks)  
    }else{
      phis<-expand.grid(lambda,phi1)
      pred<-matrix(NA,nrow=length(phi1),ncol=nblocks)
    }
    phis=cbind(phis,NA)  
    
    for(i in 1:nrow(phis)){
      if(min(phi2)>0){
        sol<-lapply(blocks,function(x){solve(crossprod(W[-x,])+phis[i,1]*I+phis[i,2]*alls+phis[i,3]*allz,t(W[-x,])%*%Y[-x])})
        pred[i,]<-unlist(lapply(1:nblocks,function(x){mean((Y[blocks[[x]]]-W[blocks[[x]],]%*%sol[[x]])^2)}))  
        sol<-solve(crossprod(W)+phis[i,1]*I+phis[i,2]*alls+phis[i,3]*allz,t(W)%*%Y)
        phis[i,4]<-t(sol)%*%(I+alls+allz)%*%sol
      }else{
        sol<-lapply(blocks,function(x){solve(crossprod(W[-x,])+phis[i,1]*I+phis[i,2]*alls,t(W[-x,])%*%Y[-x])})
        pred[i,]<-unlist(lapply(1:nblocks,function(x){mean((Y[blocks[[x]]]-W[blocks[[x]],]%*%sol[[x]])^2)}))
        sol<-solve(crossprod(W)+phis[i,1]*I+phis[i,2]*alls,t(W)%*%Y)
        phis[i,3]<-t(sol)%*%(I+alls)%*%sol
      }
      print(i)
    }
    
    #3 column summary of CV mean, SE, and CVM+SE
    summary<-cbind(apply(pred,1,mean),apply(pred,1,sd))
    summary<-cbind(summary,summary[,1]+summary[,2])
    OPT.min<-which.min(apply(pred,1,mean))
    #Finds smallest mean and grabs its CVM+SE
    OPT.1se<-summary[which.min(apply(pred,1,mean)),3]
    #Finds largest combo of parameters with CVmean less than CVM+SE
    Cands<-which(summary[,1]<OPT.1se)
    #Choose one with smallest penalty
    OPT.1se<-Cands[which.min(phis[Cands,ifelse(min(phi2)>0,4,3)])]
    
    if(min(phi2)>0){
      #Computes solution using these phis
      sol<-solve(crossprod(W) + phis[OPT.1se,1]*I+phis[OPT.1se,2]*alls + phis[OPT.1se,3]*allz,t(W)%*%Y)  
    }else{
      sol<-solve(crossprod(W) + phis[OPT.1se,1]*I+phis[OPT.1se,2]*alls,t(W)%*%Y)  
    }
    
    #EMG coefficients
    sol.mat<-matrix(sol[1:(K*L*M)],nrow=K,ncol=L*M,byrow=TRUE)
    sol.pos<-sol[-c(1:(K*L*M))]
    
    #Adaptive Weights
    f <- sqrt(diag(sol.mat%*%t(sol.mat)))
    g <- sqrt(abs(diag(sol.mat%*%kronecker(Omega1,IM)%*%t(sol.mat))))
    if(min(phi2)>0) h <- sqrt(diag(sol.mat%*%kronecker(IL,Omega2)%*%t(sol.mat)))
    

    f<-1/f 
    g<-1/g 
    if(min(phi2)>0) h<-1/h #so smallest weight = 1
    #}
    if(min(phi2)>0){
      if(length(pos.effect)>1){
        return(list(f,g,h,sol.mat,phis[OPT.1se,],summary,sol.pos))  
      }else{
        return(list(f,g,h,sol.mat,phis[OPT.1se,],summary))
      }
    }else{
      if(length(pos.effect)>1){
        return(list(f,g,sol.mat,phis[OPT.1se,],summary,sol.pos))
      }else{
        return(list(f,g,sol.mat,phis[OPT.1se,],summary))  
      }
    }
  }
  
  
  
#####################################################################################
# Calculates Q and R matrices for adaptive weights (weights=1 gives unadapted for)
# See sections 4.2 and 5.1
  
# X here will be Xtilde if SAFE(z), Xomega for competitors
# WT is a list of 3 elements for SAFE(z) and 2 elements for competitors
       #First element = vector of fk weights
       #Second element = vector of gk weights
       #Third element = vector of hk weights
# Omega1 and Omega2 from data.process function
# phi1 = penalty for s second derivative
# phi2 = penalty for z second derivative, if equal to 0 it assumes no penalty is applied
         #and ignores WT[[3]] if it exists
# pos.effect is for the competitors to SAFE having the form alpha(zi), see section 6
#    The input for pos.effect is a matrix of basis functions, e.g. B2 output from Pmat.Stage1
#####################################################################################    
  
  Pmat.Stage2_Adap <- function(X,WT,Omega1, Omega2, phi1, phi2,pos.effect=NA){
    K<-length(X)
    W<-vector("list",K+ifelse(length(pos.effect)>1,1,0))
    
    L <- nrow(Omega1)
      IL <- diag(1,L)
    M<-1;IM<-1
    if(phi2>0){
    M <- nrow(Omega2)
      IM <- diag(1,M)
    }

    #Reparms for EMG
    f <- WT[[1]]; g <- WT[[2]]; if(phi2 >0){h <- WT[[3]]}else{h<-rep(0,K)}
    if(phi2==0){
      Q<-lapply(1:K,function(x){f[x]*kronecker(IL,IM)+g[x]*phi1*kronecker(Omega1,IM)+h[x]*phi2*kronecker(IL,1)})  
    }else{
      Q<-lapply(1:K,function(x){f[x]*kronecker(IL,IM)+g[x]*phi1*kronecker(Omega1,IM)+h[x]*phi2*kronecker(IL,Omega2)})  
    }
    
    R<-lapply(Q,chol)
    iR<-lapply(R,backsolve,x=diag(1,L*M))
    W[1:K]<- lapply(1:K,function(x){X[[x]]%*%iR[[x]]})
    
    #Reparms for position effect
    if(length(pos.effect)>1){
      f2<-WT[[4]]; g2<-WT[[5]] 
      M <- nrow(Omega2)
      IM <- diag(1,M)
      Q<-f2*IM+g2*phi1*Omega2
      R<-chol(Q)
      iR.pos<-backsolve(R,x=IM)
      W[[K+1]]<-pos.effect%*%iR.pos
    }
    if(length(pos.effect)>1){
      return(list(W,iR,iR.pos))  
    }else{
      return(list(W,iR))
    }
    
  }

  

#####################################################################################
# SAFE(z) functions
#####################################################################################        
  
  #Wrapper function for multiple stages
  # X = Xtilde
  # Y = Y from data.process (usually velocity or acceleration)
  # Omega1, Omega2, phi1, phi2, WT explained in Pmat.Stage2_Adap
  # nblocks is number of folds in block CV
  # lseq = lambda sequence
  # nstage = number of stages for SAFE
  # plot=TRUE gives plots as function runs
  # cvmult = allows modification of standard error rule (1 = 1 x SE, 2 = 2 x SE)
  SAFEglasso <- function(X,Y,Omega1,Omega2,phi1,phi2,WT,nblocks,lseq,nstage=NA,plot=TRUE,cvmult=1){
    
    #Needed values
    K<-1:length(X)
    L <- nrow(Omega1)
    M <- nrow(Omega2)
    
    #Set up CV blocks
    b.size <- floor(length(Y)/nblocks)
    blocks <- sort(rep(1:nblocks,b.size)) #to be used in foldid later
    
    #First iteration
    Stage<-list()
    Stage[[1]]<-glasso.step(X=X,Y=Y,K=K,WT=WT,
                            phi1=phi1,phi2=phi2,
                            blocks=blocks,
                            lseq=lseq,
                            Omega1=Omega1,Omega2=Omega2,plot=plot,cvmult=cvmult)  
    print("Stage 1")
    output<-round(data.frame(do.call(rbind,Stage[[1]]$WTsub)),4)
    colnames(output)=Stage[[1]]$Ksub
    rownames(output)=c("L2","L2.D1","L2.D2")
    print(output)
    finish<-0
    stage<-1
    while(finish==0){
      stage<-stage+1
      Knext<-Stage[[stage-1]]$Ksub
      Stage[[stage]]=glasso.step(X=X[Knext],
                                 Y=Y,
                                 K=Knext,
                                 WT=Stage[[stage-1]]$WTsub,
                                 phi1=phi1,phi2=phi2,
                                 blocks=blocks,
                                 lseq=lseq,
                                 Omega1=Omega1,Omega2=Omega2,plot=plot,cvmult=cvmult)
      if(is.na(nstage) & identical(Knext,Stage[[stage]]$Ksub)){finish=1}
      if(stage==nstage){finish=1}#if nstage=NA this will never be true
      print(paste("Stage",stage))
      output<-round(data.frame(do.call(rbind,Stage[[stage]]$WTsub)),4)
      colnames(output)=Stage[[stage]]$Ksub
      rownames(output)=c("L2","L2.D1","L2.D2")
      print(output)
    }
    
    ################################
    #### FINAL STAGE
    ################################
    
    #Ridge solution using only variable set and CV
    FinalSol<-RidgeReg.CV(X=X[Stage[[stage]]$Ksub],
                          Y=Y,
                          Omega1=Omega1,Omega2=Omega2,
                          phi1=phi1,phi2=phi2,nblocks=nblocks)
    output <- list("Stages"=Stage,"FinalSol"=FinalSol)
    return(output)
  }
  
  #Single stage function for SAFE
  #See input description from SAFEglasso function
  
  #Returns CV table for all combinations of lambda,phi1,and phi2
  #solution matrix for optimal combination with CV rule (see Section 4.3)
  glasso.step<-function(X,Y,K,WT,phi1,phi2,blocks,lseq=lseq,Omega1=Omega1,Omega2=Omega2,
                        plot=TRUE,cvmult=1){
    
    L<-dim(Omega1)[1]
    M<-dim(Omega2)[1]
    
    Tune<-expand.grid(lseq,phi2,phi1)[,3:1]
    
    groups<-sort(rep(1:length(K),L*M))
    
    #Run cv.gglasso for each phi1,phi2
    Tune<-cbind(Tune,0,0,0,0,0) 
    colnames(Tune)=c("Phi1","Phi2","Lambda","CVM","CVSE","CVM1SE","DF","Penalty")
    #fifth column stores average predict error; sixth is average model size
    #Columns 1, 2, and 3 are phi1,phi2, and lambda values
    #Columns 4 and 5 are cvmean and cvse
    #Column  6 is cvmean+cvse*cvmult
    #Column  7 is number of sig covariates for full fit
    #Column  8 is measure of model complexity for full fit
    iter<-1
    est=list()
    for(i in phi1){
      for(j in phi2){
        sol<-cv.gglasso(X=X,Y=Y,group=groups,blocks=blocks,intercept=FALSE,
                        lseq=lseq,
                        Omega1=Omega1,Omega2=Omega2,phi1=i,phi2=j,WT=WT,plot=plot,cvmult=cvmult)
        indx<-which(Tune[,1]==i & Tune[,2]==j)
        Tune[indx,4:5]=cbind(sol$cvm,sol$cvsd)
        Tune[indx,6]<-Tune[indx,4]+cvmult*Tune[indx,5]
        Tune[indx,7]<-unlist(sol$df)
        print(iter)
        print(unlist(sol$df)[which.min(sol$cvm)])
        
        est[[iter]]= sol$gglasso.fit$beta
        #estimates are in backwards order for lseq
        P=diag(t(est[[iter]])%*%(diag(1,L*M*length(K))+kronecker(diag(1,length(K)),kronecker(Omega1,diag(1,M)))+kronecker(diag(1,L*length(K)),Omega2))%*%est[[iter]])
        Tune[indx,8]=P[length(P):1]
        
        iter<-iter+1
      }
    }    
    OPT.min<-which.min(Tune[,4])
    OPT.1se<-Tune[OPT.min,6]
    Cands<-which(Tune[,4]<=OPT.1se)
    
    #Choose model with smallest complexity
    OptCand=Cands[which.min(Tune[Cands,8])]
    
    #Optimal tuning parameters
    OPT.1se<-as.numeric(Tune[OptCand,1:3])
    est.OPT<-est[[floor(OptCand/length(lseq))+1]][,length(lseq)-OptCand%%length(lseq)+1]
    
    #Final Fit
    W <- do.call(cbind,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=OPT.1se[2])[[1]])
    iRs <-bdiag(Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=OPT.1se[2])[[2]])
    
    sol.fit<-iRs%*%est.OPT
    solmat<-matrix(sol.fit,nrow=length(K),ncol=L*M,byrow=TRUE)
    
    #Find important variables
    Knext<-K[which(diag(crossprod(t(solmat)))>0)]
    #Subset design matrix and solution matrix
    Xnext <- X[Knext]
    solmat<-solmat[which(diag(crossprod(t(solmat)))>0),]
    if(length(Knext)==1){solmat<-t(as.matrix(solmat))}
    
    #Adaptive Weights
    WTnext<-list()
    f <- sqrt(diag(crossprod(t(solmat))))
    WTnext[[1]]=1/f
    g <- sqrt(abs(diag(solmat%*%kronecker(Omega1,diag(1,M))%*%t(solmat))))
    if(isTRUE(all.equal(max(g),0))){
      WTnext[[2]]=rep(max(phi1),length(Knext))
    }else{
      WTnext[[2]]<-1/g
      WTnext[[2]][which(is.infinite(WTnext[[2]]) | is.nan(WTnext[[2]]))]=max(phi1)
      for(i in 1:length(Knext)){
        WTnext[[2]][i]=min(WTnext[[2]][i],max(phi1))
      }
    }
    
    h <- sqrt(abs(diag(solmat%*%kronecker(diag(1,L),Omega2)%*%t(solmat))))
    if(isTRUE(all.equal(max(h),0))){
      WTnext[[3]]=rep(max(phi2),length(Knext))
    }else{
      WTnext[[3]]<-1/h
      WTnext[[3]][which(is.infinite(WTnext[[3]]) | is.nan(WTnext[[3]]))]=max(phi2)
      for(i in 1:length(Knext)){
        WTnext[[3]][i]=min(WTnext[[3]][i],max(phi2))
      }
    }
    return(list("Tune"=Tune,"solmat"=solmat,"Ksub"=Knext,"WTsub"=WTnext))
  }
  
  
  
  
  #CV function for SAFE
  # performs cross-validation for lseq for fixed values of phi1 and phi2
  # returns table described in glasso.step
  # potentially parallelizable with mclapply, but default cores set to 1
  cv.gglasso<- function(X,Y,group,blocks,intercept,lseq,Omega1,Omega2,phi1,phi2,WT,plot=TRUE,cvmult=1){
    L<-dim(Omega1)[1]
    M<-dim(Omega2)[1]
    blocks2<-lapply(1:max(blocks),function(x){which(blocks==x)})
    
    W<-do.call(cbind,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=phi1,phi2=phi2)[[1]])
    
    CV<-mclapply(blocks2,function(x){
      fit<-gglasso(x=W[-x,],
                   y=Y[-x],
                   group=group,
                   intercept=FALSE,
                   lambda=lseq)
      sq.resid<-apply(fit$beta,2,function(y){(Y[x]-W[x,]%*%y)^2})
      return(sq.resid)
    },mc.cores=1)
    sol<-list()
    cvraw<-do.call(rbind,CV)
    #Calculate within fold means
    FoldMeans<-do.call(rbind,lapply(blocks2,function(x){apply(cvraw[x,],2,mean)}))
    #Overall mean
    sol$cvm<-apply(FoldMeans,2,mean)
    sol$cvm<-sol$cvm[(length(lseq):1)]
    #Calculate standard error using fold means
    sol$cvsd<-apply(FoldMeans,2,sd)/sqrt(length(blocks2))	
    sol$cvsd<-sol$cvsd[(length(lseq):1)]
    sol$lambda<-lseq
    sol$lambda.min<-lseq[which.min(sol$cvm)]
    #Full Fit
    sol$gglasso.fit<-gglasso(x=W,y=Y,group=group,intercept=FALSE,lambda=lseq)
    sol$df<-sol$gglasso.fit$df[(length(lseq):1)]/(L*M)
    
    if(plot){
      plot(log(sol$lambda),sol$cvm,ylim=c(0,(max(sol$cvm+sol$cvsd))),
           xlab="Log(Lambda)",ylab="CV Mean",main=paste("Log(Phi1)=",log(phi1),"Log(Phi2)=",log(phi2)))
      lines(log(sol$lambda),y=(sol$cvm+sol$cvsd),col=2)
      lines(log(sol$lambda),y=(sol$cvm-sol$cvsd),col=2)
    }
    return(sol)
  }
  
  
  

#####################################################################################
# AGL functions
#  Function inputs similar to those for SAFE(z), see them for details
#####################################################################################        

  aglasso<-function(X,Y,Omega1,Omega2=1,phi1,WT,nblocks,lseq,nstage,plot=TRUE,cvmult=1,pos.effect=NA){
    #Needed values
    K<-1:length(X)     #number of EMG
    L <- nrow(Omega1)  #number of basis per EMG
    
    #Set up CV blocks
    b.size <- floor(length(Y)/nblocks)
    blocks <- sort(rep(1:nblocks,b.size)) #to be used in foldid later
    
    #First iteration
    Stage<-list()
    Stage[[1]]<-aglasso.step(X=X,Y=Y,K=K,WT=WT,
                             phi1=phi1,
                             blocks=blocks,
                             lseq=lseq,
                             Omega1=Omega1,Omega2=Omega2,plot=plot,cvmult=cvmult,
                             pos.effect=pos.effect)  
    print("Stage 1")
    output<-round(data.frame(do.call(rbind,Stage[[1]]$WTsub[1:2])),4)
    colnames(output)=Stage[[1]]$Ksub
    rownames(output)=c("L2","L2.D1")
    print(output)
    if(length(pos.effect)>1){
      output<-t(round(do.call(rbind,Stage[[1]]$WTsub[4:5]),4))
      colnames(output)=c("Pos L2","Pos L2.D1")
      print(output)
    }
    finish<-0
    stage<-1
    while(finish==0){
      stage<-stage+1
      Knext<-Stage[[stage-1]]$Ksub
      Stage[[stage]]=aglasso.step(X=X[Knext],
                                  Y=Y,
                                  K=Knext,
                                  WT=Stage[[stage-1]]$WTsub,
                                  phi1=phi1,
                                  blocks=blocks,
                                  lseq=lseq,
                                  Omega1=Omega1,Omega2=Omega2,plot=plot,cvmult=cvmult,
                                  pos.effect=Stage[[stage-1]]$pos.effect,
                                  cv.base=Stage[[stage-1]]$cv.base)
      if(is.na(nstage) & identical(Knext,Stage[[stage]]$Ksub)){finish=1}
      if(stage==nstage){finish=1}#if nstage=NA this will never be true
      if(length(Stage[[stage]]$solmat)==1){finish=1}
      print(paste("Stage",stage))
      output<-round(data.frame(do.call(rbind,Stage[[stage]]$WTsub[1:2])),4)
      colnames(output)=Stage[[stage]]$Ksub
      rownames(output)=c("L2","L2.D1")
      print(output)
      if(length(Stage[[stage]]$pos.effect)>1){
        output<-t(round(do.call(rbind,Stage[[stage]]$WTsub[4:5]),4))
        colnames(output)=c("Pos L2","Pos L2.D1")
        print(output)
      }
    }
    
    ################################
    #### FINAL STAGE
    ################################
    
    #Ridge solution using only variable set and CV
    FinalSol<-RidgeReg.CV(X=X[Stage[[stage]]$Ksub],
                          Y=Y,
                          Omega1=Omega1,Omega2=Omega2,
                          phi1=phi1,phi2=0,nblocks=nblocks,
                          pos.effect=Stage[[stage]]$pos.effect)
    output <- list("Stages"=Stage,"FinalSol"=FinalSol)
    return(output)
  }

  
  aglasso.step<-function(X,Y,K,WT,phi1,blocks,lseq=lseq,Omega1=Omega1,Omega2=1,plot=TRUE,cvmult=1,pos.effect=NA,cv.base=NA){
    
    L<-dim(Omega1)[1]
    
    Tune<-expand.grid(lseq,phi1)[,2:1]
    
    groups<-sort(rep(1:length(K),L))
    if(length(pos.effect)>1){groups<-c(groups,rep(length(K)+1,dim(Omega2)[1]))}
    
    iter<-1
    est=list()
    #Run cv.gglasso for each phi1
    Tune<-cbind(Tune,0,0,0,0,0) 
    colnames(Tune)=c("Phi1","Lambda","CVM","CVSE","CVM1SE","DF","Penalty")
    #Columns 1 and 2 are phi1 and lambda values
    #Columns 3 and 4 are cvmean and cvse
    #Column  5 is cvmean+cvse*cvmult
    #Column  6 is model size for full data
    #Column  7 is penalty
    for(i in phi1){
      sol<-cv.aglasso(X=X,Y=Y,group=groups,blocks=blocks,intercept=FALSE,
                      lseq=lseq,
                      Omega1=Omega1,Omega2=Omega2,phi1=i,WT=WT,plot=plot,cvmult=cvmult,pos.effect=pos.effect)
      indx<-which(Tune[,1]==i)
      Tune[indx,3:4]=cbind(sol$cvm,sol$cvsd)
      Tune[indx,5]<-Tune[indx,3]+cvmult*Tune[indx,4]
      Tune[indx,6]<-unlist(sol$df)
      print(iter)
      print(unlist(sol$df)[which.min(sol$cvm)])
      
      est[[iter]]= sol$gglasso.fit$beta
      #Calculate penalties for each estimate
      if(length(pos.effect)>1){
        P<-diag(t(est[[iter]])%*%(diag(1,nrow(est[[iter]]))+bdiag(kronecker(diag(1,length(K)),Omega1),Omega2))%*%est[[iter]])
      }else{
        P<-diag(t(est[[iter]])%*%(diag(1,nrow(est[[iter]]))+kronecker(diag(1,length(K)),Omega1))%*%est[[iter]])
      }
      Tune[indx,7]=P[length(lseq):1]
      iter<-iter+1
    }    
    OPT.min<-which.min(Tune[,5])
    #Check if OPT.min is smaller than baseline cv mean
    if(length(cv.base)==1 | (length(cv.base)>1 & Tune[OPT.min,5]<=(cv.base[1]+cv.base[2]))){
      cv.base<-as.numeric(Tune[OPT.min,3:4])
      #I let OPT.1se take on two different purposes
      OPT.1se<-Tune[OPT.min,5]
      Cands<-which(Tune[,3]<=OPT.1se) 
      
      OptCand=Cands[which.min(Tune[Cands,7])]
      OPT.1se<-as.numeric(Tune[OptCand,1:2]) #Smallest penalty
      
      #Final Fit
      #if(smooth.adj==FALSE){
      W <- do.call(cbind,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[1]])
      sol<-gglasso(x=W,y=Y,group=groups,lambda=OPT.1se[2],intercept=FALSE)
      #Transform back to original scale
      iRs <-bdiag(Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[2]])
      if(length(pos.effect)>1){
        iRs<-bdiag(iRs,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[3]])  
      }
      
      #}
      
      sol.fit<-iRs%*%sol$beta
      
      
      #EMG coefficients
      solmat<-matrix(sol.fit[1:(L*length(K))],nrow=length(K),ncol=L,byrow=TRUE)
      #Find important variables
      Knext<-K[which(diag(crossprod(t(solmat)))>0)]
      #Subset design matrix and solution matrix
      Xnext <- X[Knext]
      solmat<-solmat[which(diag(crossprod(t(solmat)))>0),]
      if(length(Knext)==1){solmat<-t(as.matrix(solmat))}
      
      #Adaptive Weights
      WTnext<-list()
      f <- sqrt(diag(crossprod(t(solmat))))
      #WTnext[[1]]<-max(f)/f
      WTnext[[1]]<-1/f
      g <- sqrt(abs(diag(solmat%*%Omega1%*%t(solmat))))
      if(
        isTRUE(all.equal(max(g),0))){WTnext[[2]]=rep(max(phi1),length(Knext))
      }else{
        #WTnext[[2]]<-max(g)/g
        WTnext[[2]]<-1/g
        WTnext[[2]][which(is.infinite(WTnext[[2]]) | is.nan(WTnext[[2]]))]=max(phi1)
        for(i in 1:length(Knext)){
          WTnext[[2]][i]=min(WTnext[[2]][i],max(phi1))
        }
      }
      WTnext[[3]]=rep(0,length(K))
      
      #Position coefficient
      if(length(pos.effect)>1){
        sol.pos<-sol.fit[-(1:(L*length(K)))]
        if(sum(sol.pos^2)==0){
          pos.effect=NA; WTnext[[4]]=max(phi1); WTnext[[5]]=max(phi1)
        }else{
          WTnext[[4]]<-1/sqrt(sum(sol.pos^2))
          g2<-as.numeric(sqrt(abs(t(sol.pos)%*%Omega2%*%sol.pos)))
          if(g2==0){WTnext[[5]]=max(phi1)
          }else{
            WTnext[[5]]<-1/g2
          }
        }
      }
      
    }else{ #if OPT.min > cv.base[1]
      OPT.1se<-cv.base[1]+cv.base[2]
      Cands<-which(Tune[,3]<=OPT.1se) 
      if(length(Cands)>1){ #Check that there is model in optimal CV range
        OptCand=Cands[which.min(Tune[Cands,7])]
        OPT.1se<-as.numeric(Tune[OptCand,1:2]) #Smallest model size
        
        #Final Fit
        #if(smooth.adj==FALSE){
        W <- do.call(cbind,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[1]])
        sol<-gglasso(x=W,y=Y,group=groups,lambda=OPT.1se[2],intercept=FALSE)
        #Transform back to original scale
        iRs <-bdiag(Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[2]])
        if(length(pos.effect)>1){
          iRs<-bdiag(iRs,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[3]])  
        }
        
        #}
        
        sol.fit<-iRs%*%sol$beta
        
        
        #EMG coefficients
        solmat<-matrix(sol.fit[1:(L*length(K))],nrow=length(K),ncol=L,byrow=TRUE)
        #Find important variables
        Knext<-K[which(diag(crossprod(t(solmat)))>0)]
        #Subset design matrix and solution matrix
        Xnext <- X[Knext]
        solmat<-solmat[which(diag(crossprod(t(solmat)))>0),]
        if(length(Knext)==1){solmat<-t(as.matrix(solmat))}
        
        #Adaptive Weights
        WTnext<-list()
        f <- sqrt(diag(crossprod(t(solmat))))
        #WTnext[[1]]<-max(f)/f
        WTnext[[1]]<-1/f
        g <- sqrt(abs(diag(solmat%*%Omega1%*%t(solmat))))
        if(
          isTRUE(all.equal(max(g),0))){WTnext[[2]]=rep(max(phi1),length(Knext))
        }else{
          #WTnext[[2]]<-max(g)/g
          WTnext[[2]]<-1/g
          WTnext[[2]][which(is.infinite(WTnext[[2]]) | is.nan(WTnext[[2]]))]=max(phi1)
          for(i in 1:length(Knext)){
            WTnext[[2]][i]=min(WTnext[[2]][i],max(phi1))
          }
        }
        WTnext[[3]]=rep(0,length(K))
        
        #Position coefficient
        if(length(pos.effect)>1){
          sol.pos<-sol.fit[-(1:(L*length(K)))]
          if(sum(sol.pos^2)==0){
            pos.effect=NA; WTnext[[4]]=max(phi1); WTnext[[5]]=max(phi1)
          }else{
            WTnext[[4]]<-1/sqrt(sum(sol.pos^2))
            g2<-as.numeric(sqrt(abs(t(sol.pos)%*%Omega2%*%sol.pos)))
            if(g2==0){WTnext[[5]]=max(phi1)
            }else{
              WTnext[[5]]<-1/g2
            }
          }
        }
      }else{#If no fits are in 1 se of cv.base mean
        print("No models within 1se of cv.base mean")
        solmat<-NA
        Knext<-K
        WTnext<-WT
        sol.pos<-NA
      }
      
      
    }
    
    
    if(length(pos.effect)>1){
      return(list("Tune"=Tune,"solmat"=solmat,"Ksub"=Knext,"WTsub"=WTnext,"pos.effect"=pos.effect,"sol.pos"=sol.pos,"cv.base"=cv.base))  
    }else{
      return(list("Tune"=Tune,"solmat"=solmat,"Ksub"=Knext,"WTsub"=WTnext,"pos.effect"=pos.effect,"cv.base"=cv.base))  
    }
    
  }
  
  
  cv.aglasso<- function(X,Y,group,blocks,intercept,lseq,Omega1,phi1,WT,plot=TRUE,cvmult=1,Omega2=1,pos.effect=NA){
    blocks2<-lapply(1:max(blocks),function(x){which(blocks==x)})
    
    W<-do.call(cbind,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=phi1,phi2=0,pos.effect=pos.effect)[[1]])
    
      
    CV<-mclapply(blocks2,function(x){
        fit<-gglasso(x=W[-x,],
                     y=Y[-x],
                     group=group,
                     intercept=FALSE,
                     lambda=lseq)
        mse<-apply(fit$beta,2,function(y){(Y[x]-W[x,]%*%y)^2})
          return(mse)
      },mc.cores=1)
      sol<-list()
         cvraw<-do.call(rbind,CV)
         #Calculate within fold means
         FoldMeans<-do.call(rbind,lapply(blocks2,function(x){apply(cvraw[x,],2,mean)}))
         #Overall mean
         sol$cvm<-apply(FoldMeans,2,mean)
          	sol$cvm<-sol$cvm[(length(lseq):1)]
         #Calculate standard error using fold means
         sol$cvsd<-apply(FoldMeans,2,sd)/sqrt(length(blocks2))	
          	sol$cvsd<-sol$cvsd[(length(lseq):1)]
         sol$lambda<-lseq
         sol$lambda.min<-lseq[which.min(sol$cvm)]
      #Full Fit
      sol$gglasso.fit<-gglasso(x=W,y=Y,group=group,intercept=FALSE,lambda=lseq)
      sol$df<-sol$gglasso.fit$df[(length(lseq):1)]/dim(Omega1)[1] #Not accurate if pos.effect used unless dim(Omega2)=dim(Omega1)
        #Do 1 se rule on sqrt scale
      #iCand<-which(sol$cvm<=(sol$cvm+cvmult*sol$cvsd)[which(sol$lambda==sol$lambda.min)] & (sol$lambda >= sol$lambda.min))
      #sol$lambda.1se<-sol$lambda[max(iCand)]
    
      
    if(plot){
    plot(log(sol$lambda),sol$cvm,ylim=c(0,(max(sol$cvm+sol$cvsd))),
         xlab="Log(Lambda)",ylab="CV Mean",main=paste("Log(Phi1)=",log(phi1)))
    lines(log(sol$lambda),y=(sol$cvm+sol$cvsd),col=2)
    lines(log(sol$lambda),y=(sol$cvm-sol$cvsd),col=2)
    }
    return(sol)
  }
  
  
  
  
  

#####################################################################################
# LAD functions
#  Function inputs similar to those for SAFE(z), see them for details
#####################################################################################          
  
  LADglasso<-function(X,Y,Omega1,Omega2=1,phi1,WT,nblocks,lseq,nstage,plot=TRUE,cvmult=1,pos.effect=NA){
    #Needed values
    K<-1:length(X)
    L <- nrow(Omega1)
    
    #Set up CV blocks
    b.size <- floor(length(Y)/nblocks)
    blocks <- sort(rep(1:nblocks,b.size)) #to be used in foldid later
    
    #First iteration
    Stage<-list()
    Stage[[1]]<-LADglasso.step(X=X,Y=Y,K=K,WT=WT,
                               phi1=phi1,
                               blocks=blocks,
                               lseq=lseq,
                               Omega1=Omega1,Omega2=Omega2,plot=plot,cvmult=cvmult,pos.effect=pos.effect)  
    print("Stage 1")
    output<-round(data.frame(do.call(rbind,Stage[[1]]$WTsub[1:2])),4)
    colnames(output)=Stage[[1]]$Ksub
    rownames(output)=c("L2","L2.D1")
    print(output)
    if(length(pos.effect)>1){
      output<-t(round(do.call(rbind,Stage[[1]]$WTsub[4:5]),4))
      colnames(output)=c("Pos L2","Pos L2.D1")
      print(output)
    }
    finish<-0
    stage<-1
    while(finish==0){
      stage<-stage+1
      Knext<-Stage[[stage-1]]$Ksub
      Stage[[stage]]=LADglasso.step(X=X[Knext],
                                    Y=Y,
                                    K=Knext,
                                    WT=Stage[[stage-1]]$WTsub,
                                    phi1=phi1,
                                    blocks=blocks,
                                    lseq=lseq,
                                    Omega1=Omega1,Omega2=Omega2,plot=plot,cvmult=cvmult,
                                    pos.effect=Stage[[stage-1]]$pos.effect)
      if(is.na(nstage) & identical(Knext,Stage[[stage]]$Ksub)){finish=1}
      if(stage==nstage){finish=1}#if nstage=NA this will never be true
      print(paste("Stage",stage))
      output<-round(data.frame(do.call(rbind,Stage[[stage]]$WTsub[1:2])),4)
      colnames(output)=Stage[[stage]]$Ksub
      rownames(output)=c("L2","L2.D1")
      print(output)
      if(length(Stage[[stage]]$pos.effect)>1){
        output<-t(round(do.call(rbind,Stage[[stage]]$WTsub[4:5]),4))
        colnames(output)=c("Pos L2","Pos L2.D1")
        print(output)
      }
    }
    
    ################################
    #### FINAL STAGE
    ################################
    
    #Ridge solution using only variable set and CV
    FinalSol<-RidgeReg.CV(X=X[Stage[[stage]]$Ksub],
                          Y=Y,
                          Omega1=Omega1,Omega2=Omega2,
                          phi1=phi1,phi2=0,nblocks=nblocks,
                          pos.effect=Stage[[stage]]$pos.effect)
    output <- list("Stages"=Stage,"FinalSol"=FinalSol)
    return(output)
  }
  
  
  LADglasso.step<-function(X,Y,K,WT,phi1,blocks,lseq=lseq,Omega1=Omega1,Omega2=1,plot=TRUE,cvmult=1,pos.effect=NA,cv.base=NA){
    
    L<-dim(Omega1)[1]
    
    Tune<-expand.grid(lseq,phi1)[,2:1]
    
    groups<-sort(rep(1:length(K),L))
    if(length(pos.effect)>1){groups<-c(groups,rep(length(K)+1,dim(Omega2)[1]))}
    
    #Run cv.gglasso for each phi1
    Tune<-cbind(Tune,0,0,0,0,0) #fifth column stores average predict error; sixth is average model size
    colnames(Tune)=c("Phi1","Lambda","CVM","CVSE","CVM1SE","DF","Penalty")
    iter<-1
    est<-list()
    for(i in phi1){
      sol<-cv.LADglasso(X=X,Y=Y,group=groups,blocks=blocks,intercept=FALSE,
                        lseq=lseq,
                        Omega1=Omega1,Omega2=Omega2,phi1=i,WT=WT,plot=plot,cvmult=cvmult,pos.effect=pos.effect)
      indx<-which(Tune[,1]==i)
      Tune[indx,3:4]=cbind(sol$cvm,sol$cvsd)
      grab<-which(sol$lambda==sol$lambda.1se)
      #Tune[iter,2:4]<-c(sol$lambda.1se,unlist(sol$cvm)[grab],unlist(sol$cvsd)[grab])
      Tune[indx,5]<-Tune[indx,3]+cvmult*Tune[indx,4]
      Tune[indx,6]<-unlist(sol$df)
      est<-sol$beta
      #Calculate penalties for each estimate
      if(length(pos.effect)>1){
        P<-diag(t(est)%*%est)+diag(t(est)%*%bdiag(kronecker(diag(1,length(X)),Omega1),Omega2)%*%est)
      }else{
        P<-diag(t(est)%*%est)+diag(t(est)%*%kronecker(diag(1,length(X)),Omega1)%*%est)
      }
      Tune[indx,7]=P #No need to reverse order like we did in SAFE and aglasso
      print(iter)
      print(unlist(sol$df)[which.min(sol$cvm)])
      iter<-iter+1
    }    
    OPT.min<-which.min(Tune[,3])
    #Check if OPT.min is smaller than baseline cvm+cvsd
    if(length(cv.base)==1 | (length(cv.base)>1 & Tune[OPT.min,5]<=(cv.base[1]+cv.base[2]))){
      cv.base<-as.numeric(Tune[OPT.min,3:4])
      #I let OPT.1se take on two different purposes
      OPT.1se<-Tune[OPT.min,5]
      Cands<-which(Tune[,3]<=OPT.1se) 
      
      OptCand=Cands[which.min(Tune[Cands,7])]
      OPT.1se<-as.numeric(Tune[OptCand,1:2])
      
      #Final Fit
      #if(smooth.adj==FALSE){
      W <- do.call(cbind,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[1]])
      sol<-rq.group.fit(x = W, y = Y, groups = groups, tau = 0.5, lambda = OPT.1se[2], 
                        intercept = FALSE, penalty = "LASSO", alg = "QICD",  penGroups = NULL)
      #Transform back to original scale
      iRs <-bdiag(Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[2]])
      if(length(pos.effect)>1){
        iRs<-bdiag(iRs,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[3]])  
      }
      #}
      
      sol.fit<-iRs%*%sol$coefficients
      
      #EMG coefficients
      solmat<-matrix(sol.fit[1:(L*length(K))],nrow=length(K),ncol=L,byrow=TRUE)
      #Find important variables
      Knext<-K[which(diag(crossprod(t(solmat)))>0)]
      #Subset design matrix and solution matrix
      Xnext <- X[Knext]
      solmat<-solmat[which(diag(crossprod(t(solmat)))>0),]
      if(length(Knext)==1){solmat<-t(as.matrix(solmat))}
      
      #Adaptive Weights
      WTnext<-list()
      f <- sqrt(diag(crossprod(t(solmat))))
      #WTnext[[1]]<-max(f)/f
      WTnext[[1]]<-1/f
      g <- sqrt(abs(diag(solmat%*%Omega1%*%t(solmat))))
      if(
        isTRUE(all.equal(max(g),0))){WTnext[[2]]=rep(max(phi1),length(Knext))
      }else{
        #WTnext[[2]]<-max(g)/g
        WTnext[[2]]<-1/g
        WTnext[[2]][which(is.infinite(WTnext[[2]]) | is.nan(WTnext[[2]]))]=max(phi1)
        for(i in 1:length(Knext)){
          WTnext[[2]][i]=min(WTnext[[2]][i],max(phi1))
        }
      }
      WTnext[[3]]=rep(0,length(K))
      
      #Position coefficient
      if(length(pos.effect)>1){
        sol.pos<-sol.fit[-(1:(L*length(K)))]
        if(sum(sol.pos^2)==0){
          pos.effect=NA; WTnext[[4]]=max(phi1); WTnext[[5]]=max(phi1)
        }else{
          WTnext[[4]]<-1/sqrt(sum(sol.pos^2))
          g2<-as.numeric(sqrt(abs(t(sol.pos)%*%Omega2%*%sol.pos)))
          if(g2==0){WTnext[[5]]=max(phi1)
          }else{
            WTnext[[5]]<-1/g2
          }
        }
      }
      
    }else{ #if OPT.min > cv.base[1]
      OPT.1se<-cv.base[1]+cv.base[2]
      Cands<-which(Tune[,3]<=OPT.1se) 
      if(length(Cands)>1){ #Check that there is model in optimal CV range
        OptCand=Cands[which.min(Tune[Cands,7])]
        OPT.1se<-as.numeric(Tune[OptCand,1:2]) #Smallest model size
        
        #Final Fit
        #if(smooth.adj==FALSE){
        W <- do.call(cbind,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[1]])
        sol<-rq.group.fit(x = W, y = Y, groups = groups, tau = 0.5, lambda = OPT.1se[2], 
                          intercept = FALSE, penalty = "LASSO", alg = "QICD",  penGroups = NULL)
        #Transform back to original scale
        iRs <-bdiag(Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[2]])
        if(length(pos.effect)>1){
          iRs<-bdiag(iRs,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=OPT.1se[1],phi2=0,pos.effect=pos.effect)[[3]])  
        }
        #}
        
        sol.fit<-iRs%*%sol$coefficients
        
        #EMG coefficients
        solmat<-matrix(sol.fit[1:(L*length(K))],nrow=length(K),ncol=L,byrow=TRUE)
        #Find important variables
        Knext<-K[which(diag(crossprod(t(solmat)))>0)]
        #Subset design matrix and solution matrix
        Xnext <- X[Knext]
        solmat<-solmat[which(diag(crossprod(t(solmat)))>0),]
        if(length(Knext)==1){solmat<-t(as.matrix(solmat))}
        
        #Adaptive Weights
        WTnext<-list()
        f <- sqrt(diag(crossprod(t(solmat))))
        #WTnext[[1]]<-max(f)/f
        WTnext[[1]]<-1/f
        g <- sqrt(abs(diag(solmat%*%Omega1%*%t(solmat))))
        if(
          isTRUE(all.equal(max(g),0))){WTnext[[2]]=rep(max(phi1),length(Knext))
        }else{
          #WTnext[[2]]<-max(g)/g
          WTnext[[2]]<-1/g
          WTnext[[2]][which(is.infinite(WTnext[[2]]) | is.nan(WTnext[[2]]))]=max(phi1)
          for(i in 1:length(Knext)){
            WTnext[[2]][i]=min(WTnext[[2]][i],max(phi1))
          }
        }
        WTnext[[3]]=rep(0,length(K))
        
        #Position coefficient
        if(length(pos.effect)>1){
          sol.pos<-sol.fit[-(1:(L*length(K)))]
          if(sum(sol.pos^2)==0){
            pos.effect=NA; WTnext[[4]]=max(phi1); WTnext[[5]]=max(phi1)
          }else{
            WTnext[[4]]<-1/sqrt(sum(sol.pos^2))
            g2<-as.numeric(sqrt(abs(t(sol.pos)%*%Omega2%*%sol.pos)))
            if(g2==0){WTnext[[5]]=max(phi1)
            }else{
              WTnext[[5]]<-1/g2
            }
          }
        }
      }else{#If no fits are in 1 se of cv.base mean
        print("No models within 1se of cv.base mean")
        solmat<-NA
        Knext<-K
        WTnext<-WT
        sol.pos<-NA
      }
      
      
    }
    
    
    if(length(pos.effect)>1){
      return(list("Tune"=Tune,"solmat"=solmat,"Ksub"=Knext,"WTsub"=WTnext,"pos.effect"=pos.effect,"sol.pos"=sol.pos,"cv.base"=cv.base))  
    }else{
      return(list("Tune"=Tune,"solmat"=solmat,"Ksub"=Knext,"WTsub"=WTnext,"pos.effect"=pos.effect,"cv.base"=cv.base))  
    }
  }
  
  
  cv.LADglasso<- function(X,Y,group,blocks,intercept,lseq,Omega1,phi1,WT,plot=TRUE,cvmult=1,Omega2=1,pos.effect=NA){
    blocks2<-lapply(1:max(blocks),function(x){which(blocks==x)})
    
    W<-do.call(cbind,Pmat.Stage2_Adap(X=X,WT=WT,Omega1=Omega1,Omega2=Omega2,phi1=phi1,phi2=0,pos.effect=pos.effect)[[1]])
    
    CV.store<-matrix(NA,nrow=length(lseq),ncol=4)
    CV.store[,1]=lseq
    est<-matrix(NA,ncol=length(lseq),nrow=ncol(W))
    for(i in 1:length(lseq)){
      temp.store<-matrix(NA,nrow=max(blocks),ncol=2)
      #Overall fit for second-1se rule
      est[,i]<-(rq.group.fit(x = W, y = Y, groups = group, tau = 0.5, lambda=lseq[i],
                             intercept = FALSE, penalty = "LASSO",  alg="QICD",penGroups = NULL)$coefficients)
      for(j in 1:max(blocks)){
        sol<-unlist(rq.group.fit(x = W[-blocks2[[j]],], y = Y[-blocks2[[j]]], groups = group, tau = 0.5, lambda=lseq[i],
                                 intercept = FALSE, penalty = "LASSO",  alg="QICD",penGroups = NULL)$coefficients)
        temp.store[j,1]=0
        for(m in 1:max(group)){
          temp.store[j,1]=temp.store[j,1]+(max(abs(sol[which(group==m)]))>0) #Counts df
        }
        temp.store[j,2]=mean(abs(Y[blocks2[[j]]]-W[blocks2[[j]],]%*%sol)) #absolute residuals
      }
      CV.store[i,2:4]=c(max(temp.store[,1]),mean(temp.store[,2]),sd(temp.store[,2])/sqrt(max(blocks)))
    }
    
    sol<-list()          
    sol$cvm<-CV.store[,3]
    sol$cvsd<-CV.store[,4]
    sol$lambda<-lseq
    sol$lambda.min<-lseq[which.min(sol$cvm)]
    sol$df<-CV.store[,2]
    sol$beta<-est
    #sol$P<-P
    #Do 1 se rule
    #iCand<-which(sol$cvm<=(sol$cvm+cvmult*sol$cvsd)[which(sol$lambda==sol$lambda.min)] & (sol$lambda >= sol$lambda.min))
    #sol$lambda.1se<-sol$lambda[max(iCand)]
    
    
    if(plot){
    plot(log(sol$lambda),sol$cvm,ylim=c(0,(max(sol$cvm+sol$cvsd))),
         xlab="Log(Lambda)",ylab="CV Mean",main=paste("Log(Phi1)=",log(phi1)))
    lines(log(sol$lambda),y=(sol$cvm+sol$cvsd),col=2)
    lines(log(sol$lambda),y=(sol$cvm-sol$cvsd),col=2)
    }
    return(sol)
  }
  

  
#####################################################################################
# FAR functions
# Function inputs similar to those for SAFE(z), see them for details
# Performs just two stages of analysis
# Calls on functions contained in FARgSCAD_functions.R provided by corresponding author of 
# delta = same delta as data.process
# alpha = 1, always
#####################################################################################            
  
  FARgSCAD<-function(X,Y,L,delta,nblocks,alpha=1,lam.path,plot=TRUE,cvmult=1,pos.effect=NA){
    #Needed values
    K<-1:length(X)
    
    #Set up CV blocks
    b.size <- floor(length(Y)/nblocks)
    foldid <- sort(rep(1:nblocks,b.size)) #to be used in foldid later
    
    #First iteration
    Stage<-list()
    Stage[[1]]<-cv.farfunc(y=Y,X=X,K=K,pena="grSCAD", 
                           time=seq(0,1,length.out=delta+1), 
                           df.beta=L, 
                           lam.path=lam.path,
                           foldid=foldid,
                           alpha=alpha,
                           intercept=FALSE,plot=plot,cvmult=cvmult,pos.effect=pos.effect) 
    Knext<-Stage[[1]]$Ksub
    print("Stage 1")
    print(Stage[[1]]$Ksub)
    
    Stage[[2]]<-cv.farfunc(y=Y,X=X[Knext],K=Knext,pena="grSCAD", 
                           time=seq(0,1,length.out=delta+1), 
                           df.beta=L, 
                           lam.path=lam.path,
                           foldid=foldid,
                           alpha=alpha,
                           intercept=FALSE,plot=plot,cvmult=cvmult,
                           pos.effect=Stage[[1]]$pos.effect) 
    print("Stage 2")
    print(Stage[[2]]$Ksub)
    
    return(Stage)
  }
  
  
  
  
  
  

#####################################################
#  Diagnostics for SAFE object created by SAFE.glasso
#####################################################  
  
  norm.track<-function(SAFE,L,M){
    stages=length(SAFE[[1]])
    if(length(SAFE[[1]][[stages]]$solmat)==1){stages<-stages-1}
    
    f<-matrix(0,nrow=stages,ncol=16)
    g<-f
    h<-f
    for(i in 1:stages){
      solmat<-SAFE[[1]][[i]]$solmat
      f[i,SAFE[[1]][[i]]$Ksub] <- diag(crossprod(t(solmat)))
      g[i,SAFE[[1]][[i]]$Ksub] <- abs(diag(solmat%*%kronecker(Omega1,diag(1,M))%*%t(solmat)))
      if(M>1){h[i,SAFE[[1]][[i]]$Ksub] <- abs(diag(solmat%*%kronecker(diag(1,L),Omega2)%*%t(solmat)))}
    }
    sub.plot<-SAFE[[1]][[1]]$Ksub
    temp.cols<-rainbow(length(sub.plot))
    par(mfrow=c(2,2),mar=c(5,5,1,1))
    if(stages>1){
    matplot(sqrt(f)[,sub.plot],type="b",ylab=bquote("||"~hat(gamma)[k]~"||"),xlab="Stage",lwd=3,pch=16,lty=1,col=temp.cols) 
    #legend("topright",legend=sub.plot,col=temp.cols,lty=1,lwd=3)
    matplot(sqrt(g)[,sub.plot],type="b",ylab=bquote("||"~hat(gamma)[ks]~"''||"),xlab="Stage",lwd=3,pch=16,lty=1,col=temp.cols)
    #legend("topright",legend=sub.plot,col=temp.cols,lty=1,lwd=3)
    if(M>1){
      matplot(sqrt(h)[,sub.plot],type="b",ylab=bquote("||"~hat(gamma)[kz]~"''||"),xlab="Stage",lwd=3,pch=16,lty=1,col=temp.cols)
    }
    }else{
      plot(sqrt(f)[sub.plot],type="b",ylab=bquote("||"~hat(gamma)[k]~"||"),xlab="X",lwd=3,pch=16,lty=1,col=temp.cols,axes=FALSE) 
        box()
        axis(1,at=1:length(sub.plot),labels=sub.plot)
        axis(2)
        
      #legend("topright",legend=sub.plot,col=temp.cols,lty=1,lwd=3)
      plot(sqrt(g)[sub.plot],type="b",ylab=bquote("||"~hat(gamma)[ks]~"''||"),xlab="X",lwd=3,pch=16,lty=1,col=temp.cols,axes=FALSE)
        box()
        axis(1,at=1:length(sub.plot),labels=sub.plot)
        axis(2)
      #legend("topright",legend=sub.plot,col=temp.cols,lty=1,lwd=3)
      if(M>1){
        plot(sqrt(h)[sub.plot],type="b",ylab=bquote("||"~hat(gamma)[kz]~"''||"),xlab="X",lwd=3,pch=16,lty=1,col=temp.cols,axes=FALSE)
        box()
        axis(1,at=1:length(sub.plot),labels=sub.plot)
        axis(2)
      }
    }
      #legend("topright",legend=sub.plot,col=temp.cols,lty=1,lwd=3)
    plot.new()
    legend("center",legend=paste("X",sub.plot,sep=""),col=temp.cols,lty=1,lwd=3)
  }
  
  
#####################################################
#  Plots of coefficients for different methods
#  If stage="LASSO" then select substage
#  stage="Final" gives estimates based on smoothing penalties alone
#####################################################
  
  
  SAFE.plot<-function(SAFE,stage="Final",substage=1){
    #Stage choices are "Final", "LASSO"
    #Substage choice between 1 and length(SAFE[[1]])
    if(stage=="Final"){B<-SAFE[[2]][[4]];K<-SAFE[[1]][[length(SAFE[[1]])]]$Ksub}
    else{B<-SAFE[[1]][[substage]]$solmat;K<-SAFE[[1]][[substage]]$Ksub}
    
    #Coefficient plots
    FCoef<-lapply(1:length(K),function(x){
      Bmat<-matrix(B[x,],nrow=L,ncol=M,byrow=TRUE)
      return(B1%*%Bmat%*%t(B2[sort(Z,index.return=TRUE)$ix,]))
    })
    
    par(mfrow=c(1,2),mar=c(4,3,3,3))
    for(i in 1:length(K)){
      filled.contour(x=seq(-delta,0),y=sort(Z+rnorm(length(Z),0,sd=0.000001)),z=FCoef[[i]],color=heat.colors,
                     main=paste("EMG",K[i])
                     #,zlim=c(min(unlist(FCoef)),max(unlist(FCoef)))
                     )
      #plot_ly(x=seq(-delta,0),y=sort(Z),z=t(FCoef),type="contour")
    }
    
    #Predictions
    par(mfrow=c(1,1))
    Yhat<-do.call(cbind,Xtilde[K])%*%c(t(B))
    plot(Y,Yhat)
    abline(a=0,b=1,col=2,lwd=3)
    
    #Residuals
    r=Y-Yhat
    par(mfrow=c(1,2))
    hist(r)
    acf(r)
    
    #plot(x=Yhat,y=r)
    
    par(mfrow=c(1,1),mar=c(5,4,4,4))
    plot(Y,type="l")
    lines(Yhat,col=2)
    plot(r,type="l")
  }
  
  ag.plot<-function(ag,stage="Final",substage=1){
    #Stage choices are "Final", "LASSO"
    #Substage choice between 1 and length(SAFE[[1]])
    #Assume Z data has been loaded
    if(stage=="Final"){B<-ag[[2]][[3]];K<-ag[[1]][[length(ag[[1]])]]$Ksub}
    else{B<-ag[[1]][[substage]]$solmat;K<-ag[[1]][[substage]]$Ksub}
    
    #Coefficient plots
    par(mfrow=c(1,1),mar=c(5,4,4,4))
    FCoef<-B1%*%t(B)
    for(i in 1:length(K)){
      plot(x=1:dim(FCoef)[1],FCoef[,i],main=paste("EMG",K[i]),type="l",lwd=3)
    }
    pos.effect<-ag[[1]][[substage]]$pos.effect
    sol.pos<-0
    if(length(pos.effect)>1){
      if(stage=="Final"){
        sol.pos<-ag[[2]][[6]]
      }else{
        sol.pos<-ag[[1]][[substage]]$sol.pos
      }
      plot(x=sort(Z),pos.effect[sort(Z,index.return=TRUE)$ix,]%*%sol.pos,main="Position Effect",type="l",lwd=3,
           xlab="Position",ylab="Effect")
    }
    
    #Predictions
    par(mfrow=c(1,1))
    Yhat<-do.call(cbind,Xomega[K])%*%as.numeric(t(B))
    if(length(pos.effect)>1)Yhat<-Yhat+pos.effect%*%sol.pos
    
    plot(Y,Yhat)
    abline(a=0,b=1,col=2,lwd=3)
    
    #Residuals
    r=Y-Yhat
    par(mfrow=c(1,2))
    hist(r)
    acf(r)
    
    #plot(x=Yhat,y=r)
    
    par(mfrow=c(1,1),mar=c(5,4,4,4))
    plot(Y,type="l")
    lines(Yhat,col=2)
    plot(r,type="l")
  }
  
 FAR.plot<-function(FAR,stage=2){
    
    B<-FAR[[stage]]$solmat
    K<-FAR[[stage]]$Ksub
    
    #Coefficient plots
    par(mfrow=c(1,1),mar=c(5,4,4,4))
    for(i in 1:length(K)){
      plot(x=1:dim(B)[2],B[i,],main=paste("EMG",K[i]),type="l",lwd=3,
           ylim=c(min(B),max(B)))
    }
    pos.effect<-FAR[[stage]]$pos.effect
    sol.pos<-0
    if(length(pos.effect)>1){
      sol.pos<-FAR[[stage]]$sol.pos
      plot(x=sort(Z),pos.effect[sort(Z,index.return=TRUE)$ix,]%*%sol.pos,main="Position Effect",type="l",lwd=3,
           xlab="Position",ylab="Effect")
    }
    
    #Predictions
    par(mfrow=c(1,1))
    Yhat<-do.call(cbind,X[K])%*%as.numeric(t(B))*1/(delta+1)
    if(length(pos.effect)>1)Yhat<-Yhat+pos.effect%*%sol.pos
    plot(Y,Yhat)
    abline(a=0,b=1,col=2,lwd=3)
    
    #Residuals
    r=Y-Yhat
    par(mfrow=c(1,2))
    hist(r)
    acf(r)
    
    #plot(x=Yhat,y=r)
    
    par(mfrow=c(1,1),mar=c(5,4,4,4))
    plot(Y,type="l")
    lines(Yhat,col=2)
    plot(r,type="l")
  }

 

  
  