##################################################################
# GStruct Function
##################################################################
# INPUTS
# X = full GO-SSD matrix where first orthogonal group is error columns (may or may not include intercept)
#
# OUTPUTS
# X      = matrix with just factor columns
# Xe     = matrix of error columns including intercept if in original X matrix
# groups = list of group indices for X

GStruct<-function(X){
    p<-ncol(X)
    #Find groups
    
    Ind<-abs(crossprod(X))>0
    groups<-list()
    factors<-1:p
    k<-1
    drop<-c()
    while(length(factors)>1){
      groups[[k]]<-which(Ind[min(factors),])  
      for(i in factors){
        if(length(intersect(groups[[k]],which(Ind[i,])))>0)
          groups[[k]]<-union(groups[[k]],which(Ind[i,]))
      }
      drop<-c(drop,groups[[k]])
      factors<-(1:p)[-drop]
      k<-k+1
    }
    
    Xe <- X[,groups[[1]]]
    X  <- X[,unlist(groups[-1])]
    
    #Rest of factor columns
    groups<-groups[-1]
    groups<-lapply(groups,function(x){x-dim(Xe)[2]})

    return(list("X"=X, "Xe"=Xe, "groups"=groups))
}





##################################################################
# GScreen Function
#NOTE: The code below requires group orthogonality and preemptively checks for this structure.
##################################################################
# INPUTS
#X          = design matrix (No intercept or error columns)
#y          = vector of responses
#groups     = list of column indices for the groups for X
#Xe         = error columns (may or may not include intercept)
#alpha      = group significance level
#alpha2     = factor significance level
#pool       = whether to pool groups found to be insignificant
#BackElim   = whether pooling should involve backwards elimination
#FactorPool = whether pooling should be done with factor screening
#Jones      = original Jones et al (2019) analysis for factor screening: c(Do method?,Use all subsets?)
#MaxPower  = analyze with MaxPower method which requires allsubsets and pooling factors into model
#
# OUTPUTS
#ANOVA     = complete ANOVA table (no MS)
#ANOVApool = ANOVA table after pooling and removing groups (no MS)
#model     = final set of factors
#g.keep    = groups deemed significant


GScreen <- function(X,y,groups,Xe,alpha=0.10,alpha2=0.10,pool=TRUE,
                    BackElim=TRUE,FactorPool=TRUE,Jones=c(FALSE,FALSE),MaxPower=TRUE){
  require(Matrix)
  require(MASS)
  
  #Creates orthogonal projection matrix
  Proj <-function(X){
    return(X%*%ginv(crossprod(X))%*%t(X))
  }
  
  n <- length(y)      #run size
  g <- length(groups) #number of factor groups
  I <- diag(1,n)      #Identity matrix
  J <- matrix(1,nrow=n,ncol=n)  #n x n matrix of 1's
  SS <- rep(0,g)      #SS for factor groups
  Pr <- list()        #Projection matrices
  df <- rep(0,g)      #df for factor groups
  Pmodel<-1/n*J       #Starts with projection onto intercept
  
  
  #Checks method
  if(Jones[1] & MaxPower){print(paste("Both Jones and MaxPower method selected.  Pick one."));stop}
  
  #Check groups with X
  checkg<-unlist(groups)
  if(!length(intersect(checkg,1:dim(X)[2]))==dim(X)[2]){
    print(paste("Not all factors specified in groups"));stop
  }
  
  #Creates g list of X submatrices for groups
  Xg <- list()
  for(i in 1:g){Xg[[i]] <- X[,groups[[i]]]}
  
  #Check orthogonality of factor groups with intercept
  for(i in 1:g){if(sum(abs(t(Xg[[i]])%*%matrix(1,nrow=n,ncol=1)))!=0){print(paste("Nonorthogonality between group",i,"and intercept.")); stop}}
  
  #Check orthogonality of factor groups with error
  for(i in 1:g){if(sum(abs(t(Xg[[i]])%*%Xe))!=0){print(paste("Nonorthogonality between group",i,"and error.")); stop}}
  
  #Check orthogonality between factor groups
  for(i in 1:(g-1)){
    for(j in (i+1):g){
      if(sum(abs(t(Xg[[i]])%*%Xg[[j]]))!=0){print(paste("Nonorthogonality between groups",i,"and",j)); stop}}  
  }
  
  
  ############################
  # GROUP SCREENING
  ############################
  
    
    #Creates list of projections and vectors for SS and df
    Pr<-lapply(Xg,Proj)
    SS<-unlist(lapply(Pr,function(x){t(y)%*%x%*%y}))
    df<-unlist(lapply(Xg,rankMatrix))
    
    #Error columns adjusted for intercept
    XE  <- (I-Pmodel)%*%Xe
    
    #calculate SSE and df
    SSE <- t(y)%*%Proj(XE)%*%y
    dfE <- rankMatrix(XE)[[1]]
    MSE <- SSE/dfE
    MSE1 <- MSE #For output
    
    #Calculate F-ratios and p-values with unpooled MSE
    F.ratio <- (SS/df)/c(MSE)
    pvalues <- pf(F.ratio,df1=df,df2=dfE,lower.tail=FALSE)
    
    #Identify sig and insig groups     
    g.keep <- which(pvalues<alpha)  #Sig groups
    g.drop <- which(pvalues>=alpha) #Insig groups
    
    #ANOVA table
    Sources<-c(1:g)
    ANOVA<-cbind(Sources,SS,df,F.ratio,pvalues)
    ANOVA<-rbind(ANOVA,c(NA,SSE,dfE,NA,NA))
    ANOVApool<-ANOVA #Initially set ANOVApool to ANOVA (same if no pooling done)
    
    #################
    #GROUP POOLING#
    #################
        if(length(g.drop)>0 & pool==TRUE){
          #Backward elimination pooling
          if(BackElim==TRUE){ 
            while(length(g.drop)>0){  #Always pool insignificant factors
              g.d<-which.max(pvalues) #Group to drop has highest pvalue
              SSEnew<-SSE+SS[g.d]        #Due to orthogonality just add SS
              dfEnew<-dfE+df[g.d]        #Add df
              MSEnew<-SSEnew/dfEnew            #Recalculate MSE
              #Should we pool?
              poolcrit<-(MSEnew/MSE)<(qf(1-alpha,df[g.d],dfE)/qf(1-alpha,df[g.d],dfEnew))
              if(poolcrit){SSE<-SSEnew;dfE<-dfEnew;MSE<-MSEnew}
              Sources<-Sources[-g.d]  #Drop g.d from Sources
              SS     <-SS[-g.d]       #Drop SS for g.d
              df     <-df[-g.d]       #Drop df for g.d
              F.ratio <- (SS/df)/c(MSE) #Recalc F
              pvalues <- pf(F.ratio,df1=df,df2=dfE,lower.tail=FALSE) #Recalc p
              g.drop <-Sources[which(pvalues>=alpha)] #Recalc groups to drop  
            }
            g.keep=Sources          #Update g.keep for what's left over
            g.drop<-c(1:g)[-g.keep] #Update g.drop to be everything not in g.keep
            ANOVApool<-cbind(Sources=g.keep,SS,df,F.ratio,pvalues) #Update ANOVApool
            ANOVApool<-rbind(ANOVApool,c(NA,SSE,dfE,NA,NA))        #Update ANOVApool with error info
            
          }else{ #All pooling
            SSE <- SSE+sum(SS[g.drop])
            dfE <- dfE+sum(df[g.drop])
            MSE <- SSE/dfE
            SS  <-SS[g.keep]
            df  <-df[g.keep]
            F.ratio <- (SS/df)/c(MSE)
            pvalues <- pf(F.ratio,df1=df,df2=dfE,lower.tail=FALSE)
            ANOVApool<-cbind(Sources=g.keep,SS,df,F.ratio,pvalues)
            ANOVApool<-rbind(ANOVApool,c(NA,SSE,dfE,NA,NA))
          }
          
        }#END POOLING#  
  
  
  #####################
  #FACTOR SCREENING
  #####################
    
      model=c()
      
      #Check if anything is significant    
      if(length(g.keep)==0){
        return(list(ANOVA=ANOVA,ANOVApool=ANOVApool,model=model,g.keep,MSE1=MSE1,MSE=MSE))
      }else{ #Individual Factor Screening
        
        #Individual factor analysis by group in g.keep
        #Sort g.keep groups by least sig (low F) to most sig (high F)
        if(pool==TRUE){g.keep <- g.keep[sort(F.ratio,index.return=TRUE,decreasing=FALSE)$ix]}
        
        #Go through each group one by one, increasing SS order
        for(i in g.keep){
          Xsub <- Xg[[i]] 
          g.ms <- length(groups[[i]]) #number of factors
          g.r <- c(rankMatrix(Xsub))  #rank
          
          if(Jones[[1]]){ #if Jones method = TRUE, do up to rank g.r
            stop<-g.r  
          }else{ #if MaxPower method, do up to floor(g.r/2), add 1 for while statement
            stop<-max(1,floor(g.r/2))+1  
          }
          
          j <- 1   #Counts model size  
          pF1 <- 0 #Initializes p-value check for lack of fit test
          bestmod<-c() #keeps track of bestmod
          
          if(g.r>1){ #Need rank >1, handles special confounding cases
              while(pF1<alpha2 & j<stop){
                
                ######################
                # Generate candidate models of size j
                ######################
                if(Jones[1] & !Jones[2]){ #Forward selection for Jones method
                  if(length(bestmod)>0){
                    mods <- combn((1:g.ms)[-bestmod],j-length(bestmod))  #forward selection
                    mods <- rbind(
                      matrix(rep(bestmod,ncol(mods)),ncol=ncol(mods)),
                      mods)
                  }else{ #if j=1 do this
                    mods <- combn((1:g.ms),1)
                  }
                }else{ #All subsets
                  mods <- combn(1:g.ms,j)      #Generates all possible models of size j
                }
                
                ######################
                # Calculate RSS and lack-of-fit F ratios
                ######################
                RFs<-apply(mods,2,function(x){
                  SSMs<-t(y)%*%(Proj(as.matrix(Xsub[,x])))%*%y
                  Rs<-t(y)%*%(Proj(Xsub)-Proj(as.matrix(Xsub[,x])))%*%y
                  Fs<-(Rs/(g.r-j))/MSE
                  return(c(Rs,Fs,SSMs))
                }
                )
                
                if(MaxPower){
                  #Find which models are below critical value for lack of fit
                  Fcrit<-qf(alpha2,g.r-j,dfE,lower.tail=FALSE)
                  cmods<-which(RFs[2,]<Fcrit)
                  #terms
                  bestmod<-unique(c(mods[,cmods]))
                  select <- which.min(RFs[1,])      #Which model has smallest Rs, used later
                }else{ #Jones method picks smallest Rss model
                  select <- which.min(RFs[1,])      #Which model has smallest Rs
                  bestmod <- mods[,select]          #Best model  
                }
                
                #Determines whether more testing needed by looking at smallest Rss
                pF1 <- pf(RFs[2,select],g.r-j,dfE,lower.tail=FALSE) #Recalc pF1
                
                j <- j+1 #Go to next model size  
                
              } #End while loop through all model sizes for given group
              
              #Determine whether all factors should be pooled together
              if(pF1<alpha2 & j==stop){
                bestmod<-1:g.ms
              } #All factors included if rank = g.r
          }else{ #from g.r>1 check, so if g.r=1 we accept whole group
            bestmod<-1:g.ms
          }
          model<- c(model,groups[[i]][bestmod]) #Update model with those selected
          
          if(length(bestmod)==1) {
            sub.r=1
          }else{
            sub.r<-c(rankMatrix(Xsub[,bestmod])) #Rank of best model
          }
          
          if(sub.r < g.r & FactorPool==TRUE){ #Add more df for error for next group
            SSEnew <- SSE+RFs[1,select]
            dfEnew <- dfE+g.r-sub.r
            MSEnew <- SSEnew/dfEnew
            #Should we pool?
            poolcrit<-(MSEnew/MSE)<(qf(1-alpha2,g.r-sub.r,dfE)/qf(1-alpha2,g.r-sub.r,dfEnew))
            if(poolcrit){SSE<-SSEnew;dfE<-dfEnew;MSE<-MSEnew}
          }
        }#End factor screening loop
    return(list(ANOVA=ANOVA,ANOVApool=ANOVApool,model=sort(model),sort(g.keep),MSE1=MSE1,MSE=MSE))
    
  }#END INDIVIDUAL FACTOR SCREENING IF
  
} #End GScreen function
