#Penalty functions
mcp <- function(lam, x){max(c(3.7*lam - abs(x), 0))/(3.7*lam)}
scad <- function(lam,x){((abs(x) <= lam) + max(3.7*lam - abs(x), 0)*(abs(x) > lam)/(2.7*lam))}
sica <- function(x,a){a*(a + 1)/(a + abs(x))^2}

#Generate ortho bases
obs.deriv <- function(x, df, deriv=0){
  knot.l = rep(min(x), 4)
  knot.r = rep(max(x), 4)
  knot.int = quantile(x, seq(0, 1, length = (df - 2))[-c(1, df - 2)])
  if(deriv==0){
    obase=OBasis(c(knot.l, knot.int, knot.r))
  }else{if(deriv==1)
    obase=deriv(OBasis(c(knot.l, knot.int, knot.r))) 
  }
  obs=array(0,c(length(x),df))
  obs[order(x),] = evaluate(obase, x[order(x)])
}

#Transform X into Theta's
get.Theta <- function(X,B){
  
  Theta=list()
  p=length(X)
  for (j in 1:p){
    Theta[[j]]=t(apply(X[[j]],1,function(x){solve(t(B)%*%B)%*%t(B)%*%x})) 
    #Theta[[j]]=STND_SCALE(Theta[[j]])
  }
  
  return(Theta)
}

#Ridge projection (for fitting algorithm)
ridgedProj <- function(U,D=1,y){
  iprod <- rep(0,ncol(U))
  p <- rep(0,length(y))
  
  iprod <- D*(t(U) %*% y)
  p <- U %*% iprod
  return(p)
}

#Fit for single lambda value, assumes X transformations already done (here X = Theta)
ridgedSingleFit <- function(y, index, pena, lambda = 1, thresh = 0.0001, maxit = 1000, Us, Ds,Vs,oldFit, resid, isActive, alpha=1, is.pen){
  
  numGroups <- length(unique(index))
  groupLen <- rep(0,numGroups)
  for(i in 1:numGroups){
    groupLen[i] <- length(which(index == unique(index)[i]))
  }
  
  newD <- list(c(1,2,3))  # stupid workaround...
  dfs <- rep(0,numGroups)
  
  #The newD is somehow creating a ridged penalty for smoothness
  
  for(j in 1:numGroups){
    newD[[j+1]]<- Ds[[j]]^2/(Ds[[j]]^2 + (1-alpha)*lambda*is.pen[j])
    dfs[j] <- sum(newD[[j+1]])
  }
  for(j in 1:numGroups){  # stupid workaround...
    newD[[j]] <- newD[[j+1]]
  }
  
  diff <- 1
  iter <- 1
  innerConverged <- 1
  useGroups <- 1:numGroups
  groupChange <- 1
  qqq <- 0
  
  while(groupChange == 1){
    
    groupChange <- 0
    innerConverged <- 1
    diff <- 1
    
    while(diff > thresh && iter < maxit){
      iter <- iter + 1
      diff <- 0
      
      if(innerConverged == 1){
        useGroups <- 1:numGroups
      }
      if(innerConverged == 0){
        useGroups <- which(isActive == 1)
      }
      
      for(i in useGroups){
        resid <- resid + oldFit[,i]
        
        if(pena=="grLASSO"){penaShrink <-  1}
        temp=sqrt(sum(oldFit[,i])^2)
        if(pena=="grSCAD"){penaShrink <-  scad(lambda/length(y),temp)}
        
        proj <- ridgedProj(Us[[i]], newD[[i]], resid)
        normProj <- sqrt(sum((proj)^2))
        #
        shrinkage <-  max(c((1-sqrt(dfs[i])*lambda*alpha*penaShrink*is.pen[i] / normProj),0))
        newFit <- shrinkage * proj
        resid <- resid - newFit
        diff <- diff + sum(abs(oldFit[,i] - newFit))
        
        if(max(abs(oldFit[,i])) == 0 && max(abs(newFit)) > 0){
          isActive[i] <- 1
          groupChange <- 1
        }
        oldFit[,i] <- newFit
      }
      innerConverged <- 0
      diff <- diff / length(y)
    }
  }
  
  Etas <- list()
  
  for(i in 1:numGroups)
  {
    fit.ind <- which(abs(Ds[[i]]) > 10^(-5))
    eta <- Vs[[i]][,fit.ind] %*% ((1/Ds[[i]][fit.ind]) * t(Us[[i]][,fit.ind]) %*% oldFit[,i]) 
    Etas[[i]] <-  eta
  }
  Etas<-unlist(Etas)
  return(list(Etas=Etas, oldFit = oldFit, resid = resid, isActive = isActive))
}

#Fit for lambda path, assumes X transformations already done (here X = Theta)
ridgedGL <- function(y, Theta, index, pena, lam.path, thresh = 10^(-4), maxit = 1000, alpha=1,is.pen = rep(1,length(index))){
  
  numGroups <- length(unique(index))
  groupLen <- rep(0,numGroups)
  for(i in 1:numGroups){
    groupLen[i] <- length(which(index == unique(index)[i]))
  }
  
  Us <- list()
  Vs <- list()
  Ds <- list()
  
  for(i in 1:numGroups)
  {
    groupiMat <- Theta[[i]]
    svdDecomp <- svd(groupiMat)
    Us[[i]] <- svdDecomp$u
    Vs[[i]] <- svdDecomp$v
    Ds[[i]] <- svdDecomp$d
  }
  
  oldFit <- matrix(0, ncol = length(index), nrow = length(y))
  resid <- y
  
  Etas <- matrix(0, ncol = length(lam.path), nrow = length(index))
  
  isActive = rep(0, numGroups)
  
  for(i in length(lam.path):1){
    
    if(pena == "grLASSO"){
      fit <- ridgedSingleFit(y, index, pena, lam.path[i], Us = Us, Vs = Vs, Ds = Ds, oldFit = oldFit, resid = resid, isActive = isActive, alpha = alpha, is.pen = is.pen)
      
      resid <- fit$resid
      oldFit <- fit$oldFit
      isActive <- fit$isActive
      Etas[,i] <- fit$Etas
    }
    
    if(pena =="grSCAD"){
      #get initial estimates
      fit <- ridgedSingleFit(y, index, pena="grLASSO", lam.path[i], Us = Us, Vs = Vs, Ds = Ds, oldFit = oldFit, resid = resid, isActive = isActive, alpha=alpha, is.pen = is.pen)
      resid <- fit$resid
      oldFit <- fit$oldFit
      isActive <- fit$isActive
      #run with grSCAD
      fit <- ridgedSingleFit(y, index, pena, lam.path[i], Us = Us, Vs = Vs, Ds = Ds, oldFit = oldFit, resid = resid, isActive = isActive, alpha=alpha, is.pen = is.pen)
      resid <- fit$resid
      oldFit <- fit$oldFit
      isActive <- fit$isActive
      Etas[,i] <- fit$Etas
    }
    
    
  }
  return(list(Etas = Etas, lam.path = lam.path))
}

#Linear FAR (does all necessary transformations and ridgedGL)
#y is a vector
#X is a list of functional covariates
#pena = grLASSO or grSCAD
#time are unique points of X curve
#df.beta is number of basis functions

farfunc <- function(y, X=NULL, Theta=NULL,pena, time, df.beta, 
                    intercept=TRUE,alpha=1,lam.path,pos.effect=NA) {
  #generate spline basis\n
  B = obs.deriv(x=time, df=df.beta) #generate orthonormal spline basis
  
  n = length(y) # number of oberservations\n
  
  #If Theta not provided, calculate it, otherwise set W=XTheta
  if(is.null(Theta)){
    Theta = get.Theta(X, B)
  }
  
  p=length(Theta)
  index.beta = ceiling(1:(df.beta * p)/df.beta) #index set showing the group info\n
  
  if(length(pos.effect)>1){
    Theta[["pos"]]=pos.effect
    index.beta =c(index.beta,rep(p+1,dim(pos.effect)[2]))
  }
  

  mu.y=mean(y); 
  if(intercept==FALSE){mu.y=0}
  y=y-mu.y
  ypred.val=0
  
  far.fit = ridgedGL(y=y, Theta=Theta, index=index.beta, pena=pena, alpha=alpha, lam.path=lam.path)
  
  Ksig <- apply(far.fit$Etas,2,function(x){ceiling(sum(abs(x)>0)/df.beta)})
  
  obj = list(Theta=Theta,Etas=far.fit$Etas,Ksig=Ksig,lambda=lam.path)
  return(obj)
}

#CV
cv.farfunc=function(y,X,K,pena, time, df.beta, lam.path=NULL,foldid,alpha=1,
                    intercept=TRUE,plot=TRUE,cvmult=1,pos.effect=NA){
  B = obs.deriv(x=time, df=df.beta) #generate orthonormal spline basis
  Theta = get.Theta(X, B)
  
  #Create foldid blocks
  blocks<-lapply(1:max(foldid),function(x){which(foldid==x)})
  CV.betas=list()
  Pred<-matrix(NA,nrow=max(foldid),ncol=length(lam.path))
  SigG<-matrix(NA,nrow=max(foldid),ncol=length(lam.path))
  for(i in 1:max(foldid)){
    Ysub=y[-blocks[[i]]]
    XSub=lapply(X,function(x){x[-blocks[[i]],]})
    pos.sub<-NA
    if(length(pos.effect)>1){
      pos.sub=pos.effect[-blocks[[i]],] 
    }
    CV.betas[[i]]=farfunc(y=Ysub,X=XSub,
                          pena=pena, time=time, df.beta=df.beta, lam.path=lam.path, alpha=alpha,
                          intercept=FALSE,pos.effect=pos.sub)
    ThetaPred=do.call(cbind,lapply(Theta,function(x){x[blocks[[i]],]}))
    if(length(pos.effect)>1){
      ThetaPred<-cbind(ThetaPred,pos.effect[blocks[[i]],])
    }
    Pred[i,]=apply(CV.betas[[i]]$Etas,2,function(x){mean(abs(y[blocks[[i]]]-ThetaPred%*%x))})#Min absolute residuals
    SigG[i,]=CV.betas[[i]]$Ksig
  }
  
  #Calculate prediction errors, means, sds
  sol=list()
  sol$cvm<-apply(Pred,2,mean)
  sol$cvsd<-apply(Pred,2,sd)/sqrt(max(foldid))
  sol$lambda<-lam.path
  sol$df<-apply(SigG,2,max)
  
  Tune<-cbind(sol$lambda,sol$cvm,sol$cvsd,sol$df)
  
  if(plot){
  plot(log(sol$lambda),sol$cvm,ylim=c(0,(max(sol$cvm+sol$cvsd))),
       xlab="Log(Lambda)",ylab="CV Mean")
  lines(log(sol$lambda),y=(sol$cvm+sol$cvsd),col=2)
  lines(log(sol$lambda),y=(sol$cvm-sol$cvsd),col=2)
  }
  
  #Use SE rule
  sol$lambda.min=sol$lambda[which.min(sol$cvm)]
  iCand<-which(sol$cvm<=(sol$cvm+cvmult*sol$cvsd)[which(sol$lambda==sol$lambda.min)] & (sol$lambda >= sol$lambda.min))
  sol$lambda.1se<-sol$lambda[max(iCand)]
  iCand<-which.min(sol$cvm)
  
  #Final fit
  FARfinal=farfunc(y=y,X=X,
                   pena=pena, time=time, df.beta=df.beta, lam.path=lam.path, alpha=alpha,
                   intercept=FALSE,pos.effect=pos.effect)
  
  #fits<-apply(FARfinal$Etas,2,function(x){mean((y-do.call(cbind,Theta)%*%x)^2)})
  #if(plot){
  #plot(log(lam.path),fits,ylim=c(0,(max(sol$cvm+sol$cvsd))),
  #     xlab="Log(Lambda)",ylab="CV Mean")
  #abline(v=log(lam.path[iCand]),col=2,lwd=2)
  #}
  
  #EMG Coefficients
  Betas <- kronecker(diag(1,length(Theta)),B)%*%FARfinal$Etas[1:(df.beta*length(X)),iCand]
  Etas  <- matrix(FARfinal$Etas[1:(df.beta*length(X)),iCand],nrow=length(Theta),ncol=df.beta,byrow=TRUE)
  solmat<- matrix(Betas,nrow=length(Theta),ncol=dim(B)[1],byrow=TRUE)
  
  #Find important variables
  Knext<-K[which(diag(crossprod(t(solmat)))>0)]
  #Subset design matrix and solution matrix
  Xnext <- X[Knext]
  Etas <- Etas[which(diag(crossprod(t(solmat)))>0),]
  solmat<-solmat[which(diag(crossprod(t(solmat)))>0),]
  if(length(Knext)==1){solmat<-t(as.matrix(solmat))}
  
  #Pos Effect
  if(length(pos.effect)>1){
    sol.pos<-FARfinal$Etas[-c(1:(df.beta*length(X))),iCand] 
    if(sum(sol.pos^2)==0){
      pos.effect=NA
    }
  }
  
  if(length(pos.effect)>1){
    return(list("Tune"=Tune,"solmat"=solmat,"Ksub"=Knext,"B"=B,"Etas"=Etas,"pos.effect"=pos.effect,"sol.pos"=sol.pos))  
  }else{
    return(list("Tune"=Tune,"solmat"=solmat,"Ksub"=Knext,"B"=B,"Etas"=Etas,"pos.effect"=pos.effect))
  }
  
}

