@@ -516,3 +516,314 @@ t_svd_reconstruct <- function(L){
   resid <- fnorm(tnsr - est)
   invisible(list(core = as.tensor(core), est = est, fnorm_resid = resid, norm_percent = (1 - resid / fnorm(tnsr)) * 100))
 }
+
+#' Sparse (semi-)nonnegative Tucker decomposition
+#'
+#' Decomposes a nonnegative tensor \code{tnsr} into an (optionally nonnegative) core tensor \code{Z}
+#' and sparse nonnegative factor matrices \code{U[n]}.
+#'@export
+#'@param tnsr nonnegative tensor with \code{K} modes
+#'@param ranks an integer vector of length \code{K} specifying the mode sizes of the output core tensor \code{Z}
+#'@param core_nonneg if \code{TRUE}, constrain the core tensor \code{Z} to be nonnegative
+#'@param tol relative Frobenius norm error tolerance
+#'@param hosvd if \code{TRUE}, improve the initial \code{U} and \code{Z} estimates by higher-order SVD
+#'@param max_iter maximum number of iterations if the error stays above \code{tol}
+#'@param max_time maximum running time in seconds (ignored if nonpositive)
+#'@param lambda a vector of \code{K+1} sparsity regularization coefficients for the factor matrices and the core tensor
+#'@param L_min lower bound for the Lipschitz constants of the gradients of the residual error \eqn{l(Z,U) = fnorm(tnsr - ttl(Z, U))} with respect to \code{Z} and each \code{U}
+#'@param rw controls the extrapolation weight
+#'@param bound upper bound for the elements of \code{Z} and \code{U[[n]]} (applied to the blocks that have a zero regularization coefficient \code{lambda})
+#'@param U0 initial factor matrices, defaults to nonnegative Gaussian random matrices
+#'@param Z0 initial core tensor \code{Z}, defaults to a nonnegative Gaussian random tensor
+#'@param verbose print the progress of the algorithm
+#'@param unfold_tnsr precompute the matrix unfoldings of \code{tnsr} along every mode (speeds up the calculation, but may require a lot of memory)
+#'@return a list:\describe{
+#'\item{\code{U}}{nonnegative factor matrices}
+#'\item{\code{Z}}{nonnegative core tensor}
+#'\item{\code{est}}{the estimate \eqn{Z \times_1 U_1 \ldots \times_K U_K}}
+#'\item{\code{conv}}{method convergence indicator}
+#'\item{\code{resid}}{the final objective value: half the squared Frobenius norm of the residual error \code{l(Z,U)} plus the regularization penalty (if any)}
+#'\item{\code{n_iter}}{number of iterations}
+#'\item{\code{n_redo}}{number of times \code{Z} and \code{U} were recalculated to avoid an increase of the objective function}
+#'\item{\code{diag}}{convergence info for each iteration\describe{
+#'\item{\code{all_resids}}{objective values}
+#'\item{\code{all_rel_resid_deltas}}{objective value changes relative to the current objective value}
+#'\item{\code{all_rel_resids}}{residual norms relative to the Frobenius norm of \code{tnsr}}
+#'}}}
+#'
+#'@details The function uses the alternating proximal gradient method to solve the following optimization problem:
+#' \deqn{\min 0.5 \|tnsr - Z \times_1 U_1 \ldots \times_K U_K \|_F^2 +
+#' \sum_{n=1}^{K} \lambda_n \|U_n\|_1 + \lambda_{K+1} \|Z\|_1, \;\text{where}\; Z \geq 0, \, U_i \geq 0.}
+#' If \code{core_nonneg} is \code{FALSE}, the core tensor \code{Z} is allowed to have negative
+#' elements and the update rule \eqn{z_{i,j}=\max(0, z_{i,j}-\lambda_{K+1}/L_{K+1})} is replaced by the soft-thresholding rule \eqn{z_{i,j}=\mathrm{sign}(z_{i,j})\max(0, |z_{i,j}|-\lambda_{K+1}/L_{K+1})}.
+#' The method stops if either the relative improvement of the error is below the tolerance \code{tol} for 3 consecutive iterations, or
+#' both the relative error improvement and the relative error (with respect to the \code{tnsr} norm) are below the tolerance.
+#' Otherwise it stops when the maximum number of iterations or the time limit is reached.
+#'
+#'@note The implementation is based on the \code{ntds()} MATLAB code by Yangyang Xu and Wotao Yin.
+#'@references Y. Xu, "Alternating proximal gradient method for sparse nonnegative Tucker decomposition", Mathematical Programming Computation, 7(1), 39-70, 2015.
+#'@seealso \code{\link{tucker}}
+#'@seealso \url{http://www.caam.rice.edu/~optimization/bcu/}
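+#'
+#'@examples
+#'\dontrun{
+#'# A minimal usage sketch on a small random nonnegative tensor
+#'# (assumes the as.tensor() and fnorm() helpers used elsewhere in this package;
+#'# the tensor size and the ranks are arbitrary illustration values)
+#'tnsr <- as.tensor(array(runif(20 * 20 * 20), dim = c(20, 20, 20)))
+#'decomp <- tucker.nonneg(tnsr, ranks = c(4, 4, 4), tol = 1e-5, verbose = TRUE)
+#'# relative error of the reconstruction Z x_1 U_1 x_2 U_2 x_3 U_3
+#'fnorm(decomp$est - tnsr) / fnorm(tnsr)
+#'}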
+tucker.nonneg <- function(tnsr, ranks, core_nonneg = TRUE,
+                          tol = 1e-4, hosvd = FALSE,
+                          max_iter = 500, max_time = 0,
+                          lambda = rep.int(0, length(ranks) + 1), L_min = 1, rw = 0.9999,
+                          bound = Inf,
+                          U0 = NULL, Z0 = NULL, verbose = FALSE,
+                          unfold_tnsr = length(dim(tnsr)) * prod(dim(tnsr)) < 4000^2)
+{
+  # progress bar
+  start_time <- proc.time()
+  pb <- txtProgressBar(min = 0, max = max_iter, style = 3)
+
+  make_nonneg.tnsr <- function(tnsr)
+  {
+    tnsr@data[tnsr@data < 0] <- 0
+    return(tnsr)
+  }
+  make_nonneg.mtx <- function(mtx)
+  {
+    mtx[mtx < 0] <- 0
+    return(mtx)
+  }
+  # update the core tensor Z (in the enclosing environment)
+  makeZStep <- function(curZ)
+  {
+    gradZ <- ttl(curZ, Usq, seq_len(K)) - TtU
+    # gradient step for the core tensor
+    Z <<- curZ - gradZ / L[[K + 1]]
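+    # If the core is L1-regularized, the step above is followed by the proximal
+    # operator of lambda[[K+1]]*||Z||_1: for a nonnegative core it reduces to
+    # shifting by lambda/L before clamping at zero, otherwise to soft-thresholding.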
+    if (lambda[[K + 1]] > 0) {
+      if (core_nonneg) {
+        Z <<- Z - lambda[[K + 1]] / L[[K + 1]]
+      } else {
+        Z@data <<- sign(Z@data) * pmax(0, abs(Z@data) - lambda[[K + 1]] / L[[K + 1]])
+      }
+    }
+    if (core_nonneg) Z <<- make_nonneg.tnsr(Z)
+    # do projection
+    if (doproj[[K + 1]]) {
+      mask <- abs(Z@data) > bound
+      Z@data[mask] <<- sign(Z@data[mask]) * bound
+    }
+    return(invisible())
+  }
+
+  # update the n-th factor matrix U[[n]]
+  # return the new residual error
+  makeUnStep <- function(curU, n)
+  {
+    if (!is.null(Tmtx)) {
+      B <- unfold(ttl(Z, U[-n], seq_len(K)[-n]), n, seq_len(K)[-n])
+      Bsq <- tcrossprod(B@data)
+      TB <- tcrossprod(Tmtx[[n]], B@data)
+    } else {
+      B <- unfold(ttl(Z, Usq[-n], seq_len(K)[-n]), n, seq_len(K)[-n])
+      TB <- unfold(ttl(tnsr, U[-n], seq_len(K)[-n], transpose = TRUE), n, seq_len(K)[-n])
+      Zn <- unfold(Z, n, seq_len(K)[-n])
+
+      Bsq <- tcrossprod(B@data, Zn@data)
+      TB <- tcrossprod(TB@data, Zn@data)
+    }
+    # compute the gradient
+    gradU <- curU %*% Bsq - TB
+    # update the Lipschitz constant
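+    # gradU is linear in curU, so it is Lipschitz-continuous with constant
+    # norm(Bsq, '2') (the spectral norm of B %*% t(B)); L_min keeps the step size bounded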
+    L0[[n]] <<- L[[n]]
+    L[[n]] <<- max(L_min, norm(Bsq, '2'))
+    # update the n-th factor matrix
+    newU <- make_nonneg.mtx(curU - (gradU + lambda[[n]]) / L[[n]])
+    if (doproj[[n]]) newU[newU > bound] <- bound
+
+    # update U[[n]]
+    U[[n]] <<- newU
+    Usq[[n]] <<- crossprod(U[[n]])
+    nrmUsq[[n]] <<- norm(Usq[[n]], '2')
+
+    # evaluate the new value of the objective function
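+    # 0.5*||T_(n) - U_n %*% B||_F^2 expands to 0.5*(<U_n' U_n, B B'> - 2*<U_n, T_(n) B'> + ||T||_F^2),
+    # so the fit can be evaluated from the small matrices Usq, Bsq and TB
+    # without forming the full reconstruction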
+    newResid <- 0.5 * (sum(Usq[[n]] * Bsq) - 2 * sum(U[[n]] * TB) + Tnrm^2)
+    if (sparse.reg) {
+      newResid <- newResid + lambda %*% c(sapply(U, sum), sum(abs(Z@data)))
+    }
+    return(newResid)
+  }
+
+  Kway <- dim(tnsr)   # dimensions of tnsr
+  K <- length(Kway)   # tnsr is a K-way tensor
+
+  if (is.null(U0)) {
+    if (verbose) message('Generating random initial factor matrix estimates...')
+    U0 <- lapply(seq_len(K), function(n) make_nonneg.mtx(matrix(rnorm(Kway[[n]] * ranks[[n]]), ncol = ranks[[n]])))
+  }
+  if (is.null(Z0)) {
+    if (verbose) message('Generating random initial core tensor estimate...')
+    Z0 <- rand_tensor(modes = ranks, drop = FALSE)
+  }
+  if (core_nonneg) Z0 <- make_nonneg.tnsr(Z0)
+
+  # pre-process the starting point
+  if (hosvd) {
+    if (verbose) message('Applying higher-order SVD to improve initial U and Z...')
+    # "solve" Z = tnsr x_1 U_1' ... x_K U_K'
+    U0 <- lapply(seq_len(K), function(n) {
+      U0n_tilde <- unfold(ttl(tnsr, U0[-n], seq_len(K)[-n], transpose = TRUE),
+                          row_idx = n, col_idx = seq_len(K)[-n])@data
+      U0n_vecs <- svd(U0n_tilde, nu = ranks[[n]], nv = 0)$u
+      # process the left singular vectors column-by-column
+      U0n <- matrix(unlist(lapply(seq_len(ncol(U0n_vecs)), function(i) {
+        Uvec <- U0n_vecs[, i]
+        # make the largest absolute element positive
+        if (Uvec[[which.max(abs(Uvec))]] < 0) Uvec <- -Uvec
+        # project to > 0
+        pmax(.Machine$double.eps, Uvec)
+      })), ncol = ranks[[n]])
+      return(U0n / sum(U0n))
+    })
+    Z0 <- ttl(tnsr, U0, seq_len(K), transpose = TRUE)
+  }
+  # check the existence of the sparseness regularizer
+  sparse.reg <- any(lambda > 0)
+  # add bound constraint for well-definedness
+  doproj <- lambda == 0 & is.finite(bound)
+
+  Tnrm <- fnorm(tnsr)
+
+  # rescale the initial point according to the number of elements
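+  # the exponents Knum[[n]]/totalNum and prod(ranks)/totalNum sum to one,
+  # so after rescaling the product of the Frobenius norms of all blocks equals fnorm(tnsr)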
+  Knum <- Kway * ranks
+  totalNum <- prod(ranks) + sum(Knum)
+  U0 <- lapply(seq_along(U0), function(n) U0[[n]] / norm(U0[[n]], "F") * Tnrm^(Knum[[n]] / totalNum))
+  Usq0 <- lapply(U0, crossprod)
+  nrmUsq <- sapply(Usq0, norm, '2')
+  Z0 <- Z0 / fnorm(Z0) * Tnrm^(prod(ranks) / totalNum)
+
+  resid0 <- 0.5 * fnorm(tnsr - ttl(Z0, U0, seq_len(K)))^2
+  if (sparse.reg) resid0 <- resid0 + lambda %*% c(sapply(U0, sum), sum(abs(Z0@data)))
+  resid <- resid0
+
+  # precompute the matrix unfoldings of the input tensor to save computing time if it is not too large
+  if (unfold_tnsr) {
+    if (verbose) message('Precomputing input tensor unfoldings...')
+    Tmtx <- lapply(seq_len(K), function(n) unfold(tnsr, row_idx = n, col_idx = seq_len(K)[-n])@data)
+  } else {
+    if (verbose) message('No precomputing of tensor unfoldings')
+    Tmtx <- NULL
+  }
+
+  # Iterations of block-coordinate update
+  # iteratively updated variables:
+  #  gradU: gradients with respect to each factor matrix of U
+  #  gradZ: gradient with respect to Z
+  #  U, Z: new updates
+  #  U0, Z0: old updates
+  #  Um, Zm: extrapolations of U and Z
+  #  L, L0: current and previous Lipschitz bounds
+  #  resid, resid0: current and previous residual error
+  U <- U0
+  Um <- U0
+  Usq <- Usq0
+  Z <- Z0
+  Zm <- Z0
+
+  t0 <- rep.int(1, K + 1)
+  t <- t0
+  wU <- rep.int(0, K + 1)
+  L0 <- rep.int(1, K + 1)
+  L <- L0
+
+  all_resids <- numeric(0)
+  all_rel_resid_deltas <- numeric(0)
+  all_rel_resids <- numeric(0)
+  n_stall <- 0
+  n_redo <- 0
+  conv <- FALSE
+
+  # do the iterations
+  if (verbose) message('Starting iterations...')
+  for (n_iter in seq_len(max_iter)) {
+    setTxtProgressBar(pb, n_iter)
+
+    residn0 <- resid
+    TtU0 <- list(ttm(tnsr, U0[[1]], 1, transpose = TRUE))
+    for (n in 2:K) {
+      TtU0[[n]] <- ttm(TtU0[[n - 1]], U0[[n]], n, transpose = TRUE)
+    }
+
+    for (n in seq.int(from = K, to = 1)) {
+      # -- update the core tensor Z --
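+      # the gradient of the fit term with respect to Z is Z x_n (U_n' U_n) - tnsr x_n U_n',
+      # which is Lipschitz-continuous in Z with constant prod(nrmUsq), hence the L[[K + 1]] bound below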
+      L0[[K + 1]] <- L[[K + 1]]
+      L[[K + 1]] <- max(L_min, prod(nrmUsq))
+
+      # try to make a step using the extrapolated decomposition (Zm, Um)
+      TtU <- if (n < K) ttl(TtU0[[n]], U[(n + 1):K], (n + 1):K, transpose = TRUE) else TtU0[[K]]
+      makeZStep(Zm)
+      residn <- makeUnStep(Um[[n]], n)
+      if (residn > residn0) {
+        # the extrapolated (Zm, Um) decomposition led to an increase of the residual norm,
+        # so revert the extrapolation and make a step using (Z0, U0) to ensure
+        # that the objective function decreases
+        n_redo <- n_redo + 1
+        # re-update to make the objective nonincreasing
+        Usq[[n]] <- Usq0[[n]]   # the Z update needs it
+        makeZStep(Z0)
+        residn <- makeUnStep(U0[[n]], n)
+        if (residn > residn0) warning(n_iter, ': residual increase at the redo step')
+      }
+      # --- correction and extrapolation ---
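+      # Nesterov-style momentum: t_{k+1} = (1 + sqrt(1 + 4*t_k^2))/2, and the extrapolation
+      # weight is capped by rw*sqrt(L0/L) to keep the accelerated steps convergent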
+      t[[n]] <- (1 + sqrt(1 + 4 * t0[[n]]^2)) / 2
+      # choose the smaller weight for U[[n]] for convergence
+      wU[[n]] <- min((t0[[n]] - 1) / t[[n]], rw * sqrt(L0[[n]] / L[[n]]))
+      Um[[n]] <- U[[n]] + wU[[n]] * (U[[n]] - U0[[n]])
+      t[[K + 1]] <- (1 + sqrt(1 + 4 * t0[[K + 1]]^2)) / 2
+      # choose the smaller weight for Z for convergence
+      wU[[K + 1]] <- min((t0[[K + 1]] - 1) / t[[K + 1]], rw * sqrt(L0[[K + 1]] / L[[K + 1]]))
+      Zm <- Z + wU[[K + 1]] * (Z - Z0)
+
+      # store the current update
+      Z0 <- Z
+      U0[[n]] <- U[[n]]
+      Usq0[[n]] <- Usq[[n]]
+      t0[c(n, K + 1)] <- t[c(n, K + 1)]
+      residn0 <- residn
+    }
+
+    # --- diagnostics, reporting, stopping checks ---
+    resid0 <- resid
+    resid <- max(0, residn)
+
+    rel_resid_delta <- abs(resid - resid0) / (resid0 + 1)
+    rel_resid <- sqrt(2 * resid) / Tnrm
+
+    # reporting
+    all_resids <- append(all_resids, resid)
+    all_rel_resid_deltas <- append(all_rel_resid_deltas, rel_resid_delta)
+    all_rel_resids <- append(all_rel_resids, rel_resid)
+
+    # check the stopping criterion
+    crit <- rel_resid_delta < tol
+    n_stall <- ifelse(crit, n_stall + 1, 0)
+    if (n_stall >= 3 || rel_resid < tol) {
+      if (verbose) {
+        if (rel_resid == 0) message('Residual is zero. Exact decomposition was found')
+        if (n_stall >= 3) message('Relative residual delta is below ', tol, ' for ', n_stall, ' iterations in a row')
+        if (rel_resid < tol) message('Residual relative to the input tensor norm is ', rel_resid)
+        message('tucker.nonneg() converged in ', n_iter, ' iteration(s), ', n_redo, ' redo step(s)')
+      }
+      conv <- TRUE
+      break
+    }
+    if (max_time > 0 && (proc.time() - start_time)[[3]] > max_time) {
+      warning("Maximal time exceeded, the solution might not be optimal")
+      break
+    }
+  }
+  setTxtProgressBar(pb, max_iter)
+  close(pb)
+  if (!conv && n_iter == max_iter) {
+    warning("Maximal number of iterations reached, the solution might not be optimal")
+  }
+
+  return(invisible(list(U = U, Z = Z, est = ttl(Z, U, seq_len(K)),
+                        n_iter = n_iter,
+                        n_redo = n_redo,
+                        conv = conv,
+                        resid = resid,
+                        diag = list(all_resids = all_resids,
+                                    all_rel_resid_deltas = all_rel_resid_deltas,
+                                    all_rel_resids = all_rel_resids))))
+}