Description of sympositivedefinitesimplexfactory

0001 function M = sympositivedefinitesimplexfactory(n, k)
0002 % with the bi-invariant geometry such that the sum is the identity matrix.
0003 %
0004 % function M = sympositivedefinitesimplexfactory(n, k)
0005 %
0006 % Given X1, X2, ... Xk symmetric positive definite matrices, the constraint
0007 % tackled is
0008 % X1 + X2 + ... = I.
0009 %
0010 % The Riemannian structure enforced on the manifold
0011 % M:={(X1, X2,...) : X1 + X2 + ... = I } is a submanifold structure of the
0012 % total space defined as the k Cartesian product of symmetric positive
0013 % definite Riemannian manifold (of n-by-n matrices) endowed with the bi-invariant metric.
0014 %
0015 % A point X on the manifold is represented as multidimensional array
0016 % of size n-by-n-by-k. Each n-by-n matrix is symmetric positive definite.
0017 %
0018 % The embedding space is the k Cartesian product of matrices of size
0019 % n-by-n (symmetry not required). The Euclidean gradient and Hessian expressions
0020 % needed for egrad2rgrad and ehess2rhess are in the embedding space
0021 % endowed with the Euclidean metric.
0022 %
0023 % E = (R^(nxn))^k is the embedding space: we have the obvious representation of points
0024 % there as 3D arrays of size nxnxk. It is equipped with the standard Euclidean metric.
0025 %
0026 % P = {X in R^(nxn) : X = X' and X positive definite} is a submanifold of R^(nxn).
0027 % We turn it into a Riemannian manifold (but not a Riemannian submanifold) by equipping
0028 % it with the bi-invariant metric.
0029 %
0030 % M = {X in P^k : X_1 + ... + X_k = I} is the manifold we care about here: it is
0031 % a Riemannian submanifold of P^k, hence it is also a submanifold (but not a Riemannian
0032 % submanifold) of E -- our embedding space.
0033 %
0034 %
0035 % Please cite the Manopt paper as well as the research paper:
0036 %
0037 %     @techreport{mishra2019riemannian,
0038 %       title={Riemannian optimization on the simplex of positive definite matrices},
0039 %       author={Mishra, B. and Kasai, H. and Jawanpuria, P.},
0040 %       institution={arXiv preprint arXiv:1906.10436},
0041 %       year={2019}
0042 %     }
0043 %
0044 % See also sympositivedefinitesimplexcomplexfactory multinomialfactory sympositivedefinitefactory
0045 
0046     % This file is part of Manopt: www.manopt.org.
0047     % Original author: Bamdev Mishra, September 18, 2019.
0048     % Contributors: NB
0049     % Change log: Comments updated, 16 Dec 2019
0050     %             Removed typos in Hessian expression, 01 Nov, 2021
0051     
0052     symm = @(X) .5*(X+X');
0053     
0054     M.name = @() sprintf('%d symmetric positive definite matrices of size %dx%d such that their sum is the identiy matrix.', k, n, n);
0055     
0056     M.dim = @() (k-1)*n*(n+1)/2;
0057     
0058     % Helpers to avoid computing full matrices simply to extract their trace
0059     vec     = @(A) A(:);
0060     trinner = @(A, B) vec(A')'*vec(B);  % = trace(A*B)
0061     trnorm  = @(A) sqrt(trinner(A, A)); % = sqrt(trace(A^2))
0062         
0063    
0064     % Choice of the metric on the orthonormal space is motivated by the
0065     % symmetry present in the space. The metric on the positive definite
0066     % cone is its natural bi-invariant metric.
0067     % The result is equal to: trace( (X\eta) * (X\zeta) )
0068     M.inner = @innerproduct;
0069     function iproduct = innerproduct(X, eta, zeta)
0070         iproduct = 0;
0071         for kk = 1 : k
0072             iproduct = iproduct + trinner(X(:,:,kk)\eta(:,:,kk), X(:,:,kk)\zeta(:,:,kk));
0073         end
0074     end
0075     
0076     % Notice that X\eta is *not* symmetric in general.
0077     % The result is equal to: sqrt(trace((X\eta)^2))
0078     % There should be no need to take the real part, but rounding errors
0079     % may cause a small imaginary part to appear, so we discard it.
0080     M.norm = @innernorm;
0081     function inorm = innernorm(X, eta)
0082         inorm = 0;
0083         for kk = 1:k
0084             inorm = inorm + real(trnorm(X(:,:,kk)\eta(:,:,kk)))^2;
0085         end
0086         inorm = sqrt(inorm);
0087     end
0088     
0089     %     % Same here: X\Y is not symmetric in general.
0090     %     % Same remark about taking the real part.
0091     %     M.dist = @innerdistance;
0092     %     function idistance = innerdistance(X, Y)
0093     %         idistance = 0;
0094     %         for kk = 1:k
0095     %             idistance = idistance + real(trnorm(real(logm(X(:,:,kk)\Y(:,:,kk)))));
0096     %         end
0097     %     end
0098     
0099     M.typicaldist = @() sqrt(k*n*(n+1)/2); % BM: to be looked into.
0100     
0101     
0102     M.egrad2rgrad = @egrad2rgrad;
0103     function rgrad = egrad2rgrad(X, egrad)
0104         egradscaled = nan(size(egrad));
0105         for kk = 1:k
0106             egradscaled(:,:,kk) = X(:,:,kk)*symm(egrad(:,:,kk))*X(:,:,kk);
0107         end
0108         
0109         % Project onto the set X1dot + X2dot + ... = 0.
0110         % That is rgrad = Xk*egradk*Xk + Xk*Lambdasol*Xk
0111         rgrad = M.proj(X, egradscaled);
0112         
0113         %   % Debug
0114         %   norm(sum(rgrad,3), 'fro') % BM: this should be zero.
0115     end
0116     
0117     
0118     M.ehess2rhess = @ehess2rhess;
0119     function Hess = ehess2rhess(X, egrad, ehess, eta)
0120         Hess = nan(size(X));
0121         
0122         egradscaled = nan(size(egrad));
0123         egradscaleddot = nan(size(egrad));
0124         for kk = 1:k
0125             egradk = symm(egrad(:,:,kk));
0126             ehessk = symm(ehess(:,:,kk));
0127             Xk = X(:,:,kk);
0128             etak = eta(:,:,kk);
0129 
0130             egradscaled(:,:,kk) = Xk*egradk*Xk;
0131             egradscaleddot(:,:,kk) = Xk*ehessk*Xk + 2*symm(etak*egradk*Xk);
0132         end
0133 
0134         % Compute Lambdasol
0135         RHS = - sum(egradscaled,3);
0136         [Lambdasol] = mylinearsolve(X, RHS);
0137 
0138 
0139         % Compute Lambdasoldot
0140         temp = nan(size(egrad));;
0141         for kk = 1:k
0142             Xk = X(:,:,kk);
0143             etak = eta(:,:,kk);
0144 
0145             temp(:,:,kk) = 2*symm(etak*Lambdasol*Xk);
0146         end
0147         RHSdot = - sum(egradscaleddot,3) - sum(temp,3);
0148         [Lambdasoldot] = mylinearsolve(X, RHSdot);
0149 
0150 
0151         for kk = 1:k
0152             egradk = symm(egrad(:,:,kk));
0153             ehessk = symm(ehess(:,:,kk));
0154             Xk = X(:,:,kk);
0155             etak = eta(:,:,kk);
0156 
0157             % Directional derivatives of the Riemannian gradient
0158             % Note that Riemannian grdient is Xk*egradk*Xk + Xk*Lambdasol*Xk.
0159             % rhessk = Xk*(ehessk + Lambdasoldot)*Xk + 2*symm(etak*(egradk + Lambdasol)*Xk);
0160             % rhessk = rhessk - symm(etak*(egradk + Lambdasol)*Xk);
0161             rhessk = Xk*(ehessk + Lambdasoldot)*Xk + symm(etak*(egradk + Lambdasol)*Xk);
0162 
0163             Hess(:,:,kk) = rhessk;
0164         end
0165         
0166         % Project onto the set X1dot + X2dot + ... = 0.
0167         Hess = M.proj(X, Hess);
0168     end
0169     
0170     
0171     % Project onto the set X1dot + X2dot + ... = 0.
0172     M.proj = @innerprojection;
0173     function zeta = innerprojection(X, eta)
0174         % Project eta onto the set X1dot + X2dot + ... = 0.
0175         % Projected eta = eta + Xk*Lambdasol* Xk.
0176 
0177         RHS = - sum(eta,3);
0178 
0179         [Lambdasol] = mylinearsolve(X, RHS); % It solves sum Xi Lambdasol Xi = RHS.
0180 
0181         zeta = zeros(size(eta));
0182         for jj = 1 : k
0183             zeta(:,:,jj) = eta(:,:,jj) + X(:,:,jj)*Lambdasol*X(:,:,jj);
0184         end
0185         
0186         %   % Debug
0187         %   neta = eta - zeta; % Normal vector
0188         %   innerproduct(X, zeta, neta) % This should be zero
0189     end
0190 
0191     function [Lambdasol] = mylinearsolve(X, RHS)
0192         % Solve the linear system
0193         % sum Xi Lambdasol Xi = RHS.
0194         tol_omegax_pcg = 1e-8;
0195         max_iterations_pcg = 100;
0196         
0197         % vectorize RHS
0198         rhs = RHS(:);
0199         
0200         % Call PCG
0201         [lambdasol, ~, ~, ~] = pcg(@compute_matrix_system, rhs, tol_omegax_pcg, max_iterations_pcg);
0202 
0203         % Devectorize lambdasol.
0204         Lambdasol = symm(reshape(lambdasol, [n n]));
0205         
0206         function lhslambda = compute_matrix_system(lambda)
0207             Lambda = symm(reshape(lambda, [n n]));
0208             lhsLambda = zeros(n,n);
0209             for kk = 1:k
0210                 lhsLambda = lhsLambda + X(:,:,kk)*Lambda*X(:,:,kk);
0211             end
0212             lhslambda = lhsLambda(:);
0213         end
0214 
0215         % % Debug
0216         % lhsLambda = zeros(n,n);
0217         % for kk = 1:k
0218         %     lhsLambda = lhsLambda + (X(:,:,kk)*Lambdasol*X(:,:,kk));
0219         % end
0220         % norm(lhsLambda - RHS, 'fro')/norm(RHS,'fro')
0221         % keyboard;
0222     end
0223     
0224     M.tangent = M.proj;
0225     M.tangent2ambient = @(X, eta) eta;
0226     
0227     myeps = eps;
0228     
0229     M.retr = @retraction;
0230     function Y = retraction(X, eta, t)
0231         if nargin < 3
0232             teta = eta;
0233         else
0234             teta = t*eta;
0235         end
0236         % The symm() call is mathematically unnecessary but numerically
0237         % necessary.
0238         Y = nan(size(X));
0239         for kk=1:k
0240             % Second-order approximation of expm
0241             Y(:,:,kk) = symm(X(:,:,kk) + teta(:,:,kk) + .5*teta(:,:,kk)*((X(:,:,kk) + myeps*eye(n) )\teta(:,:,kk)));
0242             
0243             % expm
0244             %Y(:,:,kk) = symm(X(:,:,kk)*real(expm((X(:,:,kk)  + myeps*eye(n))\(teta(:,:,kk)))));
0245         end
0246         Ysum = sum(Y, 3);
0247         Ysumsqrt = sqrtm(Ysum);
0248         for kk=1:kk
0249             Y(:,:,kk) = symm((Ysumsqrt\Y(:,:,kk))/Ysumsqrt);
0250         end
0251         %   % Debug
0252         %   norm(sum(Y, 3) - eye(n), 'fro') % This should be zero
0253     end
0254     
0255     M.exp = @exponential;
0256     function Y = exponential(X, eta, t)
0257         if nargin < 3
0258             t = 1.0;
0259         end
0260         Y = retraction(X, eta, t);
0261         warning('manopt:sympositivedefinitesimplexfactory:exp', ...
0262             ['Exponential for the Simplex' ...
0263             'manifold not implemented yet. Used retraction instead.']);
0264     end
0265     
0266     M.hash = @(X) ['z' hashmd5(X(:))];
0267     
0268     % Generate a random symmetric positive definite matrix following a
0269     % certain distribution. The particular choice of a distribution is of
0270     % course arbitrary, and specific applications might require different
0271     % ones.
0272     M.rand = @random;
0273     function X = random()
0274         X = nan(n,n,k);
0275         for kk = 1:k
0276             D = diag(1+rand(n, 1));
0277             [Q, R] = qr(randn(n)); %#ok
0278             X(:,:,kk) = Q*D*Q';
0279         end
0280         Xsum = sum(X, 3);
0281         Xsumsqrt = sqrtm(Xsum);
0282         for kk = 1 : k
0283             X(:,:,kk) = symm((Xsumsqrt\X(:,:,kk))/Xsumsqrt);
0284         end
0285     end
0286     
0287     % Generate a uniformly random unit-norm tangent vector at X.
0288     M.randvec = @randomvec;
0289     function eta = randomvec(X)
0290         eta = nan(size(X));
0291         for kk = 1:k
0292             eta(:,:,kk) = symm(randn(n));
0293         end
0294         eta = M.proj(X, eta);
0295         nrm = M.norm(X, eta);
0296         eta = eta / nrm;
0297     end
0298     
0299     M.lincomb = @matrixlincomb;
0300     
0301     M.zerovec = @(X) zeros(n,n,k);
0302     
0303     % Poor man's vector transport: exploit the fact that all tangent spaces
0304     % are the set of symmetric matrices, so that the identity is a sort of
0305     % vector transport. It may perform poorly if the origin and target (X1
0306     % and X2) are far apart though. This should not be the case for typical
0307     % optimization algorithms, which perform small steps.
0308     M.transp = @(X1, X2, eta) M.proj(X2, eta);
0309     
0310     % vec and mat are not isometries, because of the unusual inner metric.
0311     M.vec = @(X, U) U(:);
0312     M.mat = @(X, u) reshape(u, n, n, k);
0313     M.vecmatareisometries = @() false;
0314     
0315 end
sympositivedefinitesimplexfactory

PURPOSE

SYNOPSIS

DESCRIPTION

CROSS-REFERENCE INFORMATION

SUBFUNCTIONS

SOURCE CODE