Home > manopt > manifolds > multinomial > multinomialfactory.m

multinomialfactory

PURPOSE ^

Manifold of n-by-m column-stochastic matrices with positive entries.

SYNOPSIS ^

function M = multinomialfactory(n, m)

DESCRIPTION ^

 Manifold of n-by-m column-stochastic matrices with positive entries.

 function M = multinomialfactory(n)
 function M = multinomialfactory(n, m)

 The returned structure M is a Manopt manifold structure to optimize over
 the set of n-by-m matrices with (strictly) positive entries and such that
 the entries of each column sum to one. By default, m = 1, which
 corresponds to the relative interior of the simplex (discrete probability
distributions with nonzero probabilities).

 The metric imposed on the manifold is the Fisher metric such that 
 the set of n-by-m column-stochastic matrices (a.k.a. the multinomial
 manifold) is a Riemannian submanifold of the space of n-by-m matrices.
 Also it should be noted that the retraction operation that we define 
 is first order and as such the checkhessian tool cannot verify 
 the slope correctly at non-critical points.
             
 The file is based on developments in the research paper
 Y. Sun, J. Gao, X. Hong, B. Mishra, and B. Yin,
 "Heterogeneous tensor decomposition for clustering via manifold
 optimization", arXiv:1504.01777, 2015.

 Link to the paper: http://arxiv.org/abs/1504.01777.

 The exponential and logarithmic map and the distance appear in:
 F. Åström, S. Petra, B. Schmitzer, C. Schnörr,
 "Image Labeling by Assignment",
 Journal of Mathematical Imaging and Vision, 58(2), pp. 221–238, 2017.
 doi: 10.1007/s10851-016-0702-4
 arxiv: https://arxiv.org/abs/1603.05285

 Please cite the Manopt paper as well as the research paper:
 @Article{sun2015multinomial,
   author  = {Y. Sun and J. Gao and X. Hong and B. Mishra and B. Yin},
   title   = {Heterogeneous Tensor Decomposition for Clustering via Manifold Optimization},
   journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
   year    = {2016},
   volume  = {38},
   number  = {3},
   pages   = {476--489},
   doi     = {10.1109/TPAMI.2015.2465901}
 }

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

function M = multinomialfactory(n, m)
% Manifold of n-by-m column-stochastic matrices with positive entries.
%
% function M = multinomialfactory(n)
% function M = multinomialfactory(n, m)
%
% The returned structure M is a Manopt manifold structure to optimize over
% the set of n-by-m matrices with (strictly) positive entries and such that
% the entries of each column sum to one. By default, m = 1, which
% corresponds to the relative interior of the simplex (discrete probability
% distributions with nonzero probabilities).
%
% The metric imposed on the manifold is the Fisher metric such that
% the set of n-by-m column-stochastic matrices (a.k.a. the multinomial
% manifold) is a Riemannian submanifold of the space of n-by-m matrices.
% Also it should be noted that the retraction operation that we define
% is first order and as such the checkhessian tool cannot verify
% the slope correctly at non-critical points.
%
% The file is based on developments in the research paper
% Y. Sun, J. Gao, X. Hong, B. Mishra, and B. Yin,
% "Heterogeneous tensor decomposition for clustering via manifold
% optimization", arXiv:1504.01777, 2015.
%
% Link to the paper: http://arxiv.org/abs/1504.01777.
%
% The exponential and logarithmic map and the distance appear in:
% F. Astrom, S. Petra, B. Schmitzer, C. Schnorr,
% "Image Labeling by Assignment",
% Journal of Mathematical Imaging and Vision, 58(2), pp. 221-238, 2017.
% doi: 10.1007/s10851-016-0702-4
% arxiv: https://arxiv.org/abs/1603.05285
%
% Please cite the Manopt paper as well as the research paper:
% @Article{sun2015multinomial,
%   author  = {Y. Sun and J. Gao and X. Hong and B. Mishra and B. Yin},
%   title   = {Heterogeneous Tensor Decomposition for Clustering via Manifold Optimization},
%   journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
%   year    = {2016},
%   volume  = {38},
%   number  = {3},
%   pages   = {476--489},
%   doi     = {10.1109/TPAMI.2015.2465901}
% }

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, April 06, 2015.
% Contributors: Ronny Bergmann
% Change log:
%
%    Sep. 6, 2018 (NB):
%        Removed M.exp() as it was not implemented.
%
%    Apr. 12, 2020 (RB):
%        Adds exp, log, dist.
%
%    (review):
%        Guard dist and log against rounding: sum(sqrt(X.*Y)) can exceed
%        1 by machine precision, which made acos complex and log return
%        NaN (0/0) when columns of X and Y coincide. Also resolved the
%        long-standing TODO by replacing the e*row outer products with
%        implicit expansion (already used elsewhere in this file).

    if ~exist('m', 'var') || isempty(m)
        m = 1;
    end

    M.name = @() sprintf('%dx%d column-stochastic matrices with positive entries', n, m);
    
    % Each column lives in an (n-1)-dimensional simplex interior.
    M.dim = @() (n-1)*m;
    
    % We impose the Fisher metric: <eta, zeta>_X = sum(eta.*zeta./X).
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        ip = sum((eta(:).*zeta(:))./X(:));
    end
    
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    % Geodesic distance: each column pair contributes 2*acos(<sqrt(x), sqrt(y)>).
    M.dist = @dist;
    function d = dist(X, Y)
        % Clamp to 1: rounding can push the inner product of two unit
        % vectors slightly above 1, which would make acos complex.
        d = norm(2*acos(min(sum(sqrt(X.*Y), 1), 1)));
    end
    
    M.typicaldist = @() m*pi/2; % This is an approximation.
    
    % Exponential map, computed column-wise via the isometry with the
    % positive orthant of the sphere (x -> sqrt(x)).
    M.exp = @exponential;
    function Y = exponential(X, U, t)
        if nargin == 3
            tU = t*U;
        else
            tU = U;
        end
        Y = zeros(size(X));
        for mm = 1 : m
            x = X(:, mm);
            s = sqrt(x);
            us = tU(:, mm) ./ s ./ 2;
            un = norm(us);
            if un < eps
                % Zero (or negligible) tangent direction: stay put.
                Y(:, mm) = X(:, mm);
            else
                Y(:, mm) = (cos(un).*s + sin(un)/un.*us).^2;
            end
        end
    end

    % Logarithmic map (inverse of the exponential), column-wise.
    M.log = @logarithm;
    function U = logarithm(X, Y)
        a = sqrt(X.*Y);
        % s(j) in [0, 1] is the cosine of half the distance between
        % column j of X and of Y; clamp to 1 against rounding.
        s = min(sum(a, 1), 1);
        % Scale factor 2*acos(s)/sqrt(1-s^2) has limit 2 as s -> 1:
        % using that limit avoids returning NaN (0/0) when columns of
        % X and Y coincide (the tangent vector is then zero anyway).
        factor = 2*ones(size(s));
        apart = s < 1;
        factor(apart) = 2*acos(s(apart)) ./ sqrt(1 - s(apart).^2);
        U = factor .* (a - s.*X);
    end
    
    % Conversion of a Euclidean gradient to the Riemannian gradient.
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        Xegrad = X.*egrad;
        lambda = -sum(Xegrad, 1); % Row vector of length m.
        rgrad = Xegrad + lambda.*X; % This is in the tangent space.
    end
    
    % Conversion of a Euclidean Hessian to the Riemannian Hessian.
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, eta)
        
        % Riemannian gradient computation.
        % lambda is a row vector of length m.
        Xegrad = X.*egrad;
        lambda = -sum(Xegrad, 1);
        rgrad = Xegrad + lambda.*X;
        
        % Directional derivative of the Riemannian gradient.
        % lambdadot is a row vector of length m.
        Xehess = X.*ehess;
        etaegrad = eta.*egrad;
        lambdadot = -sum(etaegrad, 1) - sum(Xehess, 1);
        rgraddot = etaegrad + Xehess + lambdadot.*X + lambda.*eta;
        
        % Correction term because of the non-constant metric that we
        % impose. The computation of the correction term follows the use
        % of the Koszul formula.
        correction_term = -0.5*(eta.*rgrad)./X;
        rhess = rgraddot + correction_term;
        
        % Finally, projection onto the tangent space.
        rhess = M.proj(X, rhess);
    end
    
    % Projection of the vector eta in the ambient space onto the tangent
    % space (columns of the result sum to zero).
    M.proj = @projection;
    function etaproj = projection(X, eta)
        alpha = sum(eta, 1); % Row vector of length m.
        etaproj = eta - alpha.*X;
    end
    
    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;
    
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        if nargin < 3
            t = 1.0;
        end
        % A first-order retraction.
        Y = X.*exp(t*(eta./X)); % Based on mapping for positive scalars.
        Y = Y ./ sum(Y, 1);     % Projection onto the constraint set.
        % For numerical reasons, so that we avoid entries going to zero:
        % (note this may leave column sums at 1 only up to eps).
        Y = max(Y, eps);
    end
    
    M.hash = @(X) ['z' hashmd5(X(:))];
    
    M.rand = @random;
    function X = random()
        % A random point: uniform in the ambient space, then normalized
        % so each column sums to one.
        X = rand(n, m);
        X = X ./ sum(X, 1);
    end
    
    M.randvec = @randomvec;
    function eta = randomvec(X)
        % A random unit-norm vector in the tangent space at X.
        eta = randn(n, m);
        eta = M.proj(X, eta); % Projection onto the tangent space.
        nrm = M.norm(X, eta);
        eta = eta / nrm;
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(X) zeros(n, m);
    
    M.transp = @(X1, X2, d) projection(X2, d);
    
    % vec and mat are not isometries, because of the scaled metric.
    M.vec = @(X, U) U(:);
    M.mat = @(X, u) reshape(u, n, m);
    M.vecmatareisometries = @() false;
end

Generated on Tue 19-May-2020 18:46:12 by m2html © 2005