
low_rank_tensor_completion

PURPOSE

Given partial observation of a low rank tensor, attempts to complete it.

SYNOPSIS

function low_rank_tensor_completion()

DESCRIPTION

 Given partial observation of a low rank tensor, attempts to complete it.

 function low_rank_tensor_completion()

 This example demonstrates how to use the geometry factory for the
 quotient manifold of fixed-rank tensors, 
 fixedrankfactory_tucker_preconditioned.

 This geometry is described in the technical report
 "Riemannian preconditioning for tensor completion"
 Hiroyuki Kasai and Bamdev Mishra, arXiv:1506.02159, 2015.

 This can be a starting point for many optimization problems of the form:

 minimize f(X) such that rank(X) = [r1 r2 r3], size(X) = [n1, n2, n3].
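
 In Manopt, such a problem is posed by pairing this factory with a cost
 function and passing the result to a solver. Below is a minimal,
 self-contained sketch of the pattern (assuming Manopt is on the path);
 unlike the masked completion problem solved in the source further down,
 it observes every entry:

     % Complete a small random tensor of multilinear rank (2, 2, 2).
     M = fixedrankfactory_tucker_preconditioned([10 10 10], [2 2 2]);
     problem.M = M;
     A = tucker2multiarray(M.rand());  % random rank-(2, 2, 2) target
     problem.cost = @(X) .5*norm(reshape(tucker2multiarray(X) - A, 10, 100), 'fro')^2;
     % No gradient is supplied here, so Manopt warns and falls back to a
     % finite-difference approximation; the source below supplies egrad/ehess.
     X = trustregions(problem);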

 Input:  None. This example file generates random data.
 
 Output: None.

 Please cite the Manopt paper as well as the research paper:
     @Techreport{kasai2015,
       Title   = {{R}iemannian preconditioning for tensor completion},
       Author  = {Kasai, H. and Mishra, B.},
       Journal = {Arxiv preprint arXiv:1506.02159},
       Year    = {2015}
     }

CROSS-REFERENCE INFORMATION

This function calls: fixedrankfactory_tucker_preconditioned, tucker2multiarray, trustregions, conjugategradient
This function is called by: (none)

SUBFUNCTIONS

 function f = cost(X)
 function [g] = egrad(X)
 function [Hess] = ehess(X, eta)
 function tmin = linesearch_helper(X, eta)

SOURCE CODE

function low_rank_tensor_completion()
% Given partial observation of a low rank tensor, attempts to complete it.
%
% function low_rank_tensor_completion()
%
% This example demonstrates how to use the geometry factory for the
% quotient manifold of fixed-rank tensors,
% fixedrankfactory_tucker_preconditioned.
%
% This geometry is described in the technical report
% "Riemannian preconditioning for tensor completion"
% Hiroyuki Kasai and Bamdev Mishra, arXiv:1506.02159, 2015.
%
% This can be a starting point for many optimization problems of the form:
%
% minimize f(X) such that rank(X) = [r1 r2 r3], size(X) = [n1, n2, n3].
%
% Input:  None. This example file generates random data.
%
% Output: None.
%
% Please cite the Manopt paper as well as the research paper:
%     @Techreport{kasai2015,
%       Title   = {{R}iemannian preconditioning for tensor completion},
%       Author  = {Kasai, H. and Mishra, B.},
%       Journal = {Arxiv preprint arXiv:1506.02159},
%       Year    = {2015}
%     }

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main authors: Hiroyuki Kasai and Bamdev Mishra, June 16, 2015.
% Contributors:
%
% Change log:
%
%   Xiaowen Jiang Aug. 20, 2021
%       Added AD to compute the egrad and the ehess

    % Random data generation with pseudo-random numbers from a
    % uniform distribution on [0, 1].
    % First, choose the size of the problem.
    % We will complete a tensor of size n1-by-n2-by-n3 of rank (r1, r2, r3):
    n1 = 70;
    n2 = 60;
    n3 = 50;
    r1 = 3;
    r2 = 4;
    r3 = 5;
    tensor_dims = [n1 n2 n3];
    core_dims = [r1 r2 r3];
    total_entries = n1*n2*n3;

    % Generate a random tensor A of size n1-by-n2-by-n3 of rank (r1, r2, r3).
    [U1, R1] = qr(rand(n1, r1), 0);
    [U2, R2] = qr(rand(n2, r2), 0);
    [U3, R3] = qr(rand(n3, r3), 0);

    Z.U1 = R1;
    Z.U2 = R2;
    Z.U3 = R3;
    Z.G = rand(core_dims);
    Core = tucker2multiarray(Z); % Converts a Tucker-format tensor to a full array.

    Y.U1 = U1;
    Y.U2 = U2;
    Y.U3 = U3;
    Y.G = Core;
    A = tucker2multiarray(Y);
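    % Remark: Core = Z.G x1 R1 x2 R2 x3 R3, so that
    % A = Z.G x1 (U1*R1) x2 (U2*R2) x3 (U3*R3), with Ui*Ri = rand(ni, ri);
    % hence A has multilinear rank (r1, r2, r3) almost surely.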

    % Generate a random mask P for observed entries: P(i, j, k) = 1 if the
    % entry (i, j, k) of A is observed, and 0 otherwise.
    fraction = 0.1; % Fraction of observed entries.
    nr = round(fraction * total_entries);
    ind = randperm(total_entries);
    ind = ind(1 : nr);
    P = false(tensor_dims);
    P(ind) = true;
    % Hence, the observed entries of A are collected in PA:
    PA = P.*A;

    % Pick the manifold of tensors of size n1-by-n2-by-n3 of rank (r1, r2, r3).
    problem.M = fixedrankfactory_tucker_preconditioned(tensor_dims, core_dims);

    % Define the problem cost function. The input X is a structure with
    % fields U1, U2, U3, G representing a rank-(r1, r2, r3) tensor.
    % f(X) = 1/2 * || P.*(X - A) ||_F^2
    problem.cost = @cost;
    function f = cost(X)
        Xmultiarray = tucker2multiarray(X);
        Diffmultiarray = P.*Xmultiarray - PA;
        Diffmultiarray_flat = reshape(Diffmultiarray, n1, n2*n3);
        f = .5*norm(Diffmultiarray_flat, 'fro')^2;
    end
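    % Optional sanity check (not in the original file): the cost must
    % vanish at the ground truth, whose Tucker structure is Y above.
    % fprintf('Cost at the ground truth: %g (should be 0).\n', cost(Y));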

    % Define the Euclidean gradient of the cost function, that is, the
    % gradient of f(X) seen as a standard function of X.
    % nabla f(X) = P.*(X - A)
    % We only need to give the Euclidean gradient. Manopt converts it
    % internally to the Riemannian counterpart.
    problem.egrad = @egrad;
    function [g] = egrad(X)
        Xmultiarray = tucker2multiarray(X);
        Smultiarray = P.*Xmultiarray - PA;

        % Computation of the mode-1, mode-2 and mode-3 unfoldings of S.
        S1multiarray = reshape(Smultiarray, [n1, n2*n3]);
        S2multiarray = reshape(permute(Smultiarray, [2 1 3]), [n2, n1*n3]);
        S3multiarray = reshape(permute(Smultiarray, [3 1 2]), [n3, n1*n2]);

        g.U1 = double(S1multiarray) * kron(X.U3, X.U2) * reshape(X.G, r1, r2*r3)';
        g.U2 = double(S2multiarray) * kron(X.U3, X.U1) * reshape(permute(X.G, [2 1 3]), r2, r1*r3)';
        g.U3 = double(S3multiarray) * kron(X.U2, X.U1) * reshape(permute(X.G, [3 1 2]), r3, r1*r2)';
        g.G = reshape(X.U1' * reshape(double(Smultiarray), n1, n2*n3) * kron(X.U3', X.U2')', r1, r2, r3);
    end
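    % The factor gradients above follow from the mode-n matricization of
    % the Tucker product: if X = G x1 U1 x2 U2 x3 U3, then, for instance,
    % the mode-1 unfolding is X_(1) = U1 * G_(1) * kron(U3, U2)', so the
    % partial derivative of f with respect to U1 is
    % S_(1) * kron(U3, U2) * G_(1)', as computed in g.U1; g.U2, g.U3 and
    % g.G are analogous.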

    % Define the Euclidean Hessian of the cost at X, along eta, where eta
    % is represented as a tangent vector: a structure with fields U1, U2,
    % U3, G. This is the directional derivative of nabla f(X) along eta:
    % nabla^2 f(X)[eta] = P.*Xdot, where Xdot is the directional
    % derivative of the full tensor X along eta.
    % We only need to give the Euclidean Hessian. Manopt converts it
    % internally to the Riemannian counterpart.
    problem.ehess = @ehess;
    function [Hess] = ehess(X, eta)

        % Computing S and its unfolding matrices S1, S2 and S3.
        Xmultiarray = tucker2multiarray(X);
        S = P.*Xmultiarray - PA;
        S1 = reshape(S, [n1, n2*n3]);
        S2 = reshape(permute(S, [2 1 3]), [n2, n1*n3]);
        S3 = reshape(permute(S, [3 1 2]), [n3, n1*n2]);

        % Computing Sdot and its unfoldings S1dot, S2dot and S3dot.
        XG = X.G;
        etaG = eta.G;
        G1 = zeros(4*size(X.G));
        G1(1:r1, 1:r2, 1:r3) = XG;
        G1(r1 + 1 : 2*r1, r2 + 1 : 2*r2, r3 + 1 : 2*r3) = XG;
        G1(2*r1 + 1 : 3*r1, 2*r2 + 1 : 3*r2, 2*r3 + 1 : 3*r3) = XG;
        G1(3*r1 + 1 : 4*r1, 3*r2 + 1 : 4*r2, 3*r3 + 1 : 4*r3) = etaG;

        X1.G = G1;
        X1.U1 = [eta.U1 X.U1 X.U1 X.U1];
        X1.U2 = [X.U2 eta.U2 X.U2 X.U2];
        X1.U3 = [X.U3 X.U3 eta.U3 X.U3];
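        % With the block-diagonal core G1 and the stacked factors above,
        % a single tucker2multiarray call evaluates the sum of the four
        % one-factor perturbations of X, that is, the directional
        % derivative Xdot of the full tensor along eta; masking it with P
        % then yields Sdot = P.*Xdot.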

        X1multiarray = tucker2multiarray(X1);
        Sdot = P.*X1multiarray;
        S1dot = reshape(Sdot, [n1, n2*n3]);
        S2dot = reshape(permute(Sdot, [2 1 3]), [n2, n1*n3]);
        S3dot = reshape(permute(Sdot, [3 1 2]), [n3, n1*n2]);

        % Computing unfolding matrices of X.G and eta.G.
        X_G1 = reshape(double(X.G), r1, r2*r3);
        X_G2 = reshape(permute(double(X.G), [2 1 3]), r2, r1*r3);
        X_G3 = reshape(permute(double(X.G), [3 1 2]), r3, r1*r2);
        eta_G1 = reshape(double(eta.G), r1, r2*r3);
        eta_G2 = reshape(permute(double(eta.G), [2 1 3]), r2, r1*r3);
        eta_G3 = reshape(permute(double(eta.G), [3 1 2]), r3, r1*r2);

        % Computing the Hessian blocks for U1, U2 and U3, by the product
        % rule applied to the gradient formulas above.
        T1 = double(S1dot) * (kron(X.U3, X.U2)*X_G1') ...
            + double(S1) * (kron(eta.U3, X.U2)*X_G1' ...
            + kron(X.U3, eta.U2)*X_G1' + kron(X.U3, X.U2)*eta_G1');

        T2 = double(S2dot) * (kron(X.U3, X.U1)*X_G2') ...
            + double(S2) * (kron(eta.U3, X.U1)*X_G2' ...
            + kron(X.U3, eta.U1)*X_G2' + kron(X.U3, X.U1)*eta_G2');

        T3 = double(S3dot) * (kron(X.U2, X.U1)*X_G3') ...
            + double(S3) * (kron(eta.U2, X.U1)*X_G3' ...
            + kron(X.U2, eta.U1)*X_G3' + kron(X.U2, X.U1)*eta_G3');

        Hess.U1 = T1;
        Hess.U2 = T2;
        Hess.U3 = T3;

        % Computing the Hessian block for G.
        N.U1 = X.U1';
        N.U2 = X.U2';
        N.U3 = X.U3';
        N.G = Sdot;
        M0array = tucker2multiarray(N);

        M1.U1 = eta.U1';
        M1.U2 = X.U2';
        M1.U3 = X.U3';
        M1.G = S;
        M1array = tucker2multiarray(M1);

        M2.U1 = X.U1';
        M2.U2 = eta.U2';
        M2.U3 = X.U3';
        M2.G = S;
        M2array = tucker2multiarray(M2);

        M3.U1 = X.U1';
        M3.U2 = X.U2';
        M3.U3 = eta.U3';
        M3.G = S;
        M3array = tucker2multiarray(M3);

        Hess.G = M0array + M1array + M2array + M3array;
    end

    % An alternative way to compute the egrad and the ehess is to use
    % automatic differentiation as provided by the Deep Learning Toolbox
    % (slower). Note that the function norm is not supported for AD so
    % far: replace norm(..., 'fro')^2 with cnormsqfro, described in the
    % file manoptADhelp.m. Also, operations between sparse matrices and
    % dlarrays are not supported for now, so convert P and PA to full
    % arrays first.
    % problem.cost = @cost_AD;
    % PA_full = full(PA);
    % P_full = full(P);
    %    function f = cost_AD(X)
    %        Xmultiarray = tucker2multiarray(X);
    %        Diffmultiarray = P_full.*Xmultiarray - PA_full;
    %        Diffmultiarray_flat = reshape(Diffmultiarray, n1, n2*n3);
    %        f = .5*cnormsqfro(Diffmultiarray_flat);
    %    end
    % Call manoptAD to prepare AD for the problem structure:
    % problem = manoptAD(problem);

    % Check consistency of the gradient and the Hessian. Useful if you
    % adapt this example for a new cost function and you would like to
    % make sure there is no mistake.
    %
    % Notice that the checkhessian test fails: the slope is not right.
    % This is because the retraction is not second-order compatible with
    % the Riemannian exponential on this manifold, making the
    % checkhessian tool unusable. The Hessian is correct though.
    % % warning('off', 'manopt:fixedrankfactory_tucker_preconditioned:exp');
    % % checkgradient(problem);
    % % drawnow;
    % % pause;
    % % checkhessian(problem);
    % % drawnow;
    % % pause;

    % Options for the solvers.
    options.maxiter = 200;
    options.maxinner = 30;
    options.maxtime = inf;
    options.tolgradnorm = 1e-5;
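    % Note: maxinner bounds the number of inner tCG iterations per outer
    % iteration of trustregions; it has no effect on the first-order
    % conjugategradient solver used further below.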

    % Minimize the cost function using the Riemannian trust-regions solver.
    Xtr = trustregions(problem, [], options);

    % The reconstructed tensor is Xtr, represented as a structure with
    % fields U1, U2, U3 and G.
    Xtrmultiarray = tucker2multiarray(Xtr);
    fprintf('||X-A||_F = %g\n', norm(reshape(Xtrmultiarray - A, [n1 n2*n3]), 'fro'));
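    % Optionally (not in the original file), report the relative error:
    % fprintf('||X-A||_F / ||A||_F = %g\n', ...
    %     norm(reshape(Xtrmultiarray - A, [n1 n2*n3]), 'fro') / ...
    %     norm(reshape(A, [n1 n2*n3]), 'fro'));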

    % Alternatively, we could use a solver such as steepestdescent (SD)
    % or conjugategradient (CG). These solvers need to solve a
    % line-search problem at each iteration. The standard line searches
    % in Manopt handle this with general-purpose routines. For the
    % problem at hand, however, we can exploit the least-squares
    % structure to compute an approximate stepsize directly. The
    % approximation is obtained by linearizing the nonlinear manifold
    % locally and then minimizing a degree-2 polynomial model of the
    % cost along the line. The specific derivation is in the paper
    % referenced above.

    problem.linesearch = @linesearch_helper;
    function tmin = linesearch_helper(X, eta)

        % term0: the masked residual at X.
        Xmultiarray = tucker2multiarray(X);
        residual_mat = P.*Xmultiarray - PA;
        residual_vec = residual_mat(:);
        term0 = residual_vec;

        % term1: the masked directional derivative of X along eta.
        XG = X.G;
        etaG = eta.G;
        G1 = zeros(4*size(X.G));
        G1(1:r1, 1:r2, 1:r3) = XG;
        G1(r1 + 1 : 2*r1, r2 + 1 : 2*r2, r3 + 1 : 2*r3) = XG;
        G1(2*r1 + 1 : 3*r1, 2*r2 + 1 : 3*r2, 2*r3 + 1 : 3*r3) = XG;
        G1(3*r1 + 1 : 4*r1, 3*r2 + 1 : 4*r2, 3*r3 + 1 : 4*r3) = etaG;
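        % Same block-diagonal trick as in ehess above: together with the
        % stacked factors below, one tucker2multiarray call evaluates the
        % directional derivative of the full tensor X along eta.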

        X1.U1 = [eta.U1 X.U1 X.U1 X.U1];
        X1.U2 = [X.U2 eta.U2 X.U2 X.U2];
        X1.U3 = [X.U3 X.U3 eta.U3 X.U3];
        X1.G = G1;

        X1multiarray = tucker2multiarray(X1);
        term1_mat = P.*X1multiarray;
        term1 = term1_mat(:);

        % tmin is the solution to the problem argmin_t a2*t^2 + a1*t,
        % where the coefficients a1 and a2 are shown below.
        a2 = (term1'*term1);
        a1 = 2*(term1'*term0);
        tmin = - 0.5*(a1 / a2);
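        % Equivalently, tmin = -(term1'*term0) / (term1'*term1): the
        % exact minimizer of t -> 0.5*norm(term0 + t*term1)^2, since the
        % masked residual along the linearized path is term0 + t*term1.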

    end

    % Notice that for this solver, the Hessian is not needed.
    [Xcg, costcg, infocg] = conjugategradient(problem, [], options);

    fprintf('Take a look at the options that CG used:\n');
    disp(options);
    fprintf('And see how many trials were made at each line search call:\n');
    info_ls = [infocg.linesearch];
    disp([info_ls.costevals]);

    fprintf('Try it again without the linesearch helper.\n');

    % Remove the linesearch helper from the problem structure.
    problem = rmfield(problem, 'linesearch');

    [Xcg, xcost, info, options] = conjugategradient(problem, []); %#ok<ASGLU>

    fprintf('Take a look at the options that CG used:\n');
    disp(options);
    fprintf('And see how many trials were made at each line search call:\n');
    info_ls = [info.linesearch];
    disp([info_ls.costevals]);

end
