function [w, h] = RealPLSA(v, r, ep) % Inputs: v - m-by-n data matrix % r - number of desired components % ep - number of iterations % Outputs: w - m-by-r matrix of basis components % h - r-by-n mixture weight matrix % % Madhu Shashanka, , 2008 % % Step 1: convert n-dimensional data to points in the positive orthant in % (n+1)-dimensional space, on the n-simplex hyperplane. % [m, n] = size(v); idxs = [m+1]; i = m+1; tc = []; while (i~=1) idxs = [idxs floor(i/2)]; i = floor(i/2); end for i=idxs(end-1:-1:1) tcs2 = size(tc, 2); tn = [tc*[eye(tcs2) zeros(tcs2, tcs2)]; tc*[zeros(tcs2, tcs2) eye(tcs2)]]; tn = [tn [ones(floor(i/2), 1); -ones(floor(i/2), 1)]]; if rem(i, 2) tn = [tn ones(i-1, 1)]; tn = [tn; [zeros(1, i-2) -(i-1)]]; end tc = tn; end clear tn; tc = tc ./ repmat( sqrt(sum(tc.^2)), m+1, 1); % transform the data into (n+1)-dimensions nv = (v'*tc')'; % make sure all the entries are positive mnv = min(nv(:)); nv = nv-mnv; % scale the data so that vectors sum to 1 % (sums of all vectors should be the same) msc = sum(nv(:, 1)); nv = nv ./ msc; % Data now ready for PLSA % r = no. of factors, ep = num iters [wn, hn] = plsa(nv, r, ep); wv = wn*msc; wv = wv+mnv; w = (wv'*tc)'; h = hn; % w*h should be the approximation of original data v function [w, h] = plsa( v, r, ep) % Get sizes [m,n]=size(v); w = rand( m, r); w = w ./ repmat( sum( w, 1), m, 1); h = rand( r, n); h = h ./ repmat( sum( h, 1), r, 1); for e = 1:ep nw = w .* ((v ./ (w *h +eps))*h'); h = h .* (w'*(v ./ (w *h +eps))); w = nw ./ repmat( sum( nw, 1), m, 1); h = h ./ repmat( sum( h), r, 1); end