% Generalized Sparse Coding Neural Gas for learning data representations
% and performing overcomplete blind source separation (BSS).
%
% C=mscng(X, M, K, epochs, delta, alpha0, alphaf, lambda0, lambdaf)
%
%Input:
% X : training data (one sample per column) (improve performance by removing
%     mean)
% M : number of basis elements / number of underlying sources
% K : number of non-zero coefficients / number of active sources per
%     mixture (this is set to the number of dimensions of the training
%     samples in case of BSS)
% epochs : number of training epochs (number of learning iterations
%          is epochs*number of training samples)
% delta  : noise level (set delta >= 0 for BSS, 
%                      default is -1, i.e. learning of data representation using a fixed K value)
% alpha0 : initial learning rate
% alphaf : final learning rate 
% lambda0 : initial neighbourhood-size
% lambdaf : final neighbourhood-size
%
%Output:
% C : learned basis/mixing matrix
%
% Copyright (C) 2009 Kai Labusch
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 2
% of the License, or (at your option) any later version.
%
function C=mscng(X, M, K, epochs, delta, alpha0, alphaf, lambda0, lambdaf)

N = size(X,1);

if ~exist('delta', 'var'),
    delta = -1; % we learn a data representation by default
else
    K = N; % otherwise we are BSS mode and set the maximum number of non-zero
           % elements of the coefficients to the maximum, i.e. which is the
           % dimensionality of the given mixtures
end

if ~exist('alpha0','var'),
    alpha0 = 0.5; % default initial learning rate
end

if ~exist('alphaf','var'),
    alphaf = 0.005; % default final learning rate
end

if ~exist('lambda0','var'),
    lambda0 = M/2; % default initial neighbourhood-size
end

if ~exist('lambdaf','var'),
    lambdaf = 0.01; % default final neighbourhood-size
end

C = rand(N,M) - 0.5;
t_max = epochs*size(X,2);

for t=0:t_max
    
    %select random sample 
    x = X(:, floor(rand*(size(X,2)-1))+1);
    
    %set c_1,..., c_M to unit length
    C = C./repmat(sqrt(sum(C.^2,1)), N, 1);
    
    %compute current size of neighbourhood
    lambda_t = lambda0*(lambdaf/lambda0)^(t/t_max);
    
    %compute current learning rate
    alpha_t  = alpha0*(alphaf/alpha0)^(t/t_max);
    
    r=x; R=C; candidates=1:M;
    
    h=0;

    while (h <= K-1) && (norm(r) > delta)
        % determine sequence of distances
        y = r'*R;
        
        % sort distances
        [y_sorted, inds]= sort(-(y.^2));
        
        %obtain ranking
        ranking = zeros(1,M-h);
        ranking(inds) = 0:(M-h-1);             
        neighbourhood = exp(-ranking/lambda_t);
        
        %compute current update for C and R
        delta_CR = (repmat(r, 1, M-h) - R.*repmat(y, N,1)).*repmat(alpha_t*neighbourhood.*y, N, 1);
        
        %update C
        C(:,candidates) = C(:,candidates) + delta_CR;
        
        %update R
        R = R + delta_CR;
        
        % set norm of remaining columns to 1
        R = R ./ repmat(sqrt(sum(R.^2,1)), N, 1);
        
        % find current winner
        [tmp, l_win] = max((R'*r).^2);
        
        winner = R(:,l_win);
        
        % remove projection of residual to winner from current residual
        r = r - (winner'*r)*winner;
        
        % remove projection of remaining basis elements to winner from
        % current basis
        R = R - repmat(winner, 1, M-h).*repmat(winner'*R, N, 1);
        
        % remove winner from set of possible candidates,
        % this corresponds to U = [U , l_win]
        R = R(:, [1:l_win-1, l_win+1:end]);
        
        candidates = candidates([1:l_win-1, l_win+1:end]);
        
        % set norm of remaining columns to 1
        R = R./repmat(sqrt(sum(R.^2,1)), N, 1);
        
        h=h+1;
    end
end

return;