%% Script to do dimension reduction analysis on economics dataset

% need data in format Cereals (Cnorm), Food (Fnorm), Non-food (Nnorm),
% Frequency bins (fx) and Rounds (rnd) where there are multiple rounds per
% year. Mean income levels should be in format Rounds (rnds), Year number
% (years) and Mean Income Level (mean_income). Dataset is
% 'Indian_dataset.mat' for first analysis.

close all
clear all
uiopen;
clc

%% Step 1: find CDFs and PDFs for existing data

[PDF_C, PDF_interp_C, CDF_C, CDF_interp_C] = economics_pdf(Cnorm, fx, rnd);
[PDF_F, PDF_interp_F, CDF_F, CDF_interp_F] = economics_pdf(Fnorm, fx, rnd);
[PDF_N, PDF_interp_N, CDF_N, CDF_interp_N] = economics_pdf(Nnorm, fx, rnd);

% plot the smoothed CDF and PDF for a sample year

sample_round = 13;

figure(1)
hold on
plot(PDF_interp_C{sample_round}(1,:),PDF_interp_C{sample_round}(2,:),'--')
plot(PDF_interp_F{sample_round}(1,:),PDF_interp_F{sample_round}(2,:),'--')
plot(PDF_interp_N{sample_round}(1,:),PDF_interp_N{sample_round}(2,:),'--')
title('PDFs')
xlabel('Normalised expenditure')
ylabel('Probability')
hold off

figure(2)
hold on
plot(CDF_interp_C{sample_round}(1,:),CDF_interp_C{sample_round}(2,:),'--')
plot(CDF_interp_F{sample_round}(1,:),CDF_interp_F{sample_round}(2,:),'--')
plot(CDF_interp_N{sample_round}(1,:),CDF_interp_N{sample_round}(2,:),'--')
title('CDFs')
xlabel('Normalised expenditure')
ylabel('Cumulative Probability')
hold off

%% Step 2: Do dimension reduction for the datasets
% note this requires Iain's version of the Netlab and drtoolbox packages 
% for Matlab so I have put a check in to avoid doing this if the data 
% already exists. Once the mappings are learned they are bias-shifter to
% minimum zero as many will spread evenly around the origin by design.
% a=neuroscale; b=locally linear embedding; c=isomap; d=curvilinear
% component analysis; e=pca

if exist('Detrended_dim_red.mat','file')==0
% NeuroScale
X = [Cnorm,Fnorm,Nnorm];
net = rbf(3,length(Cnorm),1,'rlogr','neuroscale','0');
net.c = X;
options = foptions;
options(6) = 1;
net = rbftrain(net, options, X, sqrt(dist2(X,X)));
Y = rbffwd(net, X);
Y = Y + abs(min(Y));

% Locally Linear Embedding
Y2a = lle(X,1,18);
Y2a = Y2a + abs(min(Y2a));

% Isomap
Y2b = isomap(X,1,18);
Y2b = Y2b + abs(min(Y2b));

% Curvilinear Component Analysis
Y2c = cca_embed(X,1);
Y2c = Y2c + abs(min(Y2c));

% Principal Component Analysis
Y2d = pca_dr(X,1);
Y2d = Y2d + abs(min(Y2d));

else
    load('Detrended_dim_red.mat');
    Y = Y - (min(Y));
    Y2a = Y2a - (min(Y2a));
    Y2b = Y2b - (min(Y2b));
    Y2c = Y2c - (min(Y2c));
    Y2d = Y2d - (min(Y2d));
end

% calculate CDF, PDF and intepolated versions for plots
[PDF_Y, PDF_interp_Y, CDF_Y, CDF_interp_Y] = economics_pdf(Y, fx, rnd);
[PDF_Y2a, PDF_interp_Y2a, CDF_Y2a, CDF_interp_Y2a] = economics_pdf(Y2a, fx, rnd);
[PDF_Y2b, PDF_interp_Y2b, CDF_Y2b, CDF_interp_Y2b] = economics_pdf(Y2b, fx, rnd);
[PDF_Y2c, PDF_interp_Y2c, CDF_Y2c, CDF_interp_Y2c] = economics_pdf(Y2c, fx, rnd);
[PDF_Y2d, PDF_interp_Y2d, CDF_Y2d, CDF_interp_Y2d] = economics_pdf(Y2d, fx, rnd);

% add to plots
figure(1)
hold on
plot(PDF_interp_Y{sample_round}(1,:),PDF_interp_Y{sample_round}(2,:))
plot(PDF_interp_Y2a{sample_round}(1,:),PDF_interp_Y2a{sample_round}(2,:))
plot(PDF_interp_Y2b{sample_round}(1,:),PDF_interp_Y2b{sample_round}(2,:))
plot(PDF_interp_Y2c{sample_round}(1,:),PDF_interp_Y2c{sample_round}(2,:))
plot(PDF_interp_Y2d{sample_round}(1,:),PDF_interp_Y2d{sample_round}(2,:))
legend('Cereals','Food','Non-Food','NeuroScale','LLE','IsoMap','CCA','PCA')
hold off

figure(2)
hold on
plot(CDF_interp_Y{sample_round}(1,:),CDF_interp_Y{sample_round}(2,:))
plot(CDF_interp_Y2a{sample_round}(1,:),CDF_interp_Y2a{sample_round}(2,:))
plot(CDF_interp_Y2b{sample_round}(1,:),CDF_interp_Y2b{sample_round}(2,:))
plot(CDF_interp_Y2c{sample_round}(1,:),CDF_interp_Y2c{sample_round}(2,:))
plot(CDF_interp_Y2d{sample_round}(1,:),CDF_interp_Y2d{sample_round}(2,:))
legend('Cereals','Food','Non-Food','NeuroScale','LLE','IsoMap','CCA','PCA')
hold off

%% Step 3: Calculate M values for log-log plots

% for Cereal, Food and Non-Food data first

[M_C,alpha_C] = economics_polyfit(PDF_C);
disp(['Cereals Alpha = ',num2str(alpha_C)]);
[M_F,alpha_F] = economics_polyfit(PDF_F);
disp(['Food Alpha = ',num2str(alpha_F)]);
[M_N,alpha_N] = economics_polyfit(PDF_N);
disp(['Non-Food Alpha = ',num2str(alpha_N)]);

% now for dimension reduced variables

[M_Y,alpha_Y] = economics_polyfit(PDF_Y);
disp(['NeuroScale Alpha = ',num2str(alpha_Y)]);
[M_Y2a,alpha_Y2a] = economics_polyfit(PDF_Y2a);
disp(['LLE Alpha = ',num2str(alpha_Y2a)]);
[M_Y2b,alpha_Y2b] = economics_polyfit(PDF_Y2b);
disp(['IsoMap Alpha = ',num2str(alpha_Y2b)]);
[M_Y2c,alpha_Y2c] = economics_polyfit(PDF_Y2c);
disp(['CCA Alpha = ',num2str(alpha_Y2c)]);
[M_Y2d,alpha_Y2d] = economics_polyfit(PDF_Y2d);
disp(['PCA Alpha = ',num2str(alpha_Y2d)]);

%% Step 4: Solve for V(t) and K(t)

% check first for Cnorm
[V,K] = economics_vk(Cnorm,(Cnorm + Fnorm + Nnorm),rnd);
% do on Dimension reduced variables
%[V,K] = economics_vk(Y,(Cnorm + Fnorm + Nnorm),rnd);

epsilon = rand(1,1); % add a small perturbation
Nu = -V.*K./2 + epsilon;

% fit K and V functions with linear function to find bias at t=t0
%V_poly = polyfit((1:21)',V,1); %sometimes polyfit doesn't work
V_poly = [(1:length(V))',ones(length(V),1)]\V;
V_0 = V_poly(2); % bias at t = 0
%K_poly = polyfit((1:21)',K,1);
K_poly = [(1:length(K))',ones(length(K),1)]\K;
K_0 = K_poly(2); % bias at t = 0

% plot V and K against t as well as linear fit
figure(3)
plot(V,'o'); hold on
plot(V_poly(1)*(1:21)' + V_poly(2),'r')
plot(K,'ko')
plot(K_poly(1)*(1:21)' + K_poly(2),'g')
legend('V(t)','V Linear Fit','K(t)','K Linear Fit')
hold off

Nu_0 = V_0*K_0/2;

% %% Step 5: Solve PDE to calculate CD(y,t)
% 
% % solve with numerical integration of finite elements steps
% CD = economics_pde_burgers(V_poly,K_poly);
% figure(4)
% plot(CD(2,:))
% title('CD(y,t) with income y = 2 over all time')

%% Step 5: Solve PDEs for Consumption Deprivation CD(y,t) Fokker-Planck f(y,t)

C = mean_income; C_recent = C(11:end);
% polyfit doesn't seem to like this calculation so done inside script
C_poly = ([years(11:end),ones(8,1)])\C_recent;

figure(5)
plot(years, C,'o')
title('Mean Income')
hold on
plot(linspace(years(11),years(end),200),C_poly(2) + C_poly(1)*linspace(years(11),years(end),200),'r')

% use combined function based on Amit's fortran code
[Years_FCD,F,CD] = economics_pde(V_poly, K_poly, C_poly, alpha_C);

% F = economics_pde_fokker(V_poly,K_poly,C_poly,CD,alpha_C);
% figure(6)
% plot(F(2,:))
% title('Fokker Planck')

%% Step 6: Correct income estimate with ensemble average
C_corrected = linspace(0,1,size(F,1))*F;
figure(7)
plot(C_corrected)
title('Corrected income from ensemble average')

%% Poverty index

P = sum(CD.*F, 1);
figure(8)
plot(P)
title('Poverty Index')