%% Script to do dimension reduction analysis on economics dataset % need data in format Cereals (Cnorm), Food (Fnorm), Non-food (Nnorm), % Frequency bins (fx) and Rounds (rnd) where there are multiple rounds per % year. Mean income levels should be in format Rounds (rnds), Year number % (years) and Mean Income Level (mean_income). Dataset is % 'Indian_dataset.mat' for first analysis. close all clear all uiopen; clc %% Step 1: find CDFs and PDFs for existing data [PDF_C, PDF_interp_C, CDF_C, CDF_interp_C] = economics_pdf(Cnorm, fx, rnd); [PDF_F, PDF_interp_F, CDF_F, CDF_interp_F] = economics_pdf(Fnorm, fx, rnd); [PDF_N, PDF_interp_N, CDF_N, CDF_interp_N] = economics_pdf(Nnorm, fx, rnd); % plot the smoothed CDF and PDF for a sample year sample_round = 13; figure(1) hold on plot(PDF_interp_C{sample_round}(1,:),PDF_interp_C{sample_round}(2,:),'--') plot(PDF_interp_F{sample_round}(1,:),PDF_interp_F{sample_round}(2,:),'--') plot(PDF_interp_N{sample_round}(1,:),PDF_interp_N{sample_round}(2,:),'--') title('PDFs') xlabel('Normalised expenditure') ylabel('Probability') hold off figure(2) hold on plot(CDF_interp_C{sample_round}(1,:),CDF_interp_C{sample_round}(2,:),'--') plot(CDF_interp_F{sample_round}(1,:),CDF_interp_F{sample_round}(2,:),'--') plot(CDF_interp_N{sample_round}(1,:),CDF_interp_N{sample_round}(2,:),'--') title('CDFs') xlabel('Normalised expenditure') ylabel('Cumulative Probability') hold off %% Step 2: Do dimension reduction for the datasets % note this requires Iain's version of the Netlab and drtoolbox packages % for Matlab so I have put a check in to avoid doing this if the data % already exists. Once the mappings are learned they are bias-shifter to % minimum zero as many will spread evenly around the origin by design. % a=neuroscale; b=locally linear embedding; c=isomap; d=curvilinear % component analysis; e=pca if exist('Detrended_dim_red.mat','file')==0 % NeuroScale X = [Cnorm,Fnorm,Nnorm]; net = rbf(3,length(Cnorm),1,'rlogr','neuroscale','0'); net.c = X; options = foptions; options(6) = 1; net = rbftrain(net, options, X, sqrt(dist2(X,X))); Y = rbffwd(net, X); Y = Y + abs(min(Y)); % Locally Linear Embedding Y2a = lle(X,1,18); Y2a = Y2a + abs(min(Y2a)); % Isomap Y2b = isomap(X,1,18); Y2b = Y2b + abs(min(Y2b)); % Curvilinear Component Analysis Y2c = cca_embed(X,1); Y2c = Y2c + abs(min(Y2c)); % Principal Component Analysis Y2d = pca_dr(X,1); Y2d = Y2d + abs(min(Y2d)); else load('Detrended_dim_red.mat'); Y = Y - (min(Y)); Y2a = Y2a - (min(Y2a)); Y2b = Y2b - (min(Y2b)); Y2c = Y2c - (min(Y2c)); Y2d = Y2d - (min(Y2d)); end % calculate CDF, PDF and intepolated versions for plots [PDF_Y, PDF_interp_Y, CDF_Y, CDF_interp_Y] = economics_pdf(Y, fx, rnd); [PDF_Y2a, PDF_interp_Y2a, CDF_Y2a, CDF_interp_Y2a] = economics_pdf(Y2a, fx, rnd); [PDF_Y2b, PDF_interp_Y2b, CDF_Y2b, CDF_interp_Y2b] = economics_pdf(Y2b, fx, rnd); [PDF_Y2c, PDF_interp_Y2c, CDF_Y2c, CDF_interp_Y2c] = economics_pdf(Y2c, fx, rnd); [PDF_Y2d, PDF_interp_Y2d, CDF_Y2d, CDF_interp_Y2d] = economics_pdf(Y2d, fx, rnd); % add to plots figure(1) hold on plot(PDF_interp_Y{sample_round}(1,:),PDF_interp_Y{sample_round}(2,:)) plot(PDF_interp_Y2a{sample_round}(1,:),PDF_interp_Y2a{sample_round}(2,:)) plot(PDF_interp_Y2b{sample_round}(1,:),PDF_interp_Y2b{sample_round}(2,:)) plot(PDF_interp_Y2c{sample_round}(1,:),PDF_interp_Y2c{sample_round}(2,:)) plot(PDF_interp_Y2d{sample_round}(1,:),PDF_interp_Y2d{sample_round}(2,:)) legend('Cereals','Food','Non-Food','NeuroScale','LLE','IsoMap','CCA','PCA') hold off figure(2) hold on plot(CDF_interp_Y{sample_round}(1,:),CDF_interp_Y{sample_round}(2,:)) plot(CDF_interp_Y2a{sample_round}(1,:),CDF_interp_Y2a{sample_round}(2,:)) plot(CDF_interp_Y2b{sample_round}(1,:),CDF_interp_Y2b{sample_round}(2,:)) plot(CDF_interp_Y2c{sample_round}(1,:),CDF_interp_Y2c{sample_round}(2,:)) plot(CDF_interp_Y2d{sample_round}(1,:),CDF_interp_Y2d{sample_round}(2,:)) legend('Cereals','Food','Non-Food','NeuroScale','LLE','IsoMap','CCA','PCA') hold off %% Step 3: Calculate M values for log-log plots % for Cereal, Food and Non-Food data first [M_C,alpha_C] = economics_polyfit(PDF_C); disp(['Cereals Alpha = ',num2str(alpha_C)]); [M_F,alpha_F] = economics_polyfit(PDF_F); disp(['Food Alpha = ',num2str(alpha_F)]); [M_N,alpha_N] = economics_polyfit(PDF_N); disp(['Non-Food Alpha = ',num2str(alpha_N)]); % now for dimension reduced variables [M_Y,alpha_Y] = economics_polyfit(PDF_Y); disp(['NeuroScale Alpha = ',num2str(alpha_Y)]); [M_Y2a,alpha_Y2a] = economics_polyfit(PDF_Y2a); disp(['LLE Alpha = ',num2str(alpha_Y2a)]); [M_Y2b,alpha_Y2b] = economics_polyfit(PDF_Y2b); disp(['IsoMap Alpha = ',num2str(alpha_Y2b)]); [M_Y2c,alpha_Y2c] = economics_polyfit(PDF_Y2c); disp(['CCA Alpha = ',num2str(alpha_Y2c)]); [M_Y2d,alpha_Y2d] = economics_polyfit(PDF_Y2d); disp(['PCA Alpha = ',num2str(alpha_Y2d)]); %% Step 4: Solve for V(t) and K(t) % check first for Cnorm [V,K] = economics_vk(Cnorm,(Cnorm + Fnorm + Nnorm),rnd); % do on Dimension reduced variables %[V,K] = economics_vk(Y,(Cnorm + Fnorm + Nnorm),rnd); epsilon = rand(1,1); % add a small perturbation Nu = -V.*K./2 + epsilon; % fit K and V functions with linear function to find bias at t=t0 %V_poly = polyfit((1:21)',V,1); %sometimes polyfit doesn't work V_poly = [(1:length(V))',ones(length(V),1)]\V; V_0 = V_poly(2); % bias at t = 0 %K_poly = polyfit((1:21)',K,1); K_poly = [(1:length(K))',ones(length(K),1)]\K; K_0 = K_poly(2); % bias at t = 0 % plot V and K against t as well as linear fit figure(3) plot(V,'o'); hold on plot(V_poly(1)*(1:21)' + V_poly(2),'r') plot(K,'ko') plot(K_poly(1)*(1:21)' + K_poly(2),'g') legend('V(t)','V Linear Fit','K(t)','K Linear Fit') hold off Nu_0 = V_0*K_0/2; % %% Step 5: Solve PDE to calculate CD(y,t) % % % solve with numerical integration of finite elements steps % CD = economics_pde_burgers(V_poly,K_poly); % figure(4) % plot(CD(2,:)) % title('CD(y,t) with income y = 2 over all time') %% Step 5: Solve PDEs for Consumption Deprivation CD(y,t) Fokker-Planck f(y,t) C = mean_income; C_recent = C(11:end); % polyfit doesn't seem to like this calculation so done inside script C_poly = ([years(11:end),ones(8,1)])\C_recent; figure(5) plot(years, C,'o') title('Mean Income') hold on plot(linspace(years(11),years(end),200),C_poly(2) + C_poly(1)*linspace(years(11),years(end),200),'r') % use combined function based on Amit's fortran code [Years_FCD,F,CD] = economics_pde(V_poly, K_poly, C_poly, alpha_C); % F = economics_pde_fokker(V_poly,K_poly,C_poly,CD,alpha_C); % figure(6) % plot(F(2,:)) % title('Fokker Planck') %% Step 6: Correct income estimate with ensemble average C_corrected = linspace(0,1,size(F,1))*F; figure(7) plot(C_corrected) title('Corrected income from ensemble average') %% Poverty index P = sum(CD.*F, 1); figure(8) plot(P) title('Poverty Index')