Q: How to calculate Conditional PDF?

Here's one solution, provided by Joseph Martin:


function cond_pdf = condpdf(data1,data2,bord1,bord2);
%CONDPDF
%cond_pdf = CONDPDF(data1,data2,bord1,bord2) 
%computes the set of discrete conditional pdfs
%of the variable in data1 given the value of the variable in data2
%the binning is according to bord1 and bord2
%all sizes should be as in function TWODBIN
%and cond_pdf is the same size as count in TWODBIN 
% (C) Joseph Martin
count_2d = twodbin(data1,data2,bord1,bord2);
count_2d_norm = count_2d / ((bord1(end) - bord1(1)) / (length(bord1) - 1) * (bord2(end) - bord2(1)) / (length(bord2) - 1) * sum(sum(count_2d)));

nn = find(isfinite(data1) & isfinite(data2));
count_2 = onedbin(data2(nn),bord2);
count_2_norm = count_2 / ((bord2(end) - bord2(1)) / (length(bord2) - 1) * sum(count_2));

cond_pdf = count_2d_norm ./ (ones(size(count_2d_norm,1),1) * count_2_norm');


function count = onedbin(data,bord);
%ONEDBIN
%count = ONEDBIN(data,bord) bins data within the domains specified
%by the bin ends in bord
%data should be n x 1, and bord should be 1 x m
%count is a m-1 x 1 vector
%NaN values are ignored
% (C) Joseph Martin, Daniel Rudnick
nn = find(isfinite(data));
data = data(nn);
bord = bord';
count = zeros(length(bord)-1,1);
ii = find((data >= bord(1,1)) & (data <= bord(end,1)));
bord_ave =  runmean(bord,2);
for a=1:length(ii)
   [dum,ind] = min(abs(data(ii(a))-bord_ave));
   count(ind,1) = count(ind) + 1;
end


function count = twodbin(data1,data2,bord1,bord2);
%TWODBIN
%count = TWODBIN(data1,data2,bord1,bord2) bins data pairs in vectors data1 and data2
%within the 2 dimensional domains specified by the bin ends in bord1 and bord2 
%data1 and data2 should be n x 1, bord1 should be 1 x m
%and bord2 should be 1 x q
%count is a m-1 x q-1 matrix
%NaN values are ignored
% (C) Joseph Martin, Daniel Rudnick
if (length(data1) ~= length(data2))
   error('First two arguments must be the same size.')
end
nn = find(isfinite(data1) & isfinite(data2));
data1 = data1(nn);
data2 = data2(nn);
bord1 = bord1'*ones(1,size(bord2,2));
bord2 = ones(size(bord1,1),1)*bord2;
count = zeros(size(bord1,1)-1,size(bord2,2)-1);
ii = find((data1 >= bord1(1,1)) & (data1 <= bord1(end,1)) & (data2 >= bord2(1,1)) & (data2 <= bord2(1,end)));
bord1_ave =  runmean(bord1(:,1),2);
bord2_ave =  runmean(bord2(1,:)',2);
for a=1:length(ii)
   [dum,ind1] = min(abs(data1(ii(a))-bord1_ave));
   [dum,ind2] = min(abs(data2(ii(a))-bord2_ave));
   count(ind1,ind2) = count(ind1,ind2) + 1;
end