Just a quick add-on to my previous post on yet another way to present multidimensional data:
To recap, we have a “distribution of distributions”, where each distribution has two dimensions, mu and sigma.
In the previous post, I chose to present the data as a heatmap, as below:
But there are other ways such multidimensional data could be presented, below a 3D version:
Although this 3D presentation looks quite fancy, I’m not sure it’s easier to understand than the heatmap above. One way to think about the data is that the heatmap presents a top view, that is, we are looking at the data from above, while the 3D view lets us see the data from a dimensional perspective. To illustrate that, below are plots showing the “profiles” of the “mountain”, that is, mu and sigma separately. You can imagine that the mu-plot is the projection of the 3D-data when standing on its mu-side, and the sigma-plot is the projection when standing on the sigma side:
import numpy as np import matplotlib.pyplot as plt import scipy.stats as sps import pandas as pd from mpl_toolkits.mplot3d import Axes3D # all possible values grid = np.mgrid[37.5:39:0.01,0.001:1.1:0.001].reshape(2,-1).T df = pd.DataFrame(grid) df.columns = ['mu','sigma'] # data to be fitted test_data = sps.norm.rvs(38.4,0.5,size=20) # likelihood - mult all combinations of mu & sigma from grid to fit to test data likelihood = np.array( [ sps.norm.pdf( test_data,grid[:,0][i], grid[:,1][i]).prod(axis=0) for i in range(len(grid))]) df['likelihood'] = likelihood # priors for mu and sigma distributions given by loc & scale posterior = likelihood * sps.norm.pdf( grid[:,0],loc=38.0,scale=0.1) * sps.uniform.pdf( grid[:,1],loc=0.1,scale=0.5) posterior /= posterior.sum() df['posterior'] = posterior df = df.sort_values(by=['posterior']) print (df.tail()) maximum = np.argmax(posterior) print ('max mu posterior probability={} resides at x={}'.format( posterior[maximum],grid[:,0][maximum])) print ('max sigma posterior probability {}'.format(grid[:,1][maximum])) plt.title('mu posterior-distribution of distributions') plt.plot(grid[:,0],posterior,ls='steps') plt.xlabel(r'$\mu$') plt.ylabel('likelihood') plt.figure() plt.title('sigma posterior- distribution of distributions') plt.xlabel(r'$\sigma$') plt.ylabel('likelihood') plt.plot(grid[:,1],posterior) sample_rows_from_posterior = np.random.choice( range(len(grid)),replace=True,p=posterior,size=500) plt.figure() plt.title('Posterior samples heatmap') plt.plot(df['mu'][sample_rows_from_posterior], df['sigma'][sample_rows_from_posterior],'o',alpha=0.3,color='b') plt.xlabel(r'$\mu$') plt.ylabel(r'$\sigma$') fig = plt.figure(figsize=(14,10)) ax = fig.add_subplot(111,projection='3d') ax.set_title('Posterior Samples') x = df['mu'][sample_rows_from_posterior] y = df['sigma'][sample_rows_from_posterior] X,Y = np.meshgrid(x,y) zs = df['posterior'][sample_rows_from_posterior] Z = zs surf = ax.plot_surface(X,Y,Z,cmap='terrain_r',shade=True) ax.set_xlabel(r'$\mu$') ax.set_ylabel(r'$\sigma$') ax.set_zlabel('Posterior Probability') ax.view_init(elev=None,azim=-45) fig.colorbar(surf) plt.figure() bins=30 plt.subplot(2,1,1) plt.title('Mu samples from posterior') plt.xlabel(r'$\mu$') plt.ylabel('likelihood') plt.hist(df['mu'][sample_rows_from_posterior],bins=bins) plt.subplot(2,1,2) plt.title('Sigma samples from posterior') plt.hist(df['sigma'][sample_rows_from_posterior],bins=bins) plt.xlabel(r'$\sigma$') plt.ylabel('likelihood') plt.tight_layout() plt.show()