Back to the matplotlib-gallery
at https://github.com/rasbt/matplotlib-gallery
%load_ext watermark
%watermark -u -v -d -p matplotlib,numpy
Last updated: 31/07/2014 | CPython 3.4.1 | IPython 2.0.0 | matplotlib 1.3.1 | numpy 1.8.1
%matplotlib inline
# Histogram of 1000 Gaussian samples (mean 0, std 20) using bins of a fixed
# width of 5 over the fixed range [-100, 100).
import numpy as np
import random
from matplotlib import pyplot as plt

data = np.random.normal(0, 20, 1000)

# fixed bin size
bin_edges_step = 5
bins = np.arange(-100, 100, bin_edges_step)

# Pad the visible x-range by 5 units on either side of the observed data.
x_lower = min(data) - 5
x_upper = max(data) + 5
plt.xlim([x_lower, x_upper])

plt.hist(data, bins=bins, alpha=0.5)
plt.title('Random Gaussian data (fixed bin size)')
plt.xlabel('variable X (bin size = 5)')
plt.ylabel('count')
plt.show()
# Histogram of 1000 Gaussian samples (mean 0, std 20) using a fixed number of
# evenly spaced bins that span the whole observed data range.
import numpy as np
import random
import math
from matplotlib import pyplot as plt

data = np.random.normal(0, 20, 1000)

# fixed number of bins
n_bins = 20
# Round the range outward (floor the minimum, ceil the maximum) so that the
# smallest and largest samples fall inside the outermost bins.
range_lower = math.floor(data.min())
range_upper = math.ceil(data.max())
# n_bins bins need n_bins + 1 edges; passing 20 to linspace gives only 19 bins.
bins = np.linspace(range_lower, range_upper, n_bins + 1)

plt.xlim([min(data) - 5, max(data) + 5])
plt.hist(data, bins=bins, alpha=0.5)
plt.title('Random Gaussian data (fixed number of bins)')
plt.xlabel('variable X (20 evenly spaced bins)')
plt.ylabel('count')
plt.show()
# Two overlapping, semi-transparent histograms drawn on shared bins:
# 500 samples from N(15, 10) and 500 samples from N(5, 5).
import numpy as np
import random
from matplotlib import pyplot as plt

data1 = [random.gauss(15, 10) for _ in range(500)]
data2 = [random.gauss(5, 5) for _ in range(500)]

bins = np.arange(-60, 60, 2.5)

# x-limits cover both samples, padded by 5 units on each side.
all_values = data1 + data2
plt.xlim([min(all_values) - 5, max(all_values) + 5])

# Same bins and transparency for both classes so the overlap stays visible.
for values, class_label in ((data1, 'class 1'), (data2, 'class 2')):
    plt.hist(values, bins=bins, alpha=0.3, label=class_label)

plt.title('Random Gaussian data')
plt.xlabel('variable X')
plt.ylabel('count')
plt.legend(loc='upper right')
plt.show()
# Build a cubic interpolator over the (bins, y) pairs using interp1d.
# NOTE(review): interp1d, bins and y are not defined anywhere before this
# point in the visible text -- presumably they come from the histogram-fitting
# cell further down having been run first; confirm the intended cell order.
smooth=interp1d(bins,y,kind='cubic')
# Bare expression: echoes the interpolator object's repr as the cell output.
smooth
<scipy.interpolate.interpolate.interp1d at 0x107b78908>
# Normalised histogram of 10000 Gaussian samples with a fitted normal
# density curve drawn on top.
import numpy as np
import random
import math
from matplotlib import pyplot as plt
import matplotlib.mlab as mlab  # no longer used here; import kept so the cell's namespace is unchanged
from scipy.stats import norm
from scipy.interpolate import interp1d

data = np.random.normal(0, 20, 10000)

# plotting the histogram
# `density=True` replaces the removed `normed=1` keyword (matplotlib >= 2.1):
# bar areas sum to 1, so the bars are comparable with a pdf.
n, bins, patches = plt.hist(data, bins=20, density=True, alpha=0.5,
                            color='lightblue')

# fitting the data
mu, sigma = norm.fit(data)

# adding the fitted line
# scipy's norm.pdf replaces mlab.normpdf, which was removed from matplotlib.
y = norm.pdf(bins, mu, sigma)
interp = interp1d(bins, y, kind='cubic')
# The interpolator maps x positions to densities, so it has to be evaluated
# at x values inside [bins[0], bins[-1]] -- not at the density values `y`.
x_fine = np.linspace(bins[0], bins[-1], 200)
plt.plot(x_fine, interp(x_fine), linewidth=2, color='blue')

plt.xlim([min(data) - 5, max(data) + 5])
plt.title('Random Gaussian data (fixed number of bins)')
plt.xlabel('variable X (20 evenly spaced bins)')
# The histogram is normalised, so the y-axis shows a density, not raw counts.
plt.ylabel('probability density')
plt.show()
The line plot below is using bins of a histogram and is particularly useful if you are working with many different overlapping data sets.
# Generate a random Gaussian dataset with different means
# 5 rows with 30 columns, where every row represents 1 sample.
import numpy as np

# Dataset dimensions, named once so the array shape, the loop bound and the
# per-row sample size cannot drift apart.
n_samples, n_points = 5, 30

data = np.ones((n_samples, n_points))
for i in range(n_samples):
    # Row i is drawn from N(i / 2, 1); dividing by 2.0 keeps the mean
    # fractional regardless of integer-division rules.
    data[i, :] = np.random.normal(loc=i / 2.0, scale=1.0, size=n_points)
Via the numpy.histogram
function, we can categorize our data into distinct bins.
# Compute common bin edges (width 0.5) covering the whole dataset and
# histogram the first sample with them.
# Relies on `np` and the 2-D array `data` created in an earlier cell.
from math import floor, ceil  # for rounding up and down

data_min = floor(data.min())  # minimum val. of the dataset rounded down
data_max = ceil(data.max())   # maximum val. of the dataset rounded up

# Named `bin_size` (not `bins_size`) because that is the name used below.
bin_size = 0.5

# np.arange excludes its stop value, so extend the stop by one bin width to
# keep the upper edge `data_max`; otherwise the largest values are not binned.
bins = np.arange(data_min, data_max + bin_size, bin_size)

# Bare expression: shows the (counts, bin_edges) tuple as the cell output.
np.histogram(data[0, :], bins=bins)
(array([0, 5, 4, 9, 4, 6, 1, 1, 0]), array([-2. , -1.5, -1. , -0.5, 0. , 0.5, 1. , 1.5, 2. , 2.5]))
The numpy.histogram
function returns a tuple, where the first value is an array of how many samples fall into the first bin, the second bin, and so forth.
The second value is another NumPy array; it contains the specified bin edges. Note that all bins but the last one are half-open intervals, e.g., the first bin would be [-2, -1.5)
(including -2, but not including -1.5), and the second bin would be [-1.5, -1.0)
(including -1.5, but not including -1.0). But the last bin is defined as [2., 2.5]
(including 2 and including 2.5).
# Draw every sample's histogram as short horizontal bars (one per bin, at the
# bar height) instead of filled bars, so overlapping samples stay readable.
# Relies on `np`, `data` (one sample per row) and the bin edges `bins`
# created in earlier cells.
from matplotlib import pyplot as plt

# NOTE(review): `markers` is never used below; kept in case another cell
# reads it.
markers = ['^', 'v', 'o', 'p', 'x', 's', 'p', ',']

# Derive the bin width from the edges themselves so this cell always agrees
# with whatever `bins` holds.
bin_size = float(bins[1] - bins[0])

plt.figure(figsize=(13, 8))

for row in range(data.shape[0]):
    hist = np.histogram(data[row, :], bins=bins)
    plt.errorbar(
        hist[1][:-1] + bin_size / 2,  # bin centres = left edges + half a width
        hist[0],                      # counts per bin
        alpha=0.3,
        xerr=bin_size / 2,            # horizontal bar spans the full bin
        capsize=0,
        fmt='none',                   # the string 'none' suppresses markers/lines
        linewidth=8,
        label='sample%s' % (row + 1),
    )

# Labels are attached per errorbar call, so the legend follows the number of
# rows in `data` instead of a hard-coded count.
plt.legend()
plt.grid()
plt.title('Histogram showing bar heights but without area under the bars',
          fontsize=18)
plt.ylabel('count', fontsize=14)
plt.xlabel('X value (bin size =%s)' % bin_size, fontsize=14)
plt.xticks(bins + bin_size)
plt.show()