1010import fitz
1111import nltk
1212nltk .download ('punkt' )
13+
1314######
1415# Visual Python: Data Analysis > PDF
1516######
@@ -43,6 +44,7 @@ def vp_pdf_get_sentence(fname_lst):
4344df = _vp_pd .concat ([df ,df_doc ])
4445
4546return df .reset_index ().drop ('index' ,axis = 1 )
47+
4648######
4749# Visual Python: Data Analysis > Frame
4850######
@@ -63,6 +65,7 @@ def vp_drop_outlier(df, col, weight=1.5):
6365df_res = df .drop (outlier_index ).copy ()
6466
6567return df_res
68+
6669######
6770# Visual Python: Machine Learning > Model Info
6871######
@@ -74,10 +77,12 @@ def vp_create_feature_importances(model, X_train=None, sort=False):
7477
7578df_i = _vp_pd .DataFrame (model .feature_importances_ ,index = feature_names ,columns = ['Feature_importance' ])
7679df_i ['Percentage' ]= 100 * (df_i ['Feature_importance' ]/ df_i ['Feature_importance' ].max ())
77- if sort :df_i .sort_values (by = 'Feature_importance' ,ascending = False ,inplace = True )
80+ if sort :
81+ df_i .sort_values (by = 'Feature_importance' ,ascending = False ,inplace = True )
7882df_i = df_i .round (2 )
7983
8084return df_i
85+
8186######
8287# Visual Python: Machine Learning > Model Info
8388######
@@ -91,10 +96,13 @@ def vp_plot_feature_importances(model, X_train=None, sort=False, top_count=0):
9196df_i ['Percentage' ].sort_values ().plot (kind = 'barh' )
9297else :
9398df_i ['Percentage' ].plot (kind = 'barh' )
99+
94100_vp_plt .xlabel ('Feature importance Percentage' )
95101_vp_plt .ylabel ('Features' )
96-
97102_vp_plt .show ()
103+
104+ return
105+
98106######
99107# Visual Python: Visualization > Seaborn
100108######
@@ -134,4 +142,6 @@ def _single(ax):
134142for idx ,ax in _vp_np .ndenumerate (axs ):
135143_single (ax )
136144else :
137- _single (axs )
145+ _single (axs )
146+
147+ return