# This file provides a demo of pandas functionality
import random

import pandas as pd


def seriesDemo():
    """Demo of pandas Series creation and updates."""
    a = [1, 2.2, "Pooja", 4, 5]
    s1 = pd.Series(a, index=["id", "module_num", "name", "n1", "n2"])
    print("Series before update\n", s1)
    s1.loc[0] = 2                # label 0 is not in the index, so .loc adds a new entry labelled 0
    s1.iloc[3:5] = 12            # positional update of "n1" and "n2"
    s1._set_value("n1", 15)      # private API; s1.at["n1"] = 15 is the public equivalent
    s1['name'] = 'Mrs. Pooja'
    print("Series after update")
    print(s1)

    d1 = {'c1': 'India', 'c2': 'USA', 'c3': 'Switzerland'}
    print("Dictionary: ", d1)
    s2 = pd.Series(d1, name='Country')
    print(f"Series: {s2.name} Data Type: {s2.dtype} Size: {s2.size}\n")
    print(s2)

    arr = [1, 2, 3, 4]
    arr2 = [10, 20, 30, 4]
    s3 = pd.Series(arr)
    s4 = pd.Series(arr2)
    s5 = pd.concat([s3, s4], ignore_index=True)
    s5.name = "S5"
    print(s5.drop(labels=2))
    print("Entries where the value is 4:\n", s5.loc[s5 == 4])


def dataFrameDemo():
    d1 = {
        "id": [1, 2, 3],
        "name": ["Rohit", "Pooja", "Rajani"]
    }
    print(f"Type of d1: {type(d1)}")
    df = pd.DataFrame(d1)
    print(df)
    print("-" * 25)

    d2 = [10, 20, 30, 40]
    df2 = pd.DataFrame(d2, index=['first', 'second', 'third', 'fourth'], columns=['id'])
    print("using iloc with index\n", df2.iloc[1])
    print("using loc with label index\n", df2.loc['fourth'])
    print("/" * 15)

    print("using iloc with multiple rows\n", df2.iloc[[0, 2]])
    print("using loc with multiple rows\n", df2.loc[['second', 'fourth']])
    print("/" * 15)

    print("using iloc slicing\n", df2.iloc[:2])
    print("using loc slicing\n", df2.loc['third':'fourth'])
    print("-" * 25)

    d3 = [random.randint(i, i * 10) for i in range(0, 500)]
    d4 = ['A', 'B', 'C', 'D', 'E'] * 100

    s3, s4 = pd.Series(d3), pd.Series(d4)

    df1 = pd.concat([s3, s4], axis=1).rename(columns={0: 'id', 1: 'str_val'})
    print(df1[['id', 'str_val']][10:20])
    print("-" * 25)


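# Side note, added as a sketch rather than part of the original demo: dataFrameDemo
# builds df1 by concatenating two Series and renaming the integer column labels.
# The dict form of the DataFrame constructor is the more direct route. The function
# name below is hypothetical and it is not called anywhere.
def dataframe_from_dict_sketch():
    d3 = [random.randint(i, i * 10) for i in range(0, 500)]
    d4 = ['A', 'B', 'C', 'D', 'E'] * 100
    # column names come straight from the dict keys, so no rename() is needed
    df1 = pd.DataFrame({'id': d3, 'str_val': d4})
    print(df1.head(10))

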
def dataframe_filter_demo():
    """Demo of row filtering with boolean masks, query() and filter()."""

    d3 = [random.randint(i, i * 10) for i in range(0, 500)]
    d4 = ['A', 'B', 'C', 'D', 'E'] * 100

    s3, s4 = pd.Series(d3), pd.Series(d4)

    df1 = pd.concat([s3, s4], axis=1).rename(columns={0: 'id', 1: 'str_val'})

    for x in df1.index:
        if df1.loc[x, 'id'] % 500 == 0:
            print(df1.loc[x, 'id'])

    print("Single column condition:\n", df1.loc[df1['str_val'] == 'A'])
    print("-" * 25)

    print("Double column condition:\n", df1.loc[(df1['str_val'] == 'A') & (df1['id'] % 10 == 0)])
    print("-" * 25)

    # query() accepts a boolean expression string
    print(df1.query("str_val == 'B' and (id > 110 and id < 700)"))

    # filter() selects columns by label, not by value
    print(df1.filter(items=['id', 'str_val']).head(20))

    print(df1.filter(like='val').head(20))


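# Side note, added as a sketch rather than part of the original demo: another
# common way to filter rows is Series.isin(), which checks membership in a list
# of values. The function name is hypothetical and it is not called anywhere.
def dataframe_isin_filter_sketch():
    df1 = pd.DataFrame({'id': range(10), 'str_val': list('ABCDE') * 2})
    # keep only the rows whose str_val is 'A' or 'B'
    print(df1[df1['str_val'].isin(['A', 'B'])])

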
def dataframe_duplicate_demo():
    d = {
        'id': [1, 2, 3, 4, 5, 1],
        'name': ['R', 'O', 'H', 'I', 'T', 'R']
    }
    df = pd.DataFrame(d)
    print("Input data frame:\n", df)

    duplicated_series = df.duplicated()

    print("Find duplicate rows\n", duplicated_series)

    print("Show only the duplicated rows\n", df[duplicated_series])

    print("Remove duplicate rows\n", df.drop_duplicates())

    print("Remove duplicates on the basis of column values\n", df.drop_duplicates(subset=['name']))

    print("Update duplicated values in df")
    for i in df[duplicated_series].index.values:
        df.loc[i] = (6, 'P')

    print(df)


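# Side note, added as a sketch rather than part of the original demo: by default
# duplicated() leaves the first occurrence unflagged. Passing keep=False flags
# every copy of a duplicated row, which is handy when all copies need to be
# inspected or replaced. The function name is hypothetical and it is not called anywhere.
def dataframe_duplicated_keep_false_sketch():
    df = pd.DataFrame({'id': [1, 2, 1], 'name': ['R', 'O', 'R']})
    print(df[df.duplicated(keep=False)])   # shows both copies of the (1, 'R') row

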
def null_handling_demo():
    d = {
        'id': [1, 2, 3, 4, 5, None, 8],
        'name': ['R', 'O', 'H', 'I', 'T', 'C', None]
    }
    df = pd.DataFrame(d)

    print("Null mask of the data frame (True where a value is missing)")
    print(df.isna())

    print("Remove null values from the data frame")
    df1 = df.dropna()
    # the NaN values forced 'id' to float, so downcast it back to integer
    df2 = pd.concat([pd.to_numeric(df1['id'], downcast="integer"), df1['name']], axis=1)
    print(df2)

    print("Fill null values in the data frame")
    d1 = {'id': 0, 'name': '#'}
    df1 = df.fillna(d1).astype(dtype={'id': int, 'name': object})
    print(df1)


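# Side note, added as a sketch rather than part of the original demo: the
# to_numeric(..., downcast="integer") step above is only needed because NaN
# forces the 'id' column to float. With the nullable extension dtypes
# (pandas >= 1.0), convert_dtypes() keeps the ids integral (Int64) while still
# allowing missing values. The function name is hypothetical and it is not called anywhere.
def null_handling_nullable_dtype_sketch():
    df = pd.DataFrame({'id': [1, 2, None], 'name': ['R', 'O', None]})
    converted = df.convert_dtypes()
    print(converted.dtypes)   # id -> Int64, name -> string
    print(converted)          # missing entries show up as <NA>

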
def dataframe_add_rows():
    d = {
        'id': [1, 2, 3, 4, 5],
        'name': ['R', 'O', 'H', 'I', 'T']
    }
    df = pd.DataFrame(d)

    # to add a single row, assign to a new label via .loc (setting with enlargement)
    df.loc[df.index.max() + 1] = (6, 'P')

    # to add multiple rows, concatenate another DataFrame
    # (pd.concat is the way to go now that DataFrame.append has been removed in pandas 2.0)
    multiple_rows = {
        'id': [7, 8, 9, 10, 11, 12],
        'name': ['R', 'A', 'K', 'A', 'S', 'H']
    }
    df1 = pd.DataFrame(multiple_rows)
    df = pd.concat([df, df1], axis=0, ignore_index=True)
    print(df)


def dataframe_where_demo():
    d = {
        'id': [1, 2, 3, 4, 5],
        'name': ['R', 'O', 'H', 'I', 'T']
    }
    df = pd.DataFrame(d)

    cond1 = df['id'] > 2
    cond2 = df['name'] != 'T'

    print(df.where(cond1 & cond2))


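# Side note, added as a sketch rather than part of the original demo:
# DataFrame.where() keeps the original shape and fills non-matching rows with
# NaN, whereas plain boolean indexing drops them. The function name is
# hypothetical and it is not called anywhere.
def dataframe_where_vs_boolean_indexing_sketch():
    df = pd.DataFrame({'id': [1, 2, 3, 4, 5], 'name': ['R', 'O', 'H', 'I', 'T']})
    mask = (df['id'] > 2) & (df['name'] != 'T')
    print(df.where(mask))   # same number of rows, NaN where the mask is False
    print(df[mask])         # only the rows where the mask is True

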
def dataframe_remove_rows():
    d = {
        'id': [1, 2, 3, 4, 5],
        'name': ['R', 'O', 'H', 'I', 'T']
    }
    df = pd.DataFrame(d)

    index_list = df.index.values

    print("Remove the first two rows from the dataframe using indexes")
    print(df.drop(index=index_list[:2]))

    print("Remove rows from the dataframe on the basis of a condition")
    df.drop(df[df['id'] == 2].index.values, inplace=True)
    print(df)

    print("Remove column id from the dataframe")
    df.drop(columns=['id'], inplace=True)

    print(df)


def dataframe_join_demo():
    emp_data = {
        'emp_id': [10, 20, 30, 40, 50, 60],
        'emp_name': ["Rohit", "Pooja", "Rajani", "Rushi", "Rutu", "Prithvi"],
        'emp_sal': [5600, 6200, 7900, 7623.45, 5823.41, 5399.14],
        'dept_id': [1, 2, 3, 1, 3, 3]
    }

    dept_data = {
        'dept_id': [1, 2, 3],
        'dept_name': ["IT", "Civil", "Computer Science"]
    }

    emp_df = pd.DataFrame(emp_data)
    dept_df = pd.DataFrame(dept_data)
    print("Emp df\n", emp_df)
    print("Dept df\n", dept_df)

    print("Joined df")
    print(emp_df.join(dept_df.set_index('dept_id'), on='dept_id', how='inner'))

    dept_renamed_df = dept_df.rename(columns={'dept_id': 'id'})
    print(dept_renamed_df)
    # print(emp_df.join(dept_renamed_df.set_index('id'), on='id', how='inner'))


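# Side note on the commented-out join above, added as a sketch rather than part
# of the original demo: join()'s `on` parameter names a column of the calling
# (left) DataFrame, while the right DataFrame is matched on its index. So after
# renaming dept_id to id on the department side, the call would still use
# on='dept_id'. The function name is hypothetical and it is not called anywhere.
def dataframe_join_renamed_sketch():
    emp_df = pd.DataFrame({'emp_id': [10, 20], 'dept_id': [1, 2]})
    dept_renamed_df = pd.DataFrame({'id': [1, 2], 'dept_name': ["IT", "Civil"]})
    print(emp_df.join(dept_renamed_df.set_index('id'), on='dept_id', how='inner'))

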
def dataframe_merge_demo():
    emp_data = {
        'emp_id': [10, 20, 30, 40, 50, 60],
        'emp_name': ["Rohit", "Pooja", "Rajani", "Rushi", "Rutu", "Prithvi"],
        'emp_sal': [5600, 6200, 7900, 7623.45, 5823.41, 5399.14],
        'dept_id': [1, 2, 3, 1, 3, 3]
    }

    dept_data = {
        'id': [1, 2, 3],
        'dept_name': ["IT", "Civil", "Computer Science"]
    }

    emp_df = pd.DataFrame(emp_data)
    dept_df = pd.DataFrame(dept_data)

    print("Merged df when the column names differ")
    print(emp_df.merge(dept_df, how='left', left_on='dept_id', right_on='id'))

    print("Merged df when the column names are the same")
    print(emp_df.merge(dept_df.rename(columns={'id': 'dept_id'}), how='left'))


if __name__ == "__main__":
    dataframe_remove_rows()