"""

.. _l-onnx-array-onnxruntime-profiling:

Profiling with onnxruntime
==========================

*onnxruntime* optimizes the onnx graph by default before running
the inference. It modifies, fuses, or adds new operators.
Some of them are standard onnx operators, others are implemented
in onnxruntime (see `Supported Operators
<https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md>`_).
This example profiles both models, the original one and the optimized one.

Optimize a model with onnxruntime
+++++++++++++++++++++++++++++++++
"""
import os
import numpy
import matplotlib.pyplot as plt
from onnxruntime import get_available_providers
from onnx_array_api.ext_test_case import example_path
from onnx_array_api.ort.ort_optimizers import ort_optimized_model
from onnx_array_api.ort.ort_profile import ort_profile


filename = example_path("data/small.onnx")
optimized = filename + ".optimized.onnx"

if not os.path.exists(optimized):
    ort_optimized_model(filename, output=optimized)
print(optimized)
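
###############################################
# ``ort_optimized_model`` saves the graph once onnxruntime has optimized it.
# As a rough sketch (an assumption, not necessarily what the helper does
# internally), a similar file can be produced with onnxruntime's public API;
# the ``ORT_ENABLE_ALL`` level and the ``*.sketch.optimized.onnx`` output
# name are only illustrative.

from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions

so = SessionOptions()
so.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
# onnxruntime writes the optimized graph to this path when the session is created
so.optimized_model_filepath = filename + ".sketch.optimized.onnx"
InferenceSession(filename, so, providers=["CPUExecutionProvider"])
print(so.optimized_model_filepath)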

#############################
# Profiling
# +++++++++

feeds = {"input": numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)}
prof_base = ort_profile(
    filename,
    feeds,
    repeat=6,
    disable_optimization=True,
    providers=["CPUExecutionProvider"],
)
prof_base.to_excel("prof_base.xlsx", index=False)
prof_base
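
###############################################
# ``ort_profile`` builds this dataframe from onnxruntime's profiling trace.
# A minimal sketch of the underlying mechanism with onnxruntime's public API
# only (an illustration, not necessarily how ``ort_profile`` is implemented):

import json
from onnxruntime import InferenceSession, SessionOptions

so = SessionOptions()
so.enable_profiling = True
sess = InferenceSession(filename, so, providers=["CPUExecutionProvider"])
for _ in range(6):
    sess.run(None, feeds)
# end_profiling returns the path of the JSON trace written by onnxruntime
trace = sess.end_profiling()
with open(trace, "r") as f:
    events = json.load(f)
print(f"{len(events)} profiling events stored in {trace!r}")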

#######################################
# And the optimized model.

prof_opt = ort_profile(
    optimized,
    feeds,
    repeat=6,
    disable_optimization=True,
    providers=["CPUExecutionProvider"],
)
prof_opt

#######################################
# And the graph is:


def plot_profile(df, ax0, ax1=None, title=None):
    # total duration per operator type
    gr_dur = (
        df[["dur", "args_op_name"]].groupby("args_op_name").sum().sort_values("dur")
    )
    gr_dur.plot.barh(ax=ax0)
    if title is not None:
        ax0.set_title(title)
    if ax1 is not None:
        # number of occurrences per operator type, in the same order
        gr_n = (
            df[["dur", "args_op_name"]]
            .groupby("args_op_name")
            .count()
            .sort_values("dur")
        )
        gr_n = gr_n.loc[gr_dur.index, :]
        gr_n.plot.barh(ax=ax1)
        ax1.set_title("n occurrences")


unique_op = set(prof_base["args_op_name"])
fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")

fig.savefig("plot_profiling.png")
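
###############################################
# The same per-operator totals can also be read as a table, sorted by
# total duration:

agg_dur = (
    prof_base[["dur", "args_op_name"]]
    .groupby("args_op_name")
    .sum()
    .sort_values("dur", ascending=False)
)
agg_dur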

##################################################
# Merging profiles
# ++++++++++++++++
#
# Let's compare both profiles assuming every iteration processes the
# same image and the input and output sizes are the same at every
# iteration.


def preprocess(df):
    groupkey = [
        "args_op_name",
        "args_output_type_shape",
        "args_input_type_shape",
        "args_provider",
    ]

    def _idx(row):
        """
        There may be multiple nodes with the same
        input/output types and shapes.
        This function gives every instance a distinct id.
        The first unique op with the same I/O receives index 0.
        The counter restarts when the session moves on to the
        next image.
        """
        if row["cat"] == "Session":
            occurrences[0] = {}
            return -1
        assert "idx" not in groupkey
        vals = [row[k] for k in groupkey]
        key = tuple(map(str, vals))
        if key not in occurrences[0]:
            occurrences[0][key] = 0
        else:
            occurrences[0][key] += 1
        return occurrences[0][key]

    df = df.copy()
    occurrences = [{}]
    df["idx"] = df.apply(_idx, axis=1)
    # keep only the kernel time of every node
    df = df[(df["cat"] == "Node") & df["name"].str.contains("kernel_time")]
    groupkey.append("idx")
    for c in groupkey:
        if c != "idx":
            df[c] = df[c].apply(str)
    gr = df[groupkey + ["dur"]].groupby(groupkey)
    return gr.sum()


base = preprocess(prof_base)
opti = preprocess(prof_opt)
merge = base.merge(
    opti, how="outer", suffixes=("base", "opti"), left_index=True, right_index=True
)
merge = merge.reset_index(drop=False)
merge.to_excel("plot_profiling_merged.xlsx", index=False)
merge
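
###############################################
# The outer join keeps kernels found in only one of the two profiles:
# ``durbase`` or ``duropti`` is NaN on those rows. A quick count of how
# many rows matched on both sides:

n_both = int(merge[["durbase", "duropti"]].notna().all(axis=1).sum())
print(f"rows matched in both profiles: {n_both} / {merge.shape[0]}")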


#####################################################
# Aggregation


def classify(row):
    # "-": kernel only in the baseline profile,
    # "+": kernel only in the optimized profile,
    # "=": kernel present in both
    if numpy.isnan(row["duropti"]):
        return "-"
    if numpy.isnan(row["durbase"]):
        return "+"
    return "="


keys = {"float": "f"}


def process_shape(s):
    # converts a string such as "[{'float': [1, 3, 112, 112]}]"
    # into a compact label such as "f-1x3x112x112"
    value = eval(s)
    ns = []
    for v in value:
        if len(v) != 1:
            raise NotImplementedError(f"Unexpected value {v} in {s!r}.")
        k, v = list(v.items())[0]
        n = "-".join([keys[k], "x".join(map(str, v))])
        ns.append(n)
    return ",".join(ns)


def label(row):
    name = row["args_op_name"]
    inshape = process_shape(row["args_input_type_shape"])
    outshape = process_shape(row["args_output_type_shape"])
    side = row["side"][0]
    prov = row["args_provider"][:3]
    idx = row["idx"]
    return f"[{side}{prov}] {name}({inshape})->{outshape}[{idx}]"


df = merge.copy()
df["side"] = df.apply(classify, axis=1)
df["label"] = df.apply(label, axis=1)
gr = (
    df[["label", "durbase", "duropti", "idx"]]
    .groupby("label")
    .agg({"durbase": numpy.sum, "duropti": numpy.sum, "idx": max})
)
gr
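
###############################################
# A per-kernel ratio makes the comparison easier to read; kernels present
# on only one side remain NaN.

ratio = (gr["duropti"] / gr["durbase"]).sort_values()
ratio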

################################
# Final plot
# ++++++++++

# Let's filter out insignificant operators.
grmax = gr["durbase"] + gr["duropti"]
total = grmax.sum()
grmax /= total
gr = gr[grmax >= 0.01]


fig, ax = plt.subplots(1, 2, figsize=(14, min(gr.shape[0], 500)), sharey=True)
gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
ax[0].set_title("Side by side duration")
gr = gr.copy()
gr["idx"] += 1
gr[["idx"]].plot.barh(ax=ax[1])
ax[1].set_title("Side by side count")
fig.tight_layout()
fig.savefig("plot_profiling_side_by_side.png")


########################################
# On CUDA
# +++++++


if "CUDAExecutionProvider" in get_available_providers():
    print("Profiling on CUDA")
    prof_base = ort_profile(
        filename,
        feeds,
        repeat=6,
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
    )
    prof_opti = ort_profile(
        optimized,
        feeds,
        repeat=6,
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
    )

    unique_op = set(prof_base["args_op_name"])
    fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
    plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
    plot_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
    fig.savefig("plot_profiling_cuda.png")
else:
    print(f"CUDA not available in {get_available_providers()}")
    fig, ax = None, None

ax