2121from onnxruntime import get_available_providers
2222from onnx_array_api .ext_test_case import example_path
2323from onnx_array_api .ort .ort_optimizers import ort_optimized_model
24- from onnx_array_api .ort .ort_profile import ort_profile
24+ from onnx_array_api .ort .ort_profile import ort_profile ,merge_ort_profile
25+ from onnx_array_api .plotting .stat_plot import plot_ort_profile
2526
2627
27- filename = example_path ("data/small.onnx" )
28+ suffix = ""
29+ filename = example_path (f"data/small{ suffix } .onnx" )
2830optimized = filename + ".optimized.onnx"
31+ print (f"model={ filename !r} " )
2932
3033if not os .path .exists (optimized ):
3134ort_optimized_model (filename ,output = optimized )
32- print (optimized )
35+ print (f" optimized= { optimized !r } " )
3336
3437#############################
38+ # .. _l-example-ort-profiling:
39+ #
3540# Profiling
3641# +++++++++
3742
4348disable_optimization = True ,
4449providers = ["CPUExecutionProvider" ],
4550)
46- prof_base .to_excel ("prof_base.xlsx" ,index = False )
51+ prof_base .to_excel (f "prof_base{ suffix } .xlsx" ,index = False )
4752prof_base
4853
4954#######################################
5055# And the optimized model.
5156
52- prof_opt = ort_profile (
57+ prof_opti = ort_profile (
5358optimized ,
5459feeds ,
5560repeat = 6 ,
5661disable_optimization = True ,
5762providers = ["CPUExecutionProvider" ],
5863)
59- prof_opt
64+ prof_opti .to_excel (f"prof_opti{ suffix } .xlsx" ,index = False )
65+ prof_opti
6066
6167#######################################
6268# And the graph is:
6369
64-
65- def plot_profile (df ,ax0 ,ax1 = None ,title = None ):
66- gr_dur = (
67- df [["dur" ,"args_op_name" ]].groupby ("args_op_name" ).sum ().sort_values ("dur" )
68- )
69- gr_dur .plot .barh (ax = ax0 )
70- if title is not None :
71- ax0 .set_title (title )
72- if ax1 is not None :
73- gr_n = (
74- df [["dur" ,"args_op_name" ]]
75- .groupby ("args_op_name" )
76- .count ()
77- .sort_values ("dur" )
78- )
79- gr_n = gr_n .loc [gr_dur .index , :]
80- gr_n .plot .barh (ax = ax1 )
81- ax1 .set_title ("n occurences" )
82-
83-
8470unique_op = set (prof_base ["args_op_name" ])
8571fig ,ax = plt .subplots (2 ,2 ,figsize = (10 ,len (unique_op )),sharex = "col" )
86- plot_profile (prof_base ,ax [0 ,0 ],ax [0 ,1 ],title = "baseline" )
87- plot_profile ( prof_opt ,ax [1 ,0 ],ax [1 ,1 ],title = "optimized" )
88-
89- fig .savefig ("plot_profiling.png" )
72+ plot_ort_profile (prof_base ,ax [0 ,0 ],ax [0 ,1 ],title = "baseline" )
73+ plot_ort_profile ( prof_opti ,ax [1 ,0 ],ax [1 ,1 ],title = "optimized" )
74+ fig . tight_layout ()
75+ fig .savefig (f "plot_profiling{ suffix } .png" )
9076
9177##################################################
9278# Merging profiles
@@ -96,103 +82,14 @@ def plot_profile(df, ax0, ax1=None, title=None):
9682# process the same image and the input and output size are the
9783# same at every iteration.
9884
99-
100- def preprocess (df ):
101- groupkey = [
102- "args_op_name" ,
103- "args_output_type_shape" ,
104- "args_input_type_shape" ,
105- "args_provider" ,
106- ]
107-
108- def _idx (row ):
109- """
110- There may be multiple node with the same
111- input/output types and shapes.
112- This function gives every instance a distinct id.
113- First unique op with same I/O receives the index 0.
114- The counter restart when the session goes to the
115- next image.
116- """
117- if row ["cat" ]== "Session" :
118- occurences [0 ]= {}
119- return - 1
120- assert "idx" not in groupkey
121- vals = [row [k ]for k in groupkey ]
122- key = tuple (map (str ,vals ))
123- if key not in occurences [0 ]:
124- occurences [0 ][key ]= 0
125- else :
126- occurences [0 ][key ]+= 1
127- return occurences [0 ][key ]
128-
129- df = df .copy ()
130- occurences = [{}]
131- df ["idx" ]= df .apply (_idx ,axis = 1 )
132- df = df [(df ["cat" ]== "Node" )& df ["name" ].str .contains ("kernel_time" )]
133- groupkey .append ("idx" )
134- for c in groupkey :
135- if c != "idx" :
136- df [c ]= df [c ].apply (str )
137- gr = df [groupkey + ["dur" ]].groupby (groupkey )
138- return gr .sum ()
139-
140-
141- base = preprocess (prof_base )
142- opti = preprocess (prof_opt )
143- merge = base .merge (
144- opti ,how = "outer" ,suffixes = ("base" ,"opti" ),left_index = True ,right_index = True
145- )
146- merge = merge .reset_index (drop = False )
147- merge .to_excel ("plot_profiling_merged.xlsx" ,index = False )
85+ merge ,gr = merge_ort_profile (prof_base ,prof_opti )
86+ merge .to_excel (f"plot_profiling_merged{ suffix } .xlsx" ,index = False )
14887merge
14988
150-
15189#####################################################
152- # Aggregation
153-
154-
155- def classify (row ):
156- if numpy .isnan (row ["duropti" ]):
157- return "-"
158- if numpy .isnan (row ["durbase" ]):
159- return "+"
160- return "="
90+ # More detailed
16191
162-
163- keys = {"float" :"f" }
164-
165-
166- def process_shape (s ):
167- value = eval (s )
168- ns = []
169- for v in value :
170- if len (v )!= 1 :
171- raise NotImplementedError (f"Unexpected value{ v } in{ s !r} ." )
172- k ,v = list (v .items ())[0 ]
173- n = "-" .join ([keys [k ],"x" .join (map (str ,v ))])
174- ns .append (n )
175- return "," .join (ns )
176-
177-
178- def label (row ):
179- name = row ["args_op_name" ]
180- inshape = process_shape (row ["args_input_type_shape" ])
181- outshape = process_shape (row ["args_output_type_shape" ])
182- side = row ["side" ][0 ]
183- prov = row ["args_provider" ][:3 ]
184- idx = row ["idx" ]
185- return f"[{ side } { prov } ]{ name } ({ inshape } )->{ outshape } [{ idx } ]"
186-
187-
188- df = merge .copy ()
189- df ["side" ]= df .apply (classify ,axis = 1 )
190- df ["label" ]= df .apply (label ,axis = 1 )
191- gr = (
192- df [["label" ,"durbase" ,"duropti" ,"idx" ]]
193- .groupby ("label" )
194- .agg ({"durbase" :numpy .sum ,"duropti" :numpy .sum ,"idx" :max })
195- )
92+ gr .to_excel (f"plot_profiling_merged_details{ suffix } .xlsx" ,index = False )
19693gr
19794
19895################################
@@ -210,11 +107,10 @@ def label(row):
210107gr [["durbase" ,"duropti" ]].plot .barh (ax = ax [0 ])
211108ax [0 ].set_title ("Side by side duration" )
212109gr = gr .copy ()
213- gr ["idx" ]+= 1
214- gr [["idx" ]].plot .barh (ax = ax [1 ])
110+ gr [["countbase" ,"countopti" ]].plot .barh (ax = ax [1 ])
215111ax [1 ].set_title ("Side by side count" )
216112fig .tight_layout ()
217- fig .savefig ("plot_profiling_side_by_side.png" )
113+ fig .savefig (f "plot_profiling_side_by_side{ suffix } .png" )
218114
219115
220116########################################
@@ -231,21 +127,44 @@ def label(row):
231127disable_optimization = True ,
232128providers = ["CUDAExecutionProvider" ],
233129 )
130+ prof_base .to_excel (f"prof_cuda_base{ suffix } .xlsx" ,index = False )
131+
234132prof_opti = ort_profile (
235133optimized ,
236134feeds ,
237135repeat = 6 ,
238136disable_optimization = True ,
239- providers = ["CUDAExecutionProvider" ],
137+ providers = ["CUDAExecutionProvider" , "CPUExecutionProvider" ],
240138 )
139+ prof_opti .to_excel (f"prof_cuda_opti{ suffix } .xlsx" ,index = False )
241140
242141unique_op = set (prof_base ["args_op_name" ])
243142fig ,ax = plt .subplots (2 ,2 ,figsize = (10 ,len (unique_op )),sharex = "col" )
244- plot_profile (prof_base ,ax [0 ,0 ],ax [0 ,1 ],title = "baseline" )
245- plot_profile (prof_opt ,ax [1 ,0 ],ax [1 ,1 ],title = "optimized" )
246- fig .savefig ("plot_profiling_cuda.png" )
143+ plot_ort_profile (prof_base ,ax [0 ,0 ],ax [0 ,1 ],title = "baseline" )
144+ plot_ort_profile (prof_opti ,ax [1 ,0 ],ax [1 ,1 ],title = "optimized" )
145+ fig .tight_layout ()
146+ fig .savefig (f"plot_profiling_cuda{ suffix } .png" )
147+
148+ merge ,gr = merge_ort_profile (prof_base ,prof_opti )
149+ merge .to_excel (f"plot_profiling_merged_cuda{ suffix } .xlsx" ,index = False )  # "_cuda" tag: do not overwrite the CPU merged workbook written earlier
150+ gr .to_excel (f"plot_profiling_merged_details_cuda{ suffix } .xlsx" ,index = False )  # same for the CPU details workbook
151+
152+ grmax = gr ["durbase" ]+ gr ["duropti" ]
153+ total = grmax .sum ()
154+ grmax /= total
155+ gr = gr [grmax >= 0.01 ]
156+
157+ fig ,ax = plt .subplots (1 ,2 ,figsize = (14 ,min (gr .shape [0 ],500 )),sharey = True )
158+ gr [["durbase" ,"duropti" ]].plot .barh (ax = ax [0 ])
159+ ax [0 ].set_title ("Side by side duration" )
160+ gr = gr .copy ()
161+ gr [["countbase" ,"countopti" ]].plot .barh (ax = ax [1 ])
162+ ax [1 ].set_title ("Side by side count" )
163+ fig .tight_layout ()
164+ fig .savefig (f"plot_profiling_side_by_side_cuda{ suffix } .png" )
165+
247166else :
248- print (f"CUDA not available in{ get_available_providers ()} " )
167+ print (f"CUDA not available in{ get_available_providers ()} . " )
249168fig ,ax = None ,None
250169
251170ax