- Notifications
You must be signed in to change notification settings - Fork0
Adds function to profile onnxruntime#12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.
Already on GitHub?Sign in to your account
Uh oh!
There was an error while loading.Please reload this page.
Merged
Changes fromall commits
Commits
Show all changes
9 commits Select commitHold shift + click to select a range
3bdf171
Adds function to profile onnxruntime
xadupre9cff041
examples
xaduprec7e4652
fix example
xadupre22c5592
fix requirements.txt
xaduprec9309c3
fix example
xadupre2db6e9d
documentation
xaduprec750996
update examples
xadupre21c300f
remove a warning
xaduprea4b7f90
fix providers
xadupreFile filter
Filter by extension
Conversations
Failed to load comments.
Loading
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Jump to file
Failed to load files.
Loading
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
10 changes: 10 additions & 0 deletions_doc/api/ort.rst
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
8 changes: 4 additions & 4 deletions_doc/examples/plot_benchmark_rf.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
16 changes: 11 additions & 5 deletions_doc/examples/plot_optimization.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
251 changes: 251 additions & 0 deletions_doc/examples/plot_profiling.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,251 @@ | ||
""" | ||
.. _l-onnx-array-onnxruntime-profiling: | ||
Profiling with onnxruntime | ||
========================== | ||
*onnxruntime* optimizes the onnx graph by default before running | ||
the inference. It modifies, fuses or adds new operators. | ||
Some of them are standard onnx operators, some of them | ||
are implemented in onnxruntime (see `Supported Operators | ||
<https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md>`_). | ||
This example profiles both the original model and the optimized model. | ||
Optimize a model with onnxruntime | ||
+++++++++++++++++++++++++++++++++ | ||
""" | ||
import os | ||
import numpy | ||
import matplotlib.pyplot as plt | ||
from onnxruntime import get_available_providers | ||
from onnx_array_api.ext_test_case import example_path | ||
from onnx_array_api.ort.ort_optimizers import ort_optimized_model | ||
from onnx_array_api.ort.ort_profile import ort_profile | ||
filename = example_path("data/small.onnx") | ||
optimized = filename + ".optimized.onnx" | ||
if not os.path.exists(optimized): | ||
ort_optimized_model(filename, output=optimized) | ||
print(optimized) | ||
############################# | ||
# Profiling | ||
# +++++++++ | ||
feeds = {"input": numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)} | ||
prof_base = ort_profile( | ||
filename, | ||
feeds, | ||
repeat=6, | ||
disable_optimization=True, | ||
providers=["CPUExecutionProvider"], | ||
) | ||
prof_base.to_excel("prof_base.xlsx", index=False) | ||
prof_base | ||
####################################### | ||
# And the optimized model. | ||
prof_opt = ort_profile( | ||
optimized, | ||
feeds, | ||
repeat=6, | ||
disable_optimization=True, | ||
providers=["CPUExecutionProvider"], | ||
) | ||
prof_opt | ||
####################################### | ||
# And the graph is: | ||
def plot_profile(df, ax0, ax1=None, title=None):
    """
    Draws horizontal bar charts summarizing a profile per operator type.

    :param df: profiling data holding at least the columns
        ``dur`` and ``args_op_name``
    :param ax0: axis receiving the sum of ``dur`` per ``args_op_name``,
        sorted by increasing total
    :param ax1: optional axis receiving the number of rows per
        ``args_op_name``, listed in the same order as on *ax0*
    :param title: optional title set on *ax0*
    """
    per_op = df[["dur", "args_op_name"]].groupby("args_op_name")

    gr_dur = per_op.sum().sort_values("dur")
    gr_dur.plot.barh(ax=ax0)
    if title is not None:
        ax0.set_title(title)

    if ax1 is None:
        return

    gr_n = per_op.count().sort_values("dur")
    # Reorder the counts so both charts list the operators in the same order.
    gr_n = gr_n.loc[gr_dur.index, :]
    gr_n.plot.barh(ax=ax1)
    ax1.set_title("n occurences")
unique_op = set(prof_base["args_op_name"]) | ||
fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col") | ||
plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline") | ||
plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized") | ||
fig.savefig("plot_profiling.png") | ||
################################################## | ||
# Merging profiles | ||
# ++++++++++++++++ | ||
# | ||
# Let's try to compare both profiles assuming every iteration | ||
# processes the same image and the input and output sizes are the | ||
# same at every iteration. | ||
def preprocess(df):
    """
    Sums the kernel durations of a profile per distinct node instance.

    Every row receives an ``idx`` value: rows whose ``cat`` is
    ``"Session"`` get -1 and reset the counters, any other row gets the
    number of rows sharing the same operator name, input/output types and
    shapes, and provider seen since the last reset. Only the rows with
    ``cat == "Node"`` and a ``name`` containing ``"kernel_time"`` are then
    kept and aggregated.

    :param df: profiling data with the columns ``cat``, ``name``, ``dur``,
        ``args_op_name``, ``args_output_type_shape``,
        ``args_input_type_shape`` and ``args_provider``;
        it is not modified
    :return: dataframe with a single column ``dur`` (sum of the durations),
        indexed by the four ``args_*`` columns converted to strings,
        followed by ``idx``
    """
    groupkey = [
        "args_op_name",
        "args_output_type_shape",
        "args_input_type_shape",
        "args_provider",
    ]
    # Number of rows already seen for each key since the last Session row.
    occurences = {}

    def _idx(row):
        """
        There may be multiple nodes with the same
        input/output types and shapes.
        This function gives every instance a distinct id.
        The first unique op with the same I/O receives the index 0.
        The counters restart on every Session row.
        """
        if row["cat"] == "Session":
            occurences.clear()
            return -1
        key = tuple(str(row[k]) for k in groupkey)
        occurences[key] = occurences.get(key, -1) + 1
        return occurences[key]

    df = df.copy()
    df["idx"] = df.apply(_idx, axis=1)
    # Explicit copy: the columns of the filtered frame are overwritten below
    # and assigning into a filtered selection makes pandas emit a
    # SettingWithCopyWarning.
    kept = df[(df["cat"] == "Node") & df["name"].str.contains("kernel_time")].copy()
    for c in groupkey:
        # Values are converted to strings to make them usable as group keys.
        kept[c] = kept[c].apply(str)
    full_key = groupkey + ["idx"]
    return kept[full_key + ["dur"]].groupby(full_key).sum()
base = preprocess(prof_base) | ||
opti = preprocess(prof_opt) | ||
merge = base.merge( | ||
opti, how="outer", suffixes=("base", "opti"), left_index=True, right_index=True | ||
) | ||
merge = merge.reset_index(drop=False) | ||
merge.to_excel("plot_profiling_merged.xlsx", index=False) | ||
merge | ||
##################################################### | ||
# Aggregation | ||
def classify(row):
    """
    Tells on which side of the comparison a merged row exists.

    :param row: mapping with the durations ``durbase`` and ``duropti``
    :return: ``"-"`` if ``duropti`` is NaN, ``"+"`` if only ``durbase``
        is NaN, ``"="`` if both durations are available
    """
    # ``durbase`` is only looked at when ``duropti`` is available.
    if not numpy.isnan(row["duropti"]):
        return "+" if numpy.isnan(row["durbase"]) else "="
    return "-"
# Short names used in labels for the element types.
keys = {"float": "f"}


def process_shape(s):
    """
    Compacts the textual description of tensor types and shapes.

    :param s: string holding a Python literal, a list of single-item
        dictionaries ``{element type: dimensions}`` such as
        ``"[{'float': [1, 3, 112, 112]}]"``
    :return: one ``<type>-<d1>x<d2>x...`` item per tensor, joined by
        commas; element types missing from ``keys`` keep their full name
    :raises NotImplementedError: if an element does not hold exactly one item
    :raises ValueError: if *s* contains anything else than literals
    :raises SyntaxError: if *s* is not valid Python syntax
    """
    import ast

    # literal_eval only accepts literals, unlike eval it cannot run
    # arbitrary code found in the profile.
    value = ast.literal_eval(s)
    ns = []
    for item in value:
        if len(item) != 1:
            raise NotImplementedError(f"Unexpected value {item} in {s!r}.")
        (dtype, dims), = item.items()
        ns.append("-".join([keys.get(dtype, dtype), "x".join(map(str, dims))]))
    return ",".join(ns)
def label(row):
    """
    Builds the text identifying a node in the aggregated comparison.

    :param row: row holding ``args_op_name``, ``args_input_type_shape``,
        ``args_output_type_shape``, ``side``, ``args_provider`` and ``idx``
    :return: string following the pattern
        ``[<side><provider>]<op>(<input shapes>)-><output shapes>[<idx>]``
        where the provider is cut to its first three characters
    """
    op_name = row["args_op_name"]
    shapes_in = process_shape(row["args_input_type_shape"])
    shapes_out = process_shape(row["args_output_type_shape"])
    prefix_parts = (row["side"][0], row["args_provider"][:3])
    return "[{}{}]{}({})->{}[{}]".format(
        prefix_parts[0], prefix_parts[1], op_name, shapes_in, shapes_out, row["idx"]
    )
# Work on a copy so that ``merge`` keeps its original columns.
df = merge.copy()
df["side"] = df.apply(classify, axis=1)
df["label"] = df.apply(label, axis=1)
# The aggregations are given by name: recent versions of pandas deprecate
# passing ``numpy.sum`` or the builtin ``max`` to ``agg``. The named
# aggregation "sum" skips the missing durations left by the outer merge.
gr = (
    df[["label", "durbase", "duropti", "idx"]]
    .groupby("label")
    .agg({"durbase": "sum", "duropti": "sum", "idx": "max"})
)
gr
################################ | ||
# Final plot | ||
# ++++++++++ | ||
# let's filter out insignificant operators. | ||
grmax = gr["durbase"] + gr["duropti"] | ||
total = grmax.sum() | ||
grmax /= total | ||
gr = gr[grmax >= 0.01] | ||
fig, ax = plt.subplots(1, 2, figsize=(14, min(gr.shape[0], 500)), sharey=True) | ||
gr[["durbase", "duropti"]].plot.barh(ax=ax[0]) | ||
ax[0].set_title("Side by side duration") | ||
gr = gr.copy() | ||
gr["idx"] += 1 | ||
gr[["idx"]].plot.barh(ax=ax[1]) | ||
ax[1].set_title("Side by side count") | ||
fig.tight_layout() | ||
fig.savefig("plot_profiling_side_by_side.png") | ||
######################################## | ||
# On CUDA | ||
# +++++++ | ||
if "CUDAExecutionProvider" in get_available_providers():
    print("Profiling on CUDA")
    # Profiles of the original and of the optimized model on CUDA.
    prof_base = ort_profile(
        filename,
        feeds,
        repeat=6,
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
    )
    prof_opti = ort_profile(
        optimized,
        feeds,
        repeat=6,
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
    )

    unique_op = set(prof_base["args_op_name"])
    fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
    plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
    # Plot the CUDA profile computed just above (``prof_opti``),
    # ``prof_opt`` holds the profile measured on CPU.
    plot_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
    fig.savefig("plot_profiling_cuda.png")
else:
    print(f"CUDA not available in {get_available_providers()}")
    fig, ax = None, None

ax
Binary file added_doc/examples/plot_profiling_merged.xlsx
Binary file not shown.
Binary file added_doc/examples/prof_base.xlsx
Binary file not shown.
File renamed without changes.
36 changes: 36 additions & 0 deletions_unittests/ut_ort/test_ort_profile.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import unittest | ||
import numpy as np | ||
from pandas import DataFrame | ||
from onnx_array_api.npx import absolute, jit_onnx | ||
from onnx_array_api.ext_test_case import ExtTestCase | ||
from onnx_array_api.ort.ort_optimizers import ort_optimized_model | ||
from onnx_array_api.ort.ort_profile import ort_profile | ||
class TestOrtProfile(ExtTestCase):
    """Unit tests for ``ort_profile``."""

    def test_ort_profile(self):
        """
        Profiles a small model obtained with ``jit_onnx`` and checks
        the type of the result with and without ``as_df=False``.
        """

        def l1_loss(x, y):
            return absolute(x - y).sum()

        def l2_loss(x, y):
            return ((x - y) ** 2).sum()

        def myloss(x, y):
            return l1_loss(x[:, 0], y[:, 0]) + l2_loss(x[:, 1], y[:, 1])

        x = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
        y = np.array([[0.11, 0.22], [0.33, 0.44]], dtype=np.float32)

        jitted_myloss = jit_onnx(myloss)
        # The jitted function is executed once before the ONNX model
        # is retrieved.
        jitted_myloss(x, y)
        model = jitted_myloss.get_onnx()

        # An unexpected second argument must be rejected.
        self.assertRaise(lambda: ort_optimized_model(model, "NO"), ValueError)

        inputs = {"x0": x, "x1": y}
        optimized = ort_optimized_model(model)

        as_frame = ort_profile(optimized, inputs)
        self.assertIsInstance(as_frame, DataFrame)

        as_list = ort_profile(optimized, inputs, as_df=False)
        self.assertIsInstance(as_list, list)
if __name__ == "__main__": | ||
unittest.main(verbosity=2) |
6 changes: 5 additions & 1 deletiononnx_array_api/ort/ort_optimizers.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Oops, something went wrong.
Uh oh!
There was an error while loading.Please reload this page.
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.