Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Adds function to profile onnxruntime#12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
sdpython merged 9 commits into main from prof
Apr 27, 2023
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions_doc/api/ort.rst
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -3,6 +3,11 @@
ort
===

Optimization
++++++++++++

.. autofunction:: onnx_array_api.ort.ort_optimizers.ort_optimized_model

OrtTensor
+++++++++

Expand All@@ -15,3 +20,8 @@ OrtTensor
.. autoclass:: onnx_array_api.ort.ort_tensors.OrtTensor
:members:

Profiling
+++++++++

.. autofunction:: onnx_array_api.ort.ort_profile.ort_profile

8 changes: 4 additions & 4 deletions_doc/examples/plot_benchmark_rf.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -200,13 +200,13 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):
cache_dir, f"nf-{X.shape[1]}-rf-J-{n_j}-E-{n_estimators}-D-{max_depth}.onnx"
)
if os.path.exists(cache_name):
sess = InferenceSession(cache_name, so)
sess = InferenceSession(cache_name, so, providers=["CPUExecutionProvider"])
else:
bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} cvt onnx")
onx = to_onnx(rf, X[:1])
with open(cache_name, "wb") as f:
f.write(onx.SerializeToString())
sess = InferenceSession(cache_name, so)
sess = InferenceSession(cache_name, so, providers=["CPUExecutionProvider"])
onx_size = os.stat(cache_name).st_size

# run once to avoid counting the first run
Expand DownExpand Up@@ -234,7 +234,7 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):
o1.update(dict(avg=mean, med=med, n_runs=r, ttime=t, name="base"))
data.append(o1)

#baseline
#onnxruntime
bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predictO")
r, t, mean, med = measure_inference(
lambda x: sess.run(None, {"X": x}), X, repeat=repeat, max_time=max_time
Expand All@@ -258,7 +258,7 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):

#######################################################
# Printing the data
print(df)
df

#####################################################
# Plot
Expand Down
16 changes: 11 additions & 5 deletions_doc/examples/plot_optimization.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -5,6 +5,12 @@
Optimization with onnxruntime
=============================

*onnxruntime* optimizes the onnx graph by default before running
the inference. It modifies, fuses or adds new operators.
Some of them are standard onnx operators, some of them
are implemented in onnxruntime (see `Supported Operators
<https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md>`_).
This example looks into the differences of two models.

Optimize a model with onnxruntime
+++++++++++++++++++++++++++++++++
Expand DownExpand Up@@ -38,8 +44,8 @@
so.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
img = numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)

sess = InferenceSession(filename, so)
sess_opt = InferenceSession(optimized, so)
sess = InferenceSession(filename, so, providers=["CPUExecutionProvider"])
sess_opt = InferenceSession(optimized, so, providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
out = sess.run(None, {input_name: img})[0]
out_opt = sess_opt.run(None, {input_name: img})[0]
Expand DownExpand Up@@ -110,10 +116,10 @@
fig, ax = plt.subplots(1, 1, figsize=(12, 4))

df = DataFrame([t1, t2]).set_index("name")
print(df)
df

print(df["average"].values)
print((df["average"] - df["deviation"]).values)
#######################################
# And the graph is:

ax.bar(df.index, df["average"].values, yerr=df["deviation"].values, capsize=6)
ax.set_title("Measure performance of optimized model\nlower is better")
Expand Down
251 changes: 251 additions & 0 deletions_doc/examples/plot_profiling.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
"""

.. _l-onnx-array-onnxruntime-profiling:

Profiling with onnxruntime
==========================

*onnxruntime* optimizes the onnx graph by default before running
the inference. It modifies, fuses or adds new operators.
Some of them are standard onnx operators, some of them
are implemented in onnxruntime (see `Supported Operators
<https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md>`_).
This example profiles the two models.

Optimize a model with onnxruntime
+++++++++++++++++++++++++++++++++
"""
import ast
import os
import numpy
import matplotlib.pyplot as plt
from onnxruntime import get_available_providers
from onnx_array_api.ext_test_case import example_path
from onnx_array_api.ort.ort_optimizers import ort_optimized_model
from onnx_array_api.ort.ort_profile import ort_profile


filename = example_path("data/small.onnx")
optimized = filename + ".optimized.onnx"

# The optimized model is only generated when the file does not exist yet.
if not os.path.exists(optimized):
    ort_optimized_model(filename, output=optimized)
print(optimized)

#############################
# Profiling
# +++++++++

# Both models are profiled with the same input and the same options.
feeds = {"input": numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)}
cpu_profile_options = dict(
    repeat=6,
    disable_optimization=True,
    providers=["CPUExecutionProvider"],
)
prof_base = ort_profile(filename, feeds, **cpu_profile_options)
prof_base.to_excel("prof_base.xlsx", index=False)
prof_base

#######################################
# And the optimized model.

prof_opt = ort_profile(optimized, feeds, **cpu_profile_options)
prof_opt

#######################################
# And the graph is:


def plot_profile(df, ax0, ax1=None, title=None):
    """
    Draws horizontal bar charts summarizing a profile per operator type.

    :param df: dataframe with at least the columns ``dur`` and
        ``args_op_name``
    :param ax0: axis receiving the sum of ``dur`` per ``args_op_name``,
        sorted by increasing value
    :param ax1: optional axis receiving the number of rows per
        ``args_op_name``, in the same order as *ax0*
    :param title: optional title set on *ax0*
    """
    per_op = df[["dur", "args_op_name"]].groupby("args_op_name")

    gr_dur = per_op.sum().sort_values("dur")
    gr_dur.plot.barh(ax=ax0)
    if title is not None:
        ax0.set_title(title)

    if ax1 is None:
        return

    # The counts are aligned on the order used for the durations so that
    # both charts show the operators on the same rows.
    gr_n = per_op.count().loc[gr_dur.index, :]
    gr_n.plot.barh(ax=ax1)
    ax1.set_title("n occurences")


# The figure height follows the number of distinct operator types
# found in the baseline profile.
unique_op = set(prof_base["args_op_name"])
fig, ax = plt.subplots(
    nrows=2, ncols=2, figsize=(10, len(unique_op)), sharex="col"
)
# One row of charts per profile: baseline first, optimized second.
for row, (profile, caption) in enumerate(
    [(prof_base, "baseline"), (prof_opt, "optimized")]
):
    plot_profile(profile, ax[row, 0], ax[row, 1], title=caption)

fig.savefig("plot_profiling.png")

##################################################
# Merging profiles
# ++++++++++++++++
#
# Let's try to compare both profiles assuming every iteration
# processes the same image and the input and output sizes are the
# same at every iteration.


def preprocess(df):
    """
    Aggregates the kernel durations of a profile per node instance.

    Every row receives an index ``idx`` distinguishing the nodes sharing
    the same operator name, input/output types and shapes, and provider.
    Only the rows whose ``cat`` is ``"Node"`` and whose ``name`` contains
    ``"kernel_time"`` are kept, then ``dur`` is summed per
    (operator, output shape, input shape, provider, idx).

    :param df: profile as a dataframe with the columns ``cat``, ``name``,
        ``dur``, ``args_op_name``, ``args_output_type_shape``,
        ``args_input_type_shape``, ``args_provider``; it is not modified
    :return: dataframe with a single column ``dur`` indexed by the five
        grouping keys
    """
    groupkey = [
        "args_op_name",
        "args_output_type_shape",
        "args_input_type_shape",
        "args_provider",
    ]
    # Number of times every key was seen since the last "Session" row.
    seen = {}

    def _idx(row):
        """
        There may be multiple nodes with the same
        input/output types and shapes.
        This function gives every instance a distinct id.
        The first node with a given signature receives the index 0.
        The counter restarts every time a row of category
        ``Session`` is met.
        """
        if row["cat"] == "Session":
            seen.clear()
            return -1
        key = tuple(str(row[k]) for k in groupkey)
        seen[key] = seen.get(key, -1) + 1
        return seen[key]

    df = df.copy()
    df["idx"] = df.apply(_idx, axis=1)

    # na=False: a row with a missing name is not a kernel measure.
    is_kernel = (df["cat"] == "Node") & df["name"].str.contains(
        "kernel_time", na=False
    )
    # Explicit copy: columns are reassigned below and writing into a
    # filtered slice raises SettingWithCopyWarning.
    df = df[is_kernel].copy()

    # The keys are converted into strings so they can be grouped on.
    for c in groupkey:
        df[c] = df[c].apply(str)

    full_key = groupkey + ["idx"]
    return df[full_key + ["dur"]].groupby(full_key).sum()


base = preprocess(prof_base)
opti = preprocess(prof_opt)
# Outer join on the index: a node found in only one of the two profiles
# is kept, with a missing duration on the other side.
merge = base.merge(
    opti,
    how="outer",
    left_index=True,
    right_index=True,
    suffixes=("base", "opti"),
).reset_index(drop=False)
merge.to_excel("plot_profiling_merged.xlsx", index=False)
merge


#####################################################
# Aggregation


def classify(row):
    """
    Tells on which side of the comparison a row has a duration.

    :param row: mapping with the keys ``durbase`` and ``duropti``
    :return: ``"-"`` if ``duropti`` is NaN, otherwise ``"+"`` if
        ``durbase`` is NaN, otherwise ``"="``
    """
    if numpy.isnan(row["duropti"]):
        return "-"
    return "+" if numpy.isnan(row["durbase"]) else "="


# Short aliases for the element types shown in the labels.
# A type without an alias keeps its full name.
keys = {"float": "f"}


def process_shape(s):
    """
    Shortens the string representation of a list of typed shapes.

    ``"[{'float': [1, 3, 112, 112]}]"`` becomes ``"f-1x3x112x112"``,
    multiple entries are separated by a comma.

    :param s: string holding a python literal, a list of dictionaries
        with a single item ``{element type: list of dimensions}``
    :return: compact string
    :raises NotImplementedError: if a dictionary does not contain
        exactly one item
    :raises ValueError: if *s* is not a python literal
    """
    # literal_eval only accepts literals, unlike eval which would
    # execute any expression found in the string.
    shapes = ast.literal_eval(s)
    ns = []
    for item in shapes:
        if len(item) != 1:
            raise NotImplementedError(f"Unexpected value {item} in {s!r}.")
        ((dtype, dims),) = item.items()
        dims_text = "x".join(map(str, dims))
        ns.append(f"{keys.get(dtype, dtype)}-{dims_text}")
    return ",".join(ns)


def label(row):
    """
    Builds the label identifying a node in the final charts.

    The label is ``[<side><provider>]<op>(<inputs>)-><outputs>[<idx>]``
    where *side* is the first character of ``row["side"]``, *provider*
    the first three characters of ``row["args_provider"]`` and the
    shapes are shortened by ``process_shape``.

    :param row: mapping with the keys ``args_op_name``,
        ``args_input_type_shape``, ``args_output_type_shape``, ``side``,
        ``args_provider``, ``idx``
    :return: the label as a string
    """
    op_name = row["args_op_name"]
    shapes_in = process_shape(row["args_input_type_shape"])
    shapes_out = process_shape(row["args_output_type_shape"])
    prefix = f"{row['side'][0]}{row['args_provider'][:3]}"
    return f"[{prefix}]{op_name}({shapes_in})->{shapes_out}[{row['idx']}]"


df = merge.copy()
df["side"] = df.apply(classify, axis=1)
df["label"] = df.apply(label, axis=1)
# Aggregations are given by name: passing numpy.sum or the builtin max
# to agg is deprecated in recent versions of pandas.
gr = (
    df[["label", "durbase", "duropti", "idx"]]
    .groupby("label")
    .agg({"durbase": "sum", "duropti": "sum", "idx": "max"})
)
gr

################################
# Final plot
# ++++++++++

# Let's filter out insignificant operators: only the labels whose
# cumulated duration (baseline + optimized) reaches 1% of the total are kept.
grmax = gr["durbase"] + gr["duropti"]
total = grmax.sum()
grmax = grmax / total
gr = gr[grmax >= 0.01]


fig, ax = plt.subplots(
    nrows=1, ncols=2, figsize=(14, min(len(gr), 500)), sharey=True
)
gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
ax[0].set_title("Side by side duration")
# idx starts at 0: adding one gives the number of instances.
gr = gr.assign(idx=gr["idx"] + 1)
gr[["idx"]].plot.barh(ax=ax[1])
ax[1].set_title("Side by side count")
fig.tight_layout()
fig.savefig("plot_profiling_side_by_side.png")


########################################
# On CUDA
# +++++++


if "CUDAExecutionProvider" in get_available_providers():
    print("Profiling on CUDA")
    prof_base = ort_profile(
        filename,
        feeds,
        repeat=6,
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
    )
    prof_opti = ort_profile(
        optimized,
        feeds,
        repeat=6,
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
    )

    unique_op = set(prof_base["args_op_name"])
    fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
    plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
    # The second row must show the profile just computed on CUDA
    # (prof_opti), not the CPU profile of the optimized model (prof_opt).
    plot_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
    fig.savefig("plot_profiling_cuda.png")
else:
    print(f"CUDA not available in {get_available_providers()}")
    fig, ax = None, None

ax
Binary file added_doc/examples/plot_profiling_merged.xlsx
View file
Open in desktop
Binary file not shown.
Binary file added_doc/examples/prof_base.xlsx
View file
Open in desktop
Binary file not shown.
36 changes: 36 additions & 0 deletions_unittests/ut_ort/test_ort_profile.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
import unittest
import numpy as np
from pandas import DataFrame
from onnx_array_api.npx import absolute, jit_onnx
from onnx_array_api.ext_test_case import ExtTestCase
from onnx_array_api.ort.ort_optimizers import ort_optimized_model
from onnx_array_api.ort.ort_profile import ort_profile


class TestOrtProfile(ExtTestCase):
    """Unit tests for ``ort_profile``."""

    def test_ort_profile(self):
        """
        Profiles the optimized version of a jitted loss and checks the
        type of the result, with and without ``as_df=False``.
        """

        def l1_loss(x, y):
            return absolute(x - y).sum()

        def l2_loss(x, y):
            return ((x - y) ** 2).sum()

        def myloss(x, y):
            return l1_loss(x[:, 0], y[:, 0]) + l2_loss(x[:, 1], y[:, 1])

        x = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
        y = np.array([[0.11, 0.22], [0.33, 0.44]], dtype=np.float32)

        jitted = jit_onnx(myloss)
        # The function is called once before the model is retrieved
        # (presumably the first call builds the ONNX graph - to confirm).
        jitted(x, y)
        onx = jitted.get_onnx()
        feeds = {"x0": x, "x1": y}

        # An unknown optimization level is rejected.
        self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError)

        optimized = ort_optimized_model(onx)
        self.assertIsInstance(ort_profile(optimized, feeds), DataFrame)
        self.assertIsInstance(ort_profile(optimized, feeds, as_df=False), list)


if __name__ == "__main__":
unittest.main(verbosity=2)
6 changes: 5 additions & 1 deletiononnx_array_api/ort/ort_optimizers.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -33,7 +33,11 @@ def ort_optimized_model(
so = SessionOptions()
so.graph_optimization_level = glevel
so.optimized_model_filepath = str(cache)
InferenceSession(onx if isinstance(onx, str) else onx.SerializeToString(), so)
InferenceSession(
onx if isinstance(onx, str) else onx.SerializeToString(),
so,
providers=["CPUExecutionProvider"],
)
if output is None and not cache.exists():
raise RuntimeError(f"The optimized model {str(cache)!r} not found.")
if output is not None:
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp