Apr 27, 2023 · Apr 25, 2023 · Apr 25, 2023 · Apr 25, 2023 · Apr 25, 2023 · Apr 25, 2023
diff --git a/_doc/api/ort.rst b/_doc/api/ort.rst
 ort
 ===

 Optimization
 ++++++++++++

 .. autofunction:: onnx_array_api.ort.ort_optimizers.ort_optimized_model

 OrtTensor
 +++++++++

 .. autoclass:: onnx_array_api.ort.ort_tensors.OrtTensor
    :members:

 Profiling
 +++++++++

 .. autofunction:: onnx_array_api.ort.ort_profile.ort_profile

diff --git a/_doc/examples/plot_benchmark_rf.py b/_doc/examples/plot_benchmark_rf.py
        cache_dir, f"nf-{X.shape[1]}-rf-J-{n_j}-E-{n_estimators}-D-{max_depth}.onnx"
    )
    if os.path.exists(cache_name):
        sess = InferenceSession(cache_name, so)
        sess = InferenceSession(cache_name, so, providers=["CPUExecutionProvider"])
    else:
        bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} cvt onnx")
        onx = to_onnx(rf, X[:1])
        with open(cache_name, "wb") as f:
            f.write(onx.SerializeToString())
        sess = InferenceSession(cache_name, so)
        sess = InferenceSession(cache_name, so, providers=["CPUExecutionProvider"])
    onx_size = os.stat(cache_name).st_size

    # run once to avoid counting the first run
    o1.update(dict(avg=mean, med=med, n_runs=r, ttime=t, name="base"))
    data.append(o1)

    #baseline
    #onnxruntime
    bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predictO")
    r, t, mean, med = measure_inference(
        lambda x: sess.run(None, {"X": x}), X, repeat=repeat, max_time=max_time

 #######################################################
 # Printing the data
 print(df)
 df

 #####################################################
 # Plot
diff --git a/_doc/examples/plot_optimization.py b/_doc/examples/plot_optimization.py
 Optimization with onnxruntime
 =============================

 *onnxruntime* optimizes the onnx graph by default before running
 the inference. It modifies, fuses or adds new operators.
 Some of them are standard onnx operators, some of them
 are implemented in onnxruntime (see `Supported Operators
 <https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md>`_).
 This example looks into the differences of two models.

 Optimize a model with onnxruntime
 +++++++++++++++++++++++++++++++++
 so.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
 img = numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)

 sess = InferenceSession(filename, so)
 sess_opt = InferenceSession(optimized, so)
 sess = InferenceSession(filename, so, providers=["CPUExecutionProvider"])
 sess_opt = InferenceSession(optimized, so, providers=["CPUExecutionProvider"])
 input_name = sess.get_inputs()[0].name
 out = sess.run(None, {input_name: img})[0]
 out_opt = sess_opt.run(None, {input_name: img})[0]
 fig, ax = plt.subplots(1, 1, figsize=(12, 4))

 df = DataFrame([t1, t2]).set_index("name")
 print(df)
 df

 print(df["average"].values)
 print((df["average"] - df["deviation"]).values)
 #######################################
 # And the graph is:

 ax.bar(df.index, df["average"].values, yerr=df["deviation"].values, capsize=6)
 ax.set_title("Measure performance of optimized model\nlower is better")
diff --git a/_doc/examples/plot_profiling.py b/_doc/examples/plot_profiling.py
 """

 .. _l-onnx-array-onnxruntime-profiling:

 Profiling with onnxruntime
 ==========================

 *onnxruntime* optimizes the onnx graph by default before running
 the inference. It modifies, fuses or adds new operators.
 Some of them are standard onnx operators, some of them
 are implemented in onnxruntime (see `Supported Operators
 <https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md>`_).
 This example profiles the two models.

 Optimize a model with onnxruntime
 +++++++++++++++++++++++++++++++++
 """
 import os
 import numpy
 import matplotlib.pyplot as plt
 from onnxruntime import get_available_providers
 from onnx_array_api.ext_test_case import example_path
 from onnx_array_api.ort.ort_optimizers import ort_optimized_model
 from onnx_array_api.ort.ort_profile import ort_profile


 # Locate the example model and derive the path of its optimized counterpart.
 filename = example_path("data/small.onnx")
 optimized = filename + ".optimized.onnx"

 # The optimized file is only generated when it is not on disk yet,
 # later runs reuse the existing file.
 if not os.path.exists(optimized):
    ort_optimized_model(filename, output=optimized)
 print(optimized)

 #############################
 # Profiling
 # +++++++++

 # One random float32 array of shape (1, 3, 112, 112) fed under the name "input".
 feeds = {"input": numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)}
 # Profile the original model with the CPU provider.
 # NOTE(review): disable_optimization=True presumably prevents onnxruntime
 # from optimizing the graph again when the model is loaded - confirm in
 # ort_profile.
 prof_base = ort_profile(
    filename,
    feeds,
    repeat=6,
    disable_optimization=True,
    providers=["CPUExecutionProvider"],
 )
 # Keep a copy of the raw baseline profile on disk.
 prof_base.to_excel("prof_base.xlsx", index=False)
 prof_base

 #######################################
 # And the optimized model.

 # Same settings as the baseline, applied to the optimized file.
 prof_opt = ort_profile(
    optimized,
    feeds,
    repeat=6,
    disable_optimization=True,
    providers=["CPUExecutionProvider"],
 )
 prof_opt

 #######################################
 # And the graph is:


 def plot_profile(df, ax0, ax1=None, title=None):
    """
    Draws horizontal bar charts summarizing a profile per operator name.

    :param df: profile with at least the columns ``dur`` and ``args_op_name``
    :param ax0: axis receiving the summed ``dur`` per operator,
        sorted by increasing duration
    :param ax1: optional axis receiving the number of rows per operator,
        displayed in the same operator order as *ax0*
    :param title: optional title set on *ax0*
    """
    per_op = df[["dur", "args_op_name"]].groupby("args_op_name")

    durations = per_op.sum().sort_values("dur")
    durations.plot.barh(ax=ax0)
    if title is not None:
        ax0.set_title(title)

    if ax1 is None:
        return

    counts = per_op.count().sort_values("dur")
    # Align the counts on the order used for the durations so that both
    # charts list the operators identically.
    counts = counts.loc[durations.index, :]
    counts.plot.barh(ax=ax1)
    ax1.set_title("n occurences")


 # One row of charts per model; the figure height grows with the number of
 # distinct operator names found in the baseline profile.
 unique_op = set(prof_base["args_op_name"])
 fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
 plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
 plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")

 fig.savefig("plot_profiling.png")

 ##################################################
 # Merging profiles
 # ++++++++++++++++
 #
 # Let's try to compare both profiles assuming every iteration
 # processes the same image and the input and output sizes are the
 # same at every iteration.


 def preprocess(df):
    """
    Sums the kernel durations of a profile per distinct node.

    Rows are identified by operator name, output and input type/shape,
    provider and an occurrence index ``idx``. Only rows whose ``cat`` is
    ``"Node"`` and whose ``name`` contains ``"kernel_time"`` are kept.

    :param df: profile with the columns ``cat``, ``name``, ``dur`` and
        the four ``args_*`` columns listed below; it is not modified
    :return: dataframe indexed by the four ``args_*`` columns and ``idx``,
        with the summed ``dur`` as only column
    """
    shape_keys = [
        "args_op_name",
        "args_output_type_shape",
        "args_input_type_shape",
        "args_provider",
    ]
    # Number of times every key was met since the last "Session" row.
    seen = {}

    def _idx(row):
        """
        Several nodes may share the same operator, types and shapes.
        Every occurrence receives a distinct index, the first one gets 0.
        A row of category ``Session`` resets the counters and gets -1.
        """
        if row["cat"] == "Session":
            seen.clear()
            return -1
        key = tuple(str(row[k]) for k in shape_keys)
        seen[key] = seen.get(key, -1) + 1
        return seen[key]

    df = df.copy()
    # The index is computed on every row, before any filtering.
    df["idx"] = df.apply(_idx, axis=1)
    kernel_rows = (df["cat"] == "Node") & df["name"].str.contains("kernel_time")
    df = df[kernel_rows]
    for column in shape_keys:
        df[column] = df[column].apply(str)
    index_columns = shape_keys + ["idx"]
    return df[index_columns + ["dur"]].groupby(index_columns).sum()


 base = preprocess(prof_base)
 opti = preprocess(prof_opt)
 # Outer join on the index of both frames so that rows present in only one
 # profile are kept; the suffixes turn the two "dur" columns into "durbase"
 # and "duropti".
 merge = base.merge(
    opti, how="outer", suffixes=("base", "opti"), left_index=True, right_index=True
 )
 # Turn the index levels back into regular columns.
 merge = merge.reset_index(drop=False)
 merge.to_excel("plot_profiling_merged.xlsx", index=False)
 merge


 #####################################################
 # Aggregation


 def classify(row):
    """
    Tells which profile a merged row comes from.

    :param row: mapping with the durations ``duropti`` and ``durbase``
    :return: ``"-"`` if ``duropti`` is NaN, ``"+"`` if ``durbase`` is NaN
        (and ``duropti`` is not), ``"="`` otherwise
    """
    for column, tag in (("duropti", "-"), ("durbase", "+")):
        if numpy.isnan(row[column]):
            return tag
    return "="


 # Short aliases used in labels for element types; a type missing from this
 # mapping is displayed with its full name.
 keys = {"float": "f"}


 def process_shape(s):
    """
    Converts the string representation of a list of typed shapes
    into a compact label.

    ``"[{'float': [1, 3, 112, 112]}]"`` becomes ``"f-1x3x112x112"``,
    several entries are separated by commas.

    :param s: string holding a python literal, a list of dictionaries
        with a single item ``{element type: list of dimensions}``
    :return: compact label
    :raises NotImplementedError: if a dictionary does not contain
        exactly one item
    """
    # Local import: the string is parsed as a literal instead of being
    # evaluated as arbitrary python code.
    import ast

    labels = []
    for entry in ast.literal_eval(s):
        if len(entry) != 1:
            raise NotImplementedError(f"Unexpected value {entry} in {s!r}.")
        ((dtype, dims),) = entry.items()
        # Unknown element types keep their full name instead of failing.
        short = keys.get(dtype, dtype)
        labels.append("-".join([short, "x".join(map(str, dims))]))
    return ",".join(labels)


 def label(row):
    """
    Builds a readable label for a row of the merged profile.

    The label looks like
    ``[<side><provider>]<op>(<input shapes>)-><output shapes>[<idx>]``
    where *side* is the first character of ``row["side"]`` and *provider*
    the first three characters of ``row["args_provider"]``.

    :param row: row with the keys ``args_op_name``,
        ``args_input_type_shape``, ``args_output_type_shape``,
        ``side``, ``args_provider`` and ``idx``
    :return: the label
    """
    op_name = row["args_op_name"]
    inputs, outputs = (
        process_shape(row[column])
        for column in ("args_input_type_shape", "args_output_type_shape")
    )
    origin = f"{row['side'][0]}{row['args_provider'][:3]}"
    return f"[{origin}]{op_name}({inputs})->{outputs}[{row['idx']}]"


 df = merge.copy()
 # "side" tells which profile contains the row, "label" is a readable name.
 df["side"] = df.apply(classify, axis=1)
 df["label"] = df.apply(label, axis=1)
 # Aggregations are given by name: passing numpy.sum or the builtin max
 # to agg is deprecated in recent versions of pandas, the string aliases
 # produce the same result.
 gr = (
    df[["label", "durbase", "duropti", "idx"]]
    .groupby("label")
    .agg({"durbase": "sum", "duropti": "sum", "idx": "max"})
 )
 gr

 ################################
 # Final plot
 # ++++++++++

 # Let's filter out insignificant operators: only the labels accounting
 # for at least 1% of the total duration (both models together) are kept.
 grmax = gr["durbase"] + gr["duropti"]
 total = grmax.sum()
 grmax /= total
 gr = gr[grmax >= 0.01]


 # The figure height follows the number of remaining labels, capped at 500.
 fig, ax = plt.subplots(1, 2, figsize=(14, min(gr.shape[0], 500)), sharey=True)
 gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
 ax[0].set_title("Side by side duration")
 gr = gr.copy()
 # "idx" holds the highest zero-based occurrence index of every label,
 # adding one turns it into a count.
 gr["idx"] += 1
 gr[["idx"]].plot.barh(ax=ax[1])
 ax[1].set_title("Side by side count")
 fig.tight_layout()
 fig.savefig("plot_profiling_side_by_side.png")


 ########################################
 # On CUDA
 # +++++++


 if "CUDAExecutionProvider" in get_available_providers():
    print("Profiling on CUDA")
    # Same measurements as on CPU, this time with the CUDA provider.
    prof_base = ort_profile(
        filename,
        feeds,
        repeat=6,
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
    )
    prof_opti = ort_profile(
        optimized,
        feeds,
        repeat=6,
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
    )

    unique_op = set(prof_base["args_op_name"])
    fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
    plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
    # Plot the CUDA profile of the optimized model (prof_opti) and not
    # prof_opt, which still holds the profile measured on CPU.
    plot_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
    fig.savefig("plot_profiling_cuda.png")
 else:
    print(f"CUDA not available in {get_available_providers()}")
    fig, ax = None, None

 ax
diff --git a/_doc/examples/plot_profiling_merged.xlsx b/_doc/examples/plot_profiling_merged.xlsx
diff --git a/_doc/examples/prof_base.xlsx b/_doc/examples/prof_base.xlsx
diff --git a/_unittests/ut_ort/test_ort_opimizer.py → _unittests/ut_ort/test_ort_optimizer.py b/_unittests/ut_ort/test_ort_opimizer.py → _unittests/ut_ort/test_ort_optimizer.py
diff --git a/_unittests/ut_ort/test_ort_profile.py b/_unittests/ut_ort/test_ort_profile.py
 import unittest
 import numpy as np
 from pandas import DataFrame
 from onnx_array_api.npx import absolute, jit_onnx
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api.ort.ort_optimizers import ort_optimized_model
 from onnx_array_api.ort.ort_profile import ort_profile


 class TestOrtProfile(ExtTestCase):
    """Unit tests for ort_profile."""

    def test_ort_profile(self):
        """Profiles an optimized jitted loss, as a DataFrame and as a list."""

        def myloss(x, y):
            # L1 loss on the first column, L2 loss on the second one.
            l1 = absolute(x[:, 0] - y[:, 0]).sum()
            l2 = ((x[:, 1] - y[:, 1]) ** 2).sum()
            return l1 + l2

        x = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
        y = np.array([[0.11, 0.22], [0.33, 0.44]], dtype=np.float32)

        jitted = jit_onnx(myloss)
        # The jitted function is called once before the model is retrieved.
        jitted(x, y)
        model = jitted.get_onnx()

        # An unknown optimization level is rejected.
        self.assertRaise(lambda: ort_optimized_model(model, "NO"), ValueError)

        optimized = ort_optimized_model(model)
        inputs = {"x0": x, "x1": y}
        for options, expected in (({}, DataFrame), ({"as_df": False}, list)):
            self.assertIsInstance(ort_profile(optimized, inputs, **options), expected)


 if __name__ == "__main__":
    unittest.main(verbosity=2)
diff --git a/onnx_array_api/ort/ort_optimizers.py b/onnx_array_api/ort/ort_optimizers.py
    so = SessionOptions()
    so.graph_optimization_level = glevel
    so.optimized_model_filepath = str(cache)
    InferenceSession(onx if isinstance(onx, str) else onx.SerializeToString(), so)
    InferenceSession(
        onx if isinstance(onx, str) else onx.SerializeToString(),
        so,
        providers=["CPUExecutionProvider"],
    )
    if output is None and not cache.exists():
        raise RuntimeError(f"The optimized model {str(cache)!r} not found.")
    if output is not None:
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3,6 +3,11 @@
		ort
		===

		Optimization
		++++++++++++

		.. autofunction:: onnx_array_api.ort.ort_optimizers.ort_optimized_model

		OrtTensor
		+++++++++

Expand All		@@ -15,3 +20,8 @@ OrtTensor
		.. autoclass:: onnx_array_api.ort.ort_tensors.OrtTensor
		:members:

		Profiling
		+++++++++

		.. autofunction:: onnx_array_api.ort.ort_profile.ort_profile
Original file line number	Diff line number	Diff line change
Expand Up		@@ -200,13 +200,13 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):
		cache_dir, f"nf-{X.shape[1]}-rf-J-{n_j}-E-{n_estimators}-D-{max_depth}.onnx"
		)
		if os.path.exists(cache_name):
		sess = InferenceSession(cache_name, so)
		sess = InferenceSession(cache_name, so, providers=["CPUExecutionProvider"])
		else:
		bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} cvt onnx")
		onx = to_onnx(rf, X[:1])
		with open(cache_name, "wb") as f:
		f.write(onx.SerializeToString())
		sess = InferenceSession(cache_name, so)
		sess = InferenceSession(cache_name, so, providers=["CPUExecutionProvider"])
		onx_size = os.stat(cache_name).st_size

		# run once to avoid counting the first run
Expand DownExpand Up		@@ -234,7 +234,7 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):
		o1.update(dict(avg=mean, med=med, n_runs=r, ttime=t, name="base"))
		data.append(o1)

		#baseline
		#onnxruntime
		bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predictO")
		r, t, mean, med = measure_inference(
		lambda x: sess.run(None, {"X": x}), X, repeat=repeat, max_time=max_time
Expand All		@@ -258,7 +258,7 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):

		#######################################################
		# Printing the data
		print(df)
		df

		#####################################################
		# Plot
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -5,6 +5,12 @@
		Optimization with onnxruntime
		=============================

		onnxruntime optimizes the onnx graph by default before running
		the inference. It modifies, fuses or adds new operators.
		Some of them are standard onnx operators, some of them
		are implemented in onnxruntime (see `Supported Operators
		<https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md>`_).
		This example looks into the differences of two models.

		Optimize a model with onnxruntime
		+++++++++++++++++++++++++++++++++
Expand DownExpand Up		@@ -38,8 +44,8 @@
		so.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
		img = numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)

		sess = InferenceSession(filename, so)
		sess_opt = InferenceSession(optimized, so)
		sess = InferenceSession(filename, so, providers=["CPUExecutionProvider"])
		sess_opt = InferenceSession(optimized, so, providers=["CPUExecutionProvider"])
		input_name = sess.get_inputs()[0].name
		out = sess.run(None, {input_name: img})[0]
		out_opt = sess_opt.run(None, {input_name: img})[0]
Expand DownExpand Up		@@ -110,10 +116,10 @@
		fig, ax = plt.subplots(1, 1, figsize=(12, 4))

		df = DataFrame([t1, t2]).set_index("name")
		print(df)
		df

		print(df["average"].values)
		print((df["average"] - df["deviation"]).values)
		#######################################
		# And the graph is:

		ax.bar(df.index, df["average"].values, yerr=df["deviation"].values, capsize=6)
		ax.set_title("Measure performance of optimized model\nlower is better")
Expand Down
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,251 @@
		"""

		.. _l-onnx-array-onnxruntime-profiling:

		Profiling with onnxruntime
		==========================

		onnxruntime optimizes the onnx graph by default before running
		the inference. It modifies, fuses or adds new operators.
		Some of them are standard onnx operators, some of them
		are implemented in onnxruntime (see `Supported Operators
		<https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md>`_).
		This example profiles the two models.

		Optimize a model with onnxruntime
		+++++++++++++++++++++++++++++++++
		"""
		import os
		import numpy
		import matplotlib.pyplot as plt
		from onnxruntime import get_available_providers
		from onnx_array_api.ext_test_case import example_path
		from onnx_array_api.ort.ort_optimizers import ort_optimized_model
		from onnx_array_api.ort.ort_profile import ort_profile


		filename = example_path("data/small.onnx")
		optimized = filename + ".optimized.onnx"

		if not os.path.exists(optimized):
		ort_optimized_model(filename, output=optimized)
		print(optimized)

		#############################
		# Profiling
		# +++++++++

		feeds = {"input": numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)}
		prof_base = ort_profile(
		filename,
		feeds,
		repeat=6,
		disable_optimization=True,
		providers=["CPUExecutionProvider"],
		)
		prof_base.to_excel("prof_base.xlsx", index=False)
		prof_base

		#######################################
		# And the optimized model.

		prof_opt = ort_profile(
		optimized,
		feeds,
		repeat=6,
		disable_optimization=True,
		providers=["CPUExecutionProvider"],
		)
		prof_opt

		#######################################
		# And the graph is:


		def plot_profile(df, ax0, ax1=None, title=None):
		gr_dur = (
		df[["dur", "args_op_name"]].groupby("args_op_name").sum().sort_values("dur")
		)
		gr_dur.plot.barh(ax=ax0)
		if title is not None:
		ax0.set_title(title)
		if ax1 is not None:
		gr_n = (
		df[["dur", "args_op_name"]]
		.groupby("args_op_name")
		.count()
		.sort_values("dur")
		)
		gr_n = gr_n.loc[gr_dur.index, :]
		gr_n.plot.barh(ax=ax1)
		ax1.set_title("n occurences")


		unique_op = set(prof_base["args_op_name"])
		fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
		plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
		plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")

		fig.savefig("plot_profiling.png")

		##################################################
		# Merging profiles
		# ++++++++++++++++
		#
		# Let's try to compare both profiles assuming every iteration
		# process the same image and the input and output size are the
		# same at every iteration.


		def preprocess(df):
		groupkey = [
		"args_op_name",
		"args_output_type_shape",
		"args_input_type_shape",
		"args_provider",
		]

		def _idx(row):
		"""
		There may be multiple node with the same
		input/output types and shapes.
		This function gives every instance a distinct id.
		First unique op with same I/O receives the index 0.
		The counter restart when the session goes to the
		next image.
		"""
		if row["cat"] == "Session":
		occurences[0] = {}
		return -1
		assert "idx" not in groupkey
		vals = [row[k] for k in groupkey]
		key = tuple(map(str, vals))
		if key not in occurences[0]:
		occurences[0][key] = 0
		else:
		occurences[0][key] += 1
		return occurences[0][key]

		df = df.copy()
		occurences = [{}]
		df["idx"] = df.apply(_idx, axis=1)
		df = df[(df["cat"] == "Node") & df["name"].str.contains("kernel_time")]
		groupkey.append("idx")
		for c in groupkey:
		if c != "idx":
		df[c] = df[c].apply(str)
		gr = df[groupkey + ["dur"]].groupby(groupkey)
		return gr.sum()


		base = preprocess(prof_base)
		opti = preprocess(prof_opt)
		merge = base.merge(
		opti, how="outer", suffixes=("base", "opti"), left_index=True, right_index=True
		)
		merge = merge.reset_index(drop=False)
		merge.to_excel("plot_profiling_merged.xlsx", index=False)
		merge


		#####################################################
		# Aggregation


		def classify(row):
		if numpy.isnan(row["duropti"]):
		return "-"
		if numpy.isnan(row["durbase"]):
		return "+"
		return "="


		keys = {"float": "f"}


		def process_shape(s):
		value = eval(s)
		ns = []
		for v in value:
		if len(v) != 1:
		raise NotImplementedError(f"Unexpected value {v} in {s!r}.")
		k, v = list(v.items())[0]
		n = "-".join([keys[k], "x".join(map(str, v))])
		ns.append(n)
		return ",".join(ns)


		def label(row):
		name = row["args_op_name"]
		inshape = process_shape(row["args_input_type_shape"])
		outshape = process_shape(row["args_output_type_shape"])
		side = row["side"][0]
		prov = row["args_provider"][:3]
		idx = row["idx"]
		return f"[{side}{prov}]{name}({inshape})->{outshape}[{idx}]"


		df = merge.copy()
		df["side"] = df.apply(classify, axis=1)
		df["label"] = df.apply(label, axis=1)
		gr = (
		df[["label", "durbase", "duropti", "idx"]]
		.groupby("label")
		.agg({"durbase": numpy.sum, "duropti": numpy.sum, "idx": max})
		)
		gr

		################################
		# Final plot
		# ++++++++++

		# let's filter out unsignificant operator.
		grmax = gr["durbase"] + gr["duropti"]
		total = grmax.sum()
		grmax /= total
		gr = gr[grmax >= 0.01]


		fig, ax = plt.subplots(1, 2, figsize=(14, min(gr.shape[0], 500)), sharey=True)
		gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
		ax[0].set_title("Side by side duration")
		gr = gr.copy()
		gr["idx"] += 1
		gr[["idx"]].plot.barh(ax=ax[1])
		ax[1].set_title("Side by side count")
		fig.tight_layout()
		fig.savefig("plot_profiling_side_by_side.png")


		########################################
		# On CUDA
		# +++++++


		if "CUDAExecutionProvider" in get_available_providers():
		print("Profiling on CUDA")
		prof_base = ort_profile(
		filename,
		feeds,
		repeat=6,
		disable_optimization=True,
		providers=["CUDAExecutionProvider"],
		)
		prof_opti = ort_profile(
		optimized,
		feeds,
		repeat=6,
		disable_optimization=True,
		providers=["CUDAExecutionProvider"],
		)

		unique_op = set(prof_base["args_op_name"])
		fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
		plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
		plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")
		fig.savefig("plot_profiling_cuda.png")
		else:
		print(f"CUDA not available in {get_available_providers()}")
		fig, ax = None, None

		ax
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,36 @@
		import unittest
		import numpy as np
		from pandas import DataFrame
		from onnx_array_api.npx import absolute, jit_onnx
		from onnx_array_api.ext_test_case import ExtTestCase
		from onnx_array_api.ort.ort_optimizers import ort_optimized_model
		from onnx_array_api.ort.ort_profile import ort_profile


		class TestOrtProfile(ExtTestCase):
		def test_ort_profile(self):
		def l1_loss(x, y):
		return absolute(x - y).sum()

		def l2_loss(x, y):
		return ((x - y) ** 2).sum()

		def myloss(x, y):
		return l1_loss(x[:, 0], y[:, 0]) + l2_loss(x[:, 1], y[:, 1])

		jitted_myloss = jit_onnx(myloss)
		x = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
		y = np.array([[0.11, 0.22], [0.33, 0.44]], dtype=np.float32)
		jitted_myloss(x, y)
		onx = jitted_myloss.get_onnx()
		feeds = {"x0": x, "x1": y}
		self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError)
		optimized = ort_optimized_model(onx)
		prof = ort_profile(optimized, feeds)
		self.assertIsInstance(prof, DataFrame)
		prof = ort_profile(optimized, feeds, as_df=False)
		self.assertIsInstance(prof, list)


		if __name__ == "__main__":
		unittest.main(verbosity=2)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -33,7 +33,11 @@ def ort_optimized_model(
		so = SessionOptions()
		so.graph_optimization_level = glevel
		so.optimized_model_filepath = str(cache)
		InferenceSession(onx if isinstance(onx, str) else onx.SerializeToString(), so)
		InferenceSession(
		onx if isinstance(onx, str) else onx.SerializeToString(),
		so,
		providers=["CPUExecutionProvider"],
		)
		if output is None and not cache.exists():
		raise RuntimeError(f"The optimized model {str(cache)!r} not found.")
		if output is not None:
Expand Down