May 5, 2023 · May 4, 2023
diff --git a/_doc/api/ort.rst b/_doc/api/ort.rst
 Profiling
 +++++++++

 .. autofunction:: onnx_array_api.ort.ort_profile.merge_ort_profile

 .. autofunction:: onnx_array_api.ort.ort_profile.ort_profile

diff --git a/_doc/api/plotting.rst b/_doc/api/plotting.rst

 .. autofunction:: onnx_array_api.plotting.dot_plot.to_dot

 Statistics
 ++++++++++

 .. autofunction:: onnx_array_api.plotting.stat_plot.plot_ort_profile

 Text
 ++++

diff --git a/_doc/examples/plot_profiling.py b/_doc/examples/plot_profiling.py
 from onnxruntime import get_available_providers
 from onnx_array_api.ext_test_case import example_path
 from onnx_array_api.ort.ort_optimizers import ort_optimized_model
 from onnx_array_api.ort.ort_profile import ort_profile
 from onnx_array_api.ort.ort_profile import ort_profile, merge_ort_profile
 from onnx_array_api.plotting.stat_plot import plot_ort_profile


 filename = example_path("data/small.onnx")
 suffix = ""
 filename = example_path(f"data/small{suffix}.onnx")
 optimized = filename + ".optimized.onnx"
 print(f"model={filename!r}")

 if not os.path.exists(optimized):
    ort_optimized_model(filename, output=optimized)
 print(optimized)
 print(f"optimized={optimized!r}")

 #############################
 # .. _l-example-ort-profiling:
 #
 # Profiling
 # +++++++++

    disable_optimization=True,
    providers=["CPUExecutionProvider"],
 )
 prof_base.to_excel("prof_base.xlsx", index=False)
 prof_base.to_excel(f"prof_base{suffix}.xlsx", index=False)
 prof_base

 #######################################
 # And the optimized model.

 prof_opt = ort_profile(
 prof_opti = ort_profile(
    optimized,
    feeds,
    repeat=6,
    disable_optimization=True,
    providers=["CPUExecutionProvider"],
 )
 prof_opt
 prof_opti.to_excel(f"prof_opti{suffix}.xlsx", index=False)
 prof_opti

 #######################################
 # And the graph is:


 def plot_profile(df, ax0, ax1=None, title=None):
    """
    Draws horizontal bar charts summarizing a profiling dataframe
    per operator type.

    :param df: dataframe with at least columns `dur` and `args_op_name`
    :param ax0: axis receiving the sum of `dur` per `args_op_name`,
        sorted by increasing total
    :param ax1: optional axis receiving the number of rows per
        `args_op_name`, listed in the same order as on *ax0*
    :param title: optional title set on *ax0*
    """
    per_op = df[["dur", "args_op_name"]].groupby("args_op_name")
    total_dur = per_op.sum().sort_values("dur")
    total_dur.plot.barh(ax=ax0)
    if title is not None:
        ax0.set_title(title)
    if ax1 is None:
        return
    # The counts are reindexed on the duration index so that both
    # charts list the operators in the same order.
    n_rows = per_op.count().sort_values("dur").loc[total_dur.index, :]
    n_rows.plot.barh(ax=ax1)
    ax1.set_title("n occurences")


 unique_op = set(prof_base["args_op_name"])
 fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
 plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
 plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")

 fig.savefig("plot_profiling.png")
 plot_ort_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
 plot_ort_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
 fig.tight_layout()
 fig.savefig(f"plot_profiling{suffix}.png")

 ##################################################
 # Merging profiles
 # process the same image and the input and output size are the
 # same at every iteration.


 def preprocess(df):
    """
    Aggregates the kernel durations of a profiling dataframe.

    Every row whose `cat` is ``"Node"`` and whose `name` contains
    ``"kernel_time"`` is kept. The column `dur` is then summed per
    (operator name, output type/shape, input type/shape, provider, idx).

    :param df: dataframe with columns `cat`, `name`, `dur`,
        `args_op_name`, `args_output_type_shape`,
        `args_input_type_shape`, `args_provider`; it is not modified
    :return: dataframe indexed by the four key columns and `idx`,
        with a single column `dur`
    """
    base_keys = [
        "args_op_name",
        "args_output_type_shape",
        "args_input_type_shape",
        "args_provider",
    ]
    # Number of times every key was met since the last "Session" row.
    seen = {}

    def _idx(row):
        """
        There may be multiple nodes with the same
        input/output types and shapes.
        This function gives every instance a distinct id.
        The first row with a given key receives the index 0,
        the next one 1 and so on.
        The counters restart on every row whose category is
        ``"Session"``; such a row receives -1.
        """
        if row["cat"] == "Session":
            seen.clear()
            return -1
        key = tuple(str(row[k]) for k in base_keys)
        seen[key] = seen.get(key, -1) + 1
        return seen[key]

    df = df.copy()
    df["idx"] = df.apply(_idx, axis=1)
    # na=False: a row without a name is never a kernel row.
    is_kernel = df["name"].str.contains("kernel_time", na=False)
    # The explicit copy makes the filtered frame independent, the column
    # assignments below do not write into a slice of another frame.
    df = df[(df["cat"] == "Node") & is_kernel].copy()
    for c in base_keys:
        df[c] = df[c].apply(str)
    groupkey = base_keys + ["idx"]
    return df[groupkey + ["dur"]].groupby(groupkey).sum()


 base = preprocess(prof_base)
 opti = preprocess(prof_opt)
 merge = base.merge(
    opti, how="outer", suffixes=("base", "opti"), left_index=True, right_index=True
 )
 merge = merge.reset_index(drop=False)
 merge.to_excel("plot_profiling_merged.xlsx", index=False)
 merge, gr = merge_ort_profile(prof_base, prof_opti)
 merge.to_excel(f"plot_profiling_merged{suffix}.xlsx", index=False)
 merge


 #####################################################
 # Aggregation


 def classify(row):
    """
    Tells on which side of the merge a row has a duration.

    :param row: row with keys `duropti` and `durbase`
    :return: ``"-"`` if `duropti` is NaN, otherwise ``"+"`` if
        `durbase` is NaN, otherwise ``"="``
    """
    # `durbase` is only read when `duropti` holds a value.
    if numpy.isnan(row["duropti"]):
        side = "-"
    elif numpy.isnan(row["durbase"]):
        side = "+"
    else:
        side = "="
    return side
 # More detailed


 # Short aliases used by process_shape for element types.
 keys = {"float": "f"}


 def process_shape(s):
    """
    Converts the string representation of a list of typed shapes
    into a compact label.

    ``"[{'float': [2, 3]}, {'float': [4]}]"`` becomes ``"f-2x3,f-4"``.

    :param s: string holding a Python literal, a sequence of
        dictionaries with exactly one item ``{type name: dimensions}``
    :return: one ``<type>-<d1>x<d2>...`` chunk per dictionary, joined
        with commas; the type name is replaced by its alias in `keys`
        when there is one and is kept unchanged otherwise
    :raises NotImplementedError: if a dictionary does not have
        exactly one item
    :raises ValueError: if *s* is not a plain Python literal
    """
    import ast

    # literal_eval only builds literals (lists, dicts, strings, numbers),
    # the content of the profile is never executed as code.
    value = ast.literal_eval(s)
    ns = []
    for item in value:
        if len(item) != 1:
            raise NotImplementedError(f"Unexpected value {item} in {s!r}.")
        ((dtype, dims),) = item.items()
        ns.append("-".join([keys.get(dtype, dtype), "x".join(map(str, dims))]))
    return ",".join(ns)


 def label(row):
    """
    Builds a one-line description of a merged profiling row.

    The result looks like
    ``[<side><provider>]<op>(<input shapes>)-><output shapes>[<idx>]``
    where `<side>` is the first character of ``row["side"]`` and
    `<provider>` the first three characters of ``row["args_provider"]``.

    :param row: row with keys `args_op_name`, `args_input_type_shape`,
        `args_output_type_shape`, `side`, `args_provider`, `idx`
    :return: the label as a string
    """
    op_name = row["args_op_name"]
    shapes_in = process_shape(row["args_input_type_shape"])
    shapes_out = process_shape(row["args_output_type_shape"])
    tag = f"{row['side'][0]}{row['args_provider'][:3]}"
    return f"[{tag}]{op_name}({shapes_in})->{shapes_out}[{row['idx']}]"


 df = merge.copy()
 df["side"] = df.apply(classify, axis=1)
 df["label"] = df.apply(label, axis=1)
 gr = (
    df[["label", "durbase", "duropti", "idx"]]
    .groupby("label")
    .agg({"durbase": numpy.sum, "duropti": numpy.sum, "idx": max})
 )
 gr.to_excel(f"plot_profiling_merged_details{suffix}.xlsx", index=False)
 gr

 ################################
 gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
 ax[0].set_title("Side by side duration")
 gr = gr.copy()
 gr["idx"] += 1
 gr[["idx"]].plot.barh(ax=ax[1])
 gr[["countbase", "countopti"]].plot.barh(ax=ax[1])
 ax[1].set_title("Side by side count")
 fig.tight_layout()
 fig.savefig("plot_profiling_side_by_side.png")
 fig.savefig(f"plot_profiling_side_by_side{suffix}.png")


 ########################################
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
    )
    prof_base.to_excel(f"prof_cuda_base{suffix}.xlsx", index=False)

    prof_opti = ort_profile(
        optimized,
        feeds,
        repeat=6,
        disable_optimization=True,
        providers=["CUDAExecutionProvider"],
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )
    prof_opti.to_excel(f"prof_cuda_opti{suffix}.xlsx", index=False)

    unique_op = set(prof_base["args_op_name"])
    fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
    plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
    plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")
    fig.savefig("plot_profiling_cuda.png")
    plot_ort_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
    plot_ort_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
    fig.tight_layout()
    fig.savefig(f"plot_profiling_cuda{suffix}.png")

    merge, gr = merge_ort_profile(prof_base, prof_opti)
    merge.to_excel(f"plot_profiling_merged{suffix}.xlsx", index=False)
    gr.to_excel(f"plot_profiling_merged_details{suffix}.xlsx", index=False)

    grmax = gr["durbase"] + gr["duropti"]
    total = grmax.sum()
    grmax /= total
    gr = gr[grmax >= 0.01]

    fig, ax = plt.subplots(1, 2, figsize=(14, min(gr.shape[0], 500)), sharey=True)
    gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
    ax[0].set_title("Side by side duration")
    gr = gr.copy()
    gr[["countbase", "countopti"]].plot.barh(ax=ax[1])
    ax[1].set_title("Side by side count")
    fig.tight_layout()
    fig.savefig(f"plot_profiling_side_by_side_cuda{suffix}.png")

 else:
    print(f"CUDA not available in {get_available_providers()}")
    print(f"CUDA not available in {get_available_providers()}.")
    fig, ax = None, None

 ax
diff --git a/_unittests/ut_ort/data/prof_base.xlsx b/_unittests/ut_ort/data/prof_base.xlsx
diff --git a/_unittests/ut_ort/data/prof_opti.xlsx b/_unittests/ut_ort/data/prof_opti.xlsx
diff --git a/_unittests/ut_ort/test_ort_profile.py b/_unittests/ut_ort/test_ort_profile.py
 import unittest
 import os
 import numpy as np
 from pandas import DataFrame
 from pandas import DataFrame, read_excel
 from onnx_array_api.npx import absolute, jit_onnx
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api.ort.ort_optimizers import ort_optimized_model
 from onnx_array_api.ort.ort_profile import ort_profile
 from onnx_array_api.ort.ort_profile import ort_profile, merge_ort_profile


 class TestOrtProfile(ExtTestCase):
        self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError)
        optimized = ort_optimized_model(onx)
        prof = ort_profile(optimized, feeds)
        prof.to_csv("prof.csv", index=False)
        self.assertIsInstance(prof, DataFrame)
        prof = ort_profile(optimized, feeds, as_df=False)
        self.assertIsInstance(prof, list)

    def test_merge_ort_profile(self):
        """
        Merges the two profiles stored in subfolder ``data`` and checks
        the shape and the column names of both returned dataframes.
        """
        folder = os.path.join(os.path.dirname(__file__), "data")
        base, opti = (
            read_excel(os.path.join(folder, name))
            for name in ("prof_base.xlsx", "prof_opti.xlsx")
        )
        merged, gr = merge_ort_profile(base, opti)

        merged_columns = [
            "args_op_name",
            "args_output_type_shape",
            "args_input_type_shape",
            "args_provider",
            "idx",
            "durbase",
            "countbase",
            "duropti",
            "countopti",
        ]
        self.assertEqual(merged.shape, (23, 9))
        self.assertEqual(list(merged.columns), merged_columns)

        gr_columns = ["durbase", "duropti", "countbase", "countopti"]
        self.assertEqual(gr.shape, (19, 4))
        self.assertEqual(list(gr.columns), gr_columns)


 if __name__ == "__main__":
    unittest.main(verbosity=2)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -23,5 +23,7 @@ OrtTensor
		Profiling
		+++++++++

		.. autofunction:: onnx_array_api.ort.ort_profile.merge_ort_profile

		.. autofunction:: onnx_array_api.ort.ort_profile.ort_profile
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,6 +8,11 @@ Dot

		.. autofunction:: onnx_array_api.plotting.dot_plot.to_dot

		Statistics
		++++++++++

		.. autofunction:: onnx_array_api.plotting.stat_plot.plot_ort_profile

		Text
		++++

Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -21,17 +21,22 @@
		from onnxruntime import get_available_providers
		from onnx_array_api.ext_test_case import example_path
		from onnx_array_api.ort.ort_optimizers import ort_optimized_model
		from onnx_array_api.ort.ort_profile import ort_profile
		from onnx_array_api.ort.ort_profile import ort_profile, merge_ort_profile
		from onnx_array_api.plotting.stat_plot import plot_ort_profile


		filename = example_path("data/small.onnx")
		suffix = ""
		filename = example_path(f"data/small{suffix}.onnx")
		optimized = filename + ".optimized.onnx"
		print(f"model={filename!r}")

		if not os.path.exists(optimized):
		ort_optimized_model(filename, output=optimized)
		print(optimized)
		print(f"optimized={optimized!r}")

		#############################
		# .. _l-example-ort-profiling:
		#
		# Profiling
		# +++++++++

Expand All		@@ -43,50 +48,31 @@
		disable_optimization=True,
		providers=["CPUExecutionProvider"],
		)
		prof_base.to_excel("prof_base.xlsx", index=False)
		prof_base.to_excel(f"prof_base{suffix}.xlsx", index=False)
		prof_base

		#######################################
		# And the optimized model.

		prof_opt = ort_profile(
		prof_opti = ort_profile(
		optimized,
		feeds,
		repeat=6,
		disable_optimization=True,
		providers=["CPUExecutionProvider"],
		)
		prof_opt
		prof_opti.to_excel(f"prof_opti{suffix}.xlsx", index=False)
		prof_opti

		#######################################
		# And the graph is:


		def plot_profile(df, ax0, ax1=None, title=None):
		gr_dur = (
		df[["dur", "args_op_name"]].groupby("args_op_name").sum().sort_values("dur")
		)
		gr_dur.plot.barh(ax=ax0)
		if title is not None:
		ax0.set_title(title)
		if ax1 is not None:
		gr_n = (
		df[["dur", "args_op_name"]]
		.groupby("args_op_name")
		.count()
		.sort_values("dur")
		)
		gr_n = gr_n.loc[gr_dur.index, :]
		gr_n.plot.barh(ax=ax1)
		ax1.set_title("n occurences")


		unique_op = set(prof_base["args_op_name"])
		fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
		plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
		plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")

		fig.savefig("plot_profiling.png")
		plot_ort_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
		plot_ort_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
		fig.tight_layout()
		fig.savefig(f"plot_profiling{suffix}.png")

		##################################################
		# Merging profiles
Expand All		@@ -96,103 +82,14 @@ def plot_profile(df, ax0, ax1=None, title=None):
		# process the same image and the input and output size are the
		# same at every iteration.


		def preprocess(df):
		groupkey = [
		"args_op_name",
		"args_output_type_shape",
		"args_input_type_shape",
		"args_provider",
		]

		def _idx(row):
		"""
		There may be multiple node with the same
		input/output types and shapes.
		This function gives every instance a distinct id.
		First unique op with same I/O receives the index 0.
		The counter restart when the session goes to the
		next image.
		"""
		if row["cat"] == "Session":
		occurences[0] = {}
		return -1
		assert "idx" not in groupkey
		vals = [row[k] for k in groupkey]
		key = tuple(map(str, vals))
		if key not in occurences[0]:
		occurences[0][key] = 0
		else:
		occurences[0][key] += 1
		return occurences[0][key]

		df = df.copy()
		occurences = [{}]
		df["idx"] = df.apply(_idx, axis=1)
		df = df[(df["cat"] == "Node") & df["name"].str.contains("kernel_time")]
		groupkey.append("idx")
		for c in groupkey:
		if c != "idx":
		df[c] = df[c].apply(str)
		gr = df[groupkey + ["dur"]].groupby(groupkey)
		return gr.sum()


		base = preprocess(prof_base)
		opti = preprocess(prof_opt)
		merge = base.merge(
		opti, how="outer", suffixes=("base", "opti"), left_index=True, right_index=True
		)
		merge = merge.reset_index(drop=False)
		merge.to_excel("plot_profiling_merged.xlsx", index=False)
		merge, gr = merge_ort_profile(prof_base, prof_opti)
		merge.to_excel(f"plot_profiling_merged{suffix}.xlsx", index=False)
		merge


		#####################################################
		# Aggregation


		def classify(row):
		if numpy.isnan(row["duropti"]):
		return "-"
		if numpy.isnan(row["durbase"]):
		return "+"
		return "="
		# More detailed


		keys = {"float": "f"}


		def process_shape(s):
		value = eval(s)
		ns = []
		for v in value:
		if len(v) != 1:
		raise NotImplementedError(f"Unexpected value {v} in {s!r}.")
		k, v = list(v.items())[0]
		n = "-".join([keys[k], "x".join(map(str, v))])
		ns.append(n)
		return ",".join(ns)


		def label(row):
		name = row["args_op_name"]
		inshape = process_shape(row["args_input_type_shape"])
		outshape = process_shape(row["args_output_type_shape"])
		side = row["side"][0]
		prov = row["args_provider"][:3]
		idx = row["idx"]
		return f"[{side}{prov}]{name}({inshape})->{outshape}[{idx}]"


		df = merge.copy()
		df["side"] = df.apply(classify, axis=1)
		df["label"] = df.apply(label, axis=1)
		gr = (
		df[["label", "durbase", "duropti", "idx"]]
		.groupby("label")
		.agg({"durbase": numpy.sum, "duropti": numpy.sum, "idx": max})
		)
		gr.to_excel(f"plot_profiling_merged_details{suffix}.xlsx", index=False)
		gr

		################################
Expand All		@@ -210,11 +107,10 @@ def label(row):
		gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
		ax[0].set_title("Side by side duration")
		gr = gr.copy()
		gr["idx"] += 1
		gr[["idx"]].plot.barh(ax=ax[1])
		gr[["countbase", "countopti"]].plot.barh(ax=ax[1])
		ax[1].set_title("Side by side count")
		fig.tight_layout()
		fig.savefig("plot_profiling_side_by_side.png")
		fig.savefig(f"plot_profiling_side_by_side{suffix}.png")


		########################################
Expand All		@@ -231,21 +127,44 @@ def label(row):
		disable_optimization=True,
		providers=["CUDAExecutionProvider"],
		)
		prof_base.to_excel(f"prof_cuda_base{suffix}.xlsx", index=False)

		prof_opti = ort_profile(
		optimized,
		feeds,
		repeat=6,
		disable_optimization=True,
		providers=["CUDAExecutionProvider"],
		providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
		)
		prof_opti.to_excel(f"prof_cuda_opti{suffix}.xlsx", index=False)

		unique_op = set(prof_base["args_op_name"])
		fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
		plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
		plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")
		fig.savefig("plot_profiling_cuda.png")
		plot_ort_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
		plot_ort_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
		fig.tight_layout()
		fig.savefig(f"plot_profiling_cuda{suffix}.png")

		merge, gr = merge_ort_profile(prof_base, prof_opti)
		merge.to_excel(f"plot_profiling_merged{suffix}.xlsx", index=False)
		gr.to_excel(f"plot_profiling_merged_details{suffix}.xlsx", index=False)

		grmax = gr["durbase"] + gr["duropti"]
		total = grmax.sum()
		grmax /= total
		gr = gr[grmax >= 0.01]

		fig, ax = plt.subplots(1, 2, figsize=(14, min(gr.shape[0], 500)), sharey=True)
		gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
		ax[0].set_title("Side by side duration")
		gr = gr.copy()
		gr[["countbase", "countopti"]].plot.barh(ax=ax[1])
		ax[1].set_title("Side by side count")
		fig.tight_layout()
		fig.savefig(f"plot_profiling_side_by_side_cuda{suffix}.png")

		else:
		print(f"CUDA not available in {get_available_providers()}")
		print(f"CUDA not available in {get_available_providers()}.")
		fig, ax = None, None

		ax
Original file line number	Diff line number	Diff line change
		@@ -1,10 +1,11 @@
		import unittest
		import os
		import numpy as np
		from pandas import DataFrame
		from pandas import DataFrame, read_excel
		from onnx_array_api.npx import absolute, jit_onnx
		from onnx_array_api.ext_test_case import ExtTestCase
		from onnx_array_api.ort.ort_optimizers import ort_optimized_model
		from onnx_array_api.ort.ort_profile import ort_profile
		from onnx_array_api.ort.ort_profile import ort_profile, merge_ort_profile


		class TestOrtProfile(ExtTestCase):
Expand All		@@ -27,10 +28,36 @@ def myloss(x, y):
		self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError)
		optimized = ort_optimized_model(onx)
		prof = ort_profile(optimized, feeds)
		prof.to_csv("prof.csv", index=False)
		self.assertIsInstance(prof, DataFrame)
		prof = ort_profile(optimized, feeds, as_df=False)
		self.assertIsInstance(prof, list)

		def test_merge_ort_profile(self):
		data = os.path.join(os.path.dirname(__file__), "data")
		df1 = read_excel(os.path.join(data, "prof_base.xlsx"))
		df2 = read_excel(os.path.join(data, "prof_opti.xlsx"))
		merged, gr = merge_ort_profile(df1, df2)
		self.assertEqual(merged.shape, (23, 9))
		self.assertEqual(
		list(merged.columns),
		[
		"args_op_name",
		"args_output_type_shape",
		"args_input_type_shape",
		"args_provider",
		"idx",
		"durbase",
		"countbase",
		"duropti",
		"countopti",
		],
		)
		self.assertEqual(gr.shape, (19, 4))
		self.assertEqual(
		list(gr.columns), ["durbase", "duropti", "countbase", "countopti"]
		)


		if __name__ == "__main__":
		unittest.main(verbosity=2)