Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commite2f9930

Browse files
authored
Merge pull request#4 from sdpython/bench
Adds a benchmark to the documentation (TreeEnsemble)
2 parentse2f8c14 +d669884 commite2f9930

File tree

6 files changed

+331
-6
lines changed

6 files changed

+331
-6
lines changed

‎.gitignore‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
*.dylib
44
*.so
55
coverage.html/*
6+
_cache/*
67
.coverage
78
dist/*
89
build/*
910
.eggs/*
1011
*egg-info/*
12+
_doc/examples/_cache/*
1113
_doc/auto_examples/*
1214
_doc/examples/plot_*.png
1315
_doc/_static/require.js

‎_doc/examples/plot_benchmark_rf.py‎

Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
"""
2+
.. _l-example-benchmark-tree-implementation:
3+
4+
Benchmark of TreeEnsemble implementation
5+
========================================
6+
7+
The following example compares the inference time between
8+
:epkg:`onnxruntime` and :class:`sklearn.ensemble.RandomForestRegressor`,
9+
for different numbers of estimators, max depths, and levels of parallelization.
10+
It does it for a fixed number of rows and features.
11+
12+
import and registration of necessary converters
13+
++++++++++++++++++++++++++++++++++++++++++++++++
14+
"""
15+
importpickle
16+
importos
17+
importtime
18+
fromitertoolsimportproduct
19+
20+
importmatplotlib.pyplotasplt
21+
importnumpy
22+
importpandas
23+
fromlightgbmimportLGBMRegressor
24+
fromonnxmltools.convert.lightgbm.operator_converters.LightGbmimportconvert_lightgbm
25+
fromonnxmltools.convert.xgboost.operator_converters.XGBoostimportconvert_xgboost
26+
fromonnxruntimeimportInferenceSession,SessionOptions
27+
frompsutilimportcpu_count
28+
frompyquickhelper.loghelperimportrun_cmd
29+
fromskl2onnximportto_onnx,update_registered_converter
30+
fromskl2onnx.common.shape_calculatorimportcalculate_linear_regressor_output_shapes
31+
fromsklearnimportset_config
32+
fromsklearn.ensembleimportRandomForestRegressor
33+
fromtqdmimporttqdm
34+
fromxgboostimportXGBRegressor
35+
36+
37+
def skl2onnx_convert_lightgbm(scope, operator, container):
    """
    Converter wrapper registered for :class:`LGBMRegressor`.

    It reads the conversion options attached to the model, stores the
    value of option ``split`` on the operator (None if the option is
    not set) and then delegates to ``convert_lightgbm``.

    :param scope: conversion scope, used to retrieve the options
    :param operator: operator being converted, it receives attribute *split*
    :param container: container passed unchanged to ``convert_lightgbm``
    """
    options = scope.get_options(operator.raw_operator)
    operator.split = options["split"] if "split" in options else None
    convert_lightgbm(scope, operator, container)
44+
45+
46+
update_registered_converter(
47+
LGBMRegressor,
48+
"LightGbmLGBMRegressor",
49+
calculate_linear_regressor_output_shapes,
50+
skl2onnx_convert_lightgbm,
51+
options={"split":None},
52+
)
53+
update_registered_converter(
54+
XGBRegressor,
55+
"XGBoostXGBRegressor",
56+
calculate_linear_regressor_output_shapes,
57+
convert_xgboost,
58+
)
59+
60+
# The following instruction reduces the time spent by scikit-learn
61+
# to validate the data.
62+
set_config(assume_finite=True)
63+
64+
##########################################
65+
# Machine details
66+
# +++++++++++++++
67+
68+
69+
print(f"Number of cores:{cpu_count()}")
70+
71+
###############################################
72+
# But this information is not usually enough.
73+
# Let's extract the cache information.
74+
75+
try:
76+
out,err=run_cmd("lscpu")
77+
print(out)
78+
exceptExceptionase:
79+
print(f"lscpu not available:{e}")
80+
81+
###############################################
82+
# Or with the following command.
83+
out,err=run_cmd("cat /proc/cpuinfo")
84+
85+
###############################################
86+
# Function to measure inference time
87+
# ++++++++++++++++++++++++++++++++++
88+
89+
90+
def measure_inference(fct, X, repeat, max_time=5, quantile=1):
    """
    Run *repeat* times the same function on data *X*.

    :param fct: function to run, it is called as ``fct(X)``
    :param X: data
    :param repeat: maximum number of times to run, at least 1
    :param max_time: maximum time (in seconds) to use to measure
        the inference, the loop stops as soon as at least three runs
        were done and their cumulated time reaches this value,
        None to always run *repeat* times
    :param quantile: number of measures dropped at both ends of the
        sorted times before computing the average, it is ignored
        if fewer than three measures would remain
    :return: number of runs, sum of the time, average, median
        (the middle element of the sorted times)
    :raises ValueError: if *repeat* is lower than 1
    """
    if repeat < 1:
        # Without any measure, the average and the median are undefined.
        raise ValueError(f"repeat must be at least 1, got {repeat!r}.")
    times = []
    for _ in range(repeat):
        begin = time.perf_counter()
        fct(X)
        times.append(time.perf_counter() - begin)
        # At least three measures are taken before checking the time budget.
        if len(times) < 3:
            continue
        if max_time is not None and sum(times) >= max_time:
            break
    times.sort()
    # Extreme values are kept if removing them leaves less than three measures.
    if len(times) - quantile * 2 < 3:
        quantile = 0
    tt = times if quantile == 0 else times[quantile:-quantile]
    return (len(times), sum(times), sum(tt) / len(tt), times[len(times) // 2])
117+
118+
119+
###############################################
120+
# Benchmark
121+
# +++++++++
122+
#
123+
# The following script benchmarks the inference for the same
124+
# model for a random forest and onnxruntime after it was converted
125+
# into ONNX and for the following configurations.
126+
127+
legend = "parallel-batch-4096-block"

# Half of the cores, but never 0: on a single-core machine
# cpu_count() // 2 is 0 which is not a valid number of jobs.
half_cores = max(1, cpu_count() // 2)

small = cpu_count() < 12
if small:
    # light configuration for small machines
    N = 1000
    n_features = 10
    n_jobs = [1, half_cores, cpu_count()]
    n_ests = [10, 20, 30]
    depth = [4, 6, 8, 10]
    Regressor = RandomForestRegressor
else:
    # heavier configuration for machines with at least 12 cores
    N = 100000
    n_features = 50
    n_jobs = [cpu_count(), half_cores, 1]
    n_ests = [100, 200, 400]
    depth = [6, 8, 10, 12, 14]
    Regressor = RandomForestRegressor

# avoid duplicates on machine with 1 or 2 cores.
n_jobs = sorted(set(n_jobs), reverse=True)
147+
148+
##############################################
149+
# Benchmark parameters
150+
repeat=7# repeat n times the same inference
151+
quantile=1# exclude extreme times
152+
max_time=5# maximum number of seconds to spend on one configuration
153+
154+
##############################################
155+
# Data
156+
157+
158+
X=numpy.random.randn(N,n_features).astype(numpy.float32)
159+
noise= (numpy.random.randn(X.shape[0])/ (n_features//5)).astype(numpy.float32)
160+
y=X.mean(axis=1)+noise
161+
n_train=min(N,N//3)
162+
163+
164+
data= []
165+
couples=list(product(n_jobs,depth,n_ests))
166+
bar=tqdm(couples)
167+
cache_dir="_cache"
168+
ifnotos.path.exists(cache_dir):
169+
os.mkdir(cache_dir)
170+
171+
forn_j,max_depth,n_estimatorsinbar:
172+
ifn_j==1andn_estimators>n_ests[0]:
173+
# skipping
174+
continue
175+
176+
# parallelization
177+
cache_name=os.path.join(
178+
cache_dir,f"rf-J-{n_j}-E-{n_estimators}-D-{max_depth}.pkl"
179+
)
180+
ifos.path.exists(cache_name):
181+
withopen(cache_name,"rb")asf:
182+
rf=pickle.load(f)
183+
else:
184+
bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} train rf")
185+
ifn_j==1andissubclass(Regressor,RandomForestRegressor):
186+
rf=Regressor(max_depth=max_depth,n_estimators=n_estimators,n_jobs=-1)
187+
rf.fit(X[:n_train],y[:n_train])
188+
rf.n_jobs=1
189+
else:
190+
rf=Regressor(max_depth=max_depth,n_estimators=n_estimators,n_jobs=n_j)
191+
rf.fit(X[:n_train],y[:n_train])
192+
withopen(cache_name,"wb")asf:
193+
pickle.dump(rf,f)
194+
195+
bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} ISession")
196+
so=SessionOptions()
197+
so.intra_op_num_threads=n_j
198+
cache_name=os.path.join(
199+
cache_dir,f"rf-J-{n_j}-E-{n_estimators}-D-{max_depth}.onnx"
200+
)
201+
ifos.path.exists(cache_name):
202+
sess=InferenceSession(cache_name,so)
203+
else:
204+
bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} cvt onnx")
205+
onx=to_onnx(rf,X[:1])
206+
withopen(cache_name,"wb")asf:
207+
f.write(onx.SerializeToString())
208+
sess=InferenceSession(cache_name,so)
209+
onx_size=os.stat(cache_name).st_size
210+
211+
# run once to avoid counting the first run
212+
bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predict1")
213+
rf.predict(X)
214+
sess.run(None, {"X":X})
215+
216+
# fixed data
217+
obs=dict(
218+
n_jobs=n_j,
219+
max_depth=max_depth,
220+
n_estimators=n_estimators,
221+
repeat=repeat,
222+
max_time=max_time,
223+
name=rf.__class__.__name__,
224+
n_rows=X.shape[0],
225+
n_features=X.shape[1],
226+
onnx_size=onx_size,
227+
)
228+
229+
# baseline
230+
bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predictB")
231+
r,t,mean,med=measure_inference(rf.predict,X,repeat=repeat,max_time=max_time)
232+
o1=obs.copy()
233+
o1.update(dict(avg=mean,med=med,n_runs=r,ttime=t,name="base"))
234+
data.append(o1)
235+
236+
# onnxruntime
237+
bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predictO")
238+
r,t,mean,med=measure_inference(
239+
lambdax:sess.run(None, {"X":x}),X,repeat=repeat,max_time=max_time
240+
)
241+
o2=obs.copy()
242+
o2.update(dict(avg=mean,med=med,n_runs=r,ttime=t,name="ort_"))
243+
data.append(o2)
244+
245+
246+
###################################################
247+
# Saving data
248+
# +++++++++++
249+
250+
name=os.path.join(cache_dir,"plot_beanchmark_rf")
251+
print(f"Saving data into{name!r}")
252+
253+
df=pandas.DataFrame(data)
254+
df2=df.copy()
255+
df2["legend"]=legend
256+
df2.to_csv(f"{name}-{legend}.csv",index=False)
257+
258+
#######################################################
259+
# Printing the data
260+
print(df)
261+
262+
#####################################################
263+
# Plot
264+
# ++++
265+
266+
n_rows = len(n_jobs)
n_cols = len(n_ests)


# squeeze=False keeps *axes* two-dimensional even with a single row
# or a single column, axes[i, j] is then always valid.
fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False
)
fig.suptitle(f"{rf.__class__.__name__}")

for n_j, n_estimators in tqdm(product(n_jobs, n_ests)):
    i = n_jobs.index(n_j)
    j = n_ests.index(n_estimators)
    ax = axes[i, j]

    subdf = df[(df.n_estimators == n_estimators) & (df.n_jobs == n_j)]
    if subdf.shape[0] == 0:
        # this configuration was skipped by the benchmark
        continue
    piv = subdf.pivot(index="max_depth", columns="name", values=["avg", "med"])
    piv.plot(ax=ax, title=f"jobs={n_j}, trees={n_estimators}")
    ax.set_ylabel(f"n_jobs={n_j}", fontsize="small")
    ax.set_xlabel("max_depth", fontsize="small")

    # ratio
    ax2 = ax.twinx()
    piv1 = subdf.pivot(index="max_depth", columns="name", values="avg")
    piv1["speedup"] = piv1.base / piv1.ort_
    ax2.plot(piv1.index, piv1.speedup, "b--", label="speedup avg")

    piv1 = subdf.pivot(index="max_depth", columns="name", values="med")
    piv1["speedup"] = piv1.base / piv1.ort_
    ax2.plot(piv1.index, piv1.speedup, "y--", label="speedup med")

    # 1
    ax2.plot(piv1.index, [1 for _ in piv1.index], "k--", label="no speedup")
    # The legend is built after the last line is drawn,
    # otherwise "no speedup" is missing from it.
    ax2.legend(fontsize="x-small")

for i in range(axes.shape[0]):
    for j in range(axes.shape[1]):
        axes[i, j].legend(fontsize="small")

fig.tight_layout()
fig.savefig(f"{name}-{legend}.png")
306+
# plt.show()

‎_doc/tutorial/benchmarks.rst‎

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
.. _l-benchmarks:
2+
3+
==========
4+
Benchmarks
5+
==========
6+
7+
A list of benchmarks used to improve the performance of
8+
ONNX components (onnx, onnxruntime, onnx-array-api, ...).
9+
10+
..toctree::
11+
12+
../auto_examples/plot_benchmark_rf

‎_doc/tutorial/index.rst‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ Tutorial
77
:maxdepth:1
88

99
overview
10-
10+
benchmarks

‎azure-pipelines.yml‎

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ jobs:
2727
displayName:'Install Requirements'
2828
-script:pip install -r requirements-dev.txt
2929
displayName:'Install Requirements dev'
30+
-script:pip install onnxmltools --no-deps
31+
displayName:'Install onnxmltools'
3032
-script:|
3133
ruff .
3234
displayName: 'Ruff'
@@ -52,7 +54,7 @@ jobs:
5254
vmImage:'windows-latest'
5355
strategy:
5456
matrix:
55-
Python310-Linux:
57+
Python310-Windows:
5658
python.version:'3.10'
5759
maxParallel:3
5860

@@ -67,6 +69,8 @@ jobs:
6769
displayName:'Install Requirements'
6870
-script:pip install -r requirements-dev.txt
6971
displayName:'Install Requirements dev'
72+
-script:pip install onnxmltools --no-deps
73+
displayName:'Install onnxmltools'
7074
-script:|
7175
python -m pytest -v
7276
displayName: 'Runs Unit Tests'
@@ -107,10 +111,6 @@ jobs:
107111
displayName:'Install omp'
108112
-script:brew install p7zip
109113
displayName:'Install p7zip'
110-
# - script: brew install pandoc
111-
# displayName: 'Install Pandoc'
112-
# - script: brew install graphviz
113-
# displayName: 'Install Graphviz'
114114
-script:python -m pip install --upgrade pip setuptools wheel
115115
displayName:'Install tools'
116116
-script:brew install pybind11
@@ -119,6 +119,8 @@ jobs:
119119
displayName:'Install Requirements'
120120
-script:pip install -r requirements-dev.txt
121121
displayName:'Install Requirements dev'
122+
-script:pip install onnxmltools --no-deps
123+
displayName:'Install onnxmltools'
122124
-script:|
123125
python -m pytest -v -v
124126
displayName: 'Runs Unit Tests'

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp