 14 |  14 |
 15 |  15 | This example illustrates both methods on an artificial dataset, which
 16 |  16 | consists of a sinusoidal target function and strong noise added to every fifth
 17 |     | -datapoint. The first figure compares the learned model of KRR and SVR when both
 18 |     | -complexity/regularization and bandwidth of the RBF kernel are optimized using
 19 |     | -grid-search. The learned functions are very similar; however, fitting KRR is
 20 |     | -approx. seven times faster than fitting SVR (both with grid-search). However,
 21 |     | -prediction of 100000 target values is more than three times faster with SVR
 22 |     | -since it has learned a sparse model using only approx. 1/3 of the 100 training
 23 |     | -datapoints as support vectors.
 24 |     | -
 25 |     | -The next figure compares the time for fitting and prediction of KRR and SVR for
 26 |     | -different sizes of the training set. Fitting KRR is faster than SVR for medium-
 27 |     | -sized training sets (less than 1000 samples); however, for larger training sets
 28 |     | -SVR scales better. With regard to prediction time, SVR is faster than
 29 |     | -KRR for all sizes of the training set because of the learned sparse
 30 |     | -solution. Note that the degree of sparsity and thus the prediction time depends
 31 |     | -on the parameters epsilon and C of the SVR.
    |  17 | +datapoint.
 32 |  18 |
 33 |  19 | """
 34 |     | -
    |  20 | +# %%
 35 |  21 | # Authors: Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
 36 |  22 | # License: BSD 3 clause
 37 |  23 |
 38 |     | -import time
 39 |     | -
    |  24 | +# %%
    |  25 | +# Generate sample data
    |  26 | +# --------------------
 40 |  27 | import numpy as np
 41 |  28 |
 42 |     | -from sklearn.svm import SVR
 43 |     | -from sklearn.model_selection import GridSearchCV
 44 |     | -from sklearn.model_selection import learning_curve
 45 |     | -from sklearn.kernel_ridge import KernelRidge
 46 |     | -import matplotlib.pyplot as plt
 47 |     | -
 48 |  29 | rng = np.random.RandomState(42)
 49 |  30 |
 50 |     | -# #############################################################################
 51 |     | -# Generate sample data
 52 |  31 | X = 5 * rng.rand(10000, 1)
 53 |  32 | y = np.sin(X).ravel()
 54 |  33 |
 57 |  36 |
 58 |  37 | X_plot = np.linspace(0, 5, 100000)[:, None]
 59 |  38 |
 60 |     | -# #############################################################################
 61 |     | -# Fit regression model
    |  39 | +# %%
    |  40 | +# Construct the kernel-based regression models
    |  41 | +# --------------------------------------------
    |  42 | +
    |  43 | +from sklearn.model_selection import GridSearchCV
    |  44 | +from sklearn.svm import SVR
    |  45 | +from sklearn.kernel_ridge import KernelRidge
    |  46 | +
 62 |  47 | train_size = 100
    |  48 | +
 63 |  49 | svr = GridSearchCV(
 64 |  50 |     SVR(kernel="rbf", gamma=0.1),
 65 |  51 |     param_grid={"C": [1e0, 1e1, 1e2, 1e3], "gamma": np.logspace(-2, 2, 5)},
 70 |  56 |     param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3], "gamma": np.logspace(-2, 2, 5)},
 71 |  57 | )
 72 |  58 |
    |  59 | +# %%
    |  60 | +# Compare times of SVR and Kernel Ridge Regression
    |  61 | +# ------------------------------------------------
    |  62 | +
    |  63 | +import time
    |  64 | +
 73 |  65 | t0 = time.time()
 74 |  66 | svr.fit(X[:train_size], y[:train_size])
 75 |  67 | svr_fit = time.time() - t0
    |  68 | +print(f"Best SVR with params: {svr.best_params_} and R2 score: {svr.best_score_:.3f}")
 76 |  69 | print("SVR complexity and bandwidth selected and model fitted in %.3f s" % svr_fit)
 77 |  70 |
 78 |  71 | t0 = time.time()
 79 |  72 | kr.fit(X[:train_size], y[:train_size])
 80 |  73 | kr_fit = time.time() - t0
    |  74 | +print(f"Best KRR with params: {kr.best_params_} and R2 score: {kr.best_score_:.3f}")
 81 |  75 | print("KRR complexity and bandwidth selected and model fitted in %.3f s" % kr_fit)
 82 |  76 |
 83 |  77 | sv_ratio = svr.best_estimator_.support_.shape[0] / train_size
 93 |  87 | kr_predict = time.time() - t0
 94 |  88 | print("KRR prediction for %d inputs in %.3f s" % (X_plot.shape[0], kr_predict))
 95 |  89 |
 96 |     | -
 97 |     | -# #############################################################################
    |  90 | +# %%
 98 |  91 | # Look at the results
    |  92 | +# -------------------
    |  93 | +
    |  94 | +import matplotlib.pyplot as plt
    |  95 | +
 99 |  96 | sv_ind = svr.best_estimator_.support_
100 |  97 | plt.scatter(
101 |  98 |     X[sv_ind],
119 | 116 | plt.xlabel("data")
120 | 117 | plt.ylabel("target")
121 | 118 | plt.title("SVR versus Kernel Ridge")
122 |     | -plt.legend()
    | 119 | +_ = plt.legend()
    | 120 | +
    | 121 | +# %%
    | 122 | +# The previous figure compares the learned model of KRR and SVR when both
    | 123 | +# complexity/regularization and bandwidth of the RBF kernel are optimized using
    | 124 | +# grid-search. The learned functions are very similar; however, fitting KRR is
    | 125 | +# approximately 3-4 times faster than fitting SVR (both with grid-search).
    | 126 | +#
    | 127 | +# In theory, predicting 100000 target values could be approximately three
    | 128 | +# times faster with SVR since it has learned a sparse model using only
    | 129 | +# approximately 1/3 of the training datapoints as support vectors. In
    | 130 | +# practice, however, this is not necessarily the case because of
    | 131 | +# implementation details in the way the kernel function is computed for each
    | 132 | +# model, which can make the KRR model as fast as or even faster than SVR
    | 133 | +# despite computing more arithmetic operations.
    | 134 | +
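A minimal sketch of the sparsity argument above (not part of the diff; it assumes the fitted svr grid search, X_plot and np defined earlier, and a numeric gamma as in the grid): an RBF SVR prediction is a weighted sum of kernel evaluations against the support vectors only, so a sparser model means fewer kernel evaluations per prediction, while KRR always combines all training points.

    # Illustrative only: rebuild SVR predictions from the dual coefficients and the
    # support vectors to show that only the support vectors enter the sum.
    from sklearn.metrics.pairwise import rbf_kernel

    best_svr = svr.best_estimator_
    K = rbf_kernel(X_plot[:5], best_svr.support_vectors_, gamma=best_svr.gamma)
    manual_pred = K @ best_svr.dual_coef_.ravel() + best_svr.intercept_
    print(np.allclose(manual_pred, best_svr.predict(X_plot[:5])))  # expected: True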
    | 135 | +# %%
    | 136 | +# Visualize training and prediction times
    | 137 | +# ---------------------------------------
123 | 138 |
124 |     | -# Visualize training and prediction time
125 | 139 | plt.figure()
126 | 140 |
127 |     | -# Generate sample data
128 |     | -X = 5 * rng.rand(10000, 1)
129 |     | -y = np.sin(X).ravel()
130 |     | -y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))
131 | 141 | sizes = np.logspace(1, 3.8, 7).astype(int)
132 | 142 | for name, estimator in {
133 | 143 |     "KRR": KernelRidge(kernel="rbf", alpha=0.01, gamma=10),
164 | 174 | plt.xlabel("Train size")
165 | 175 | plt.ylabel("Time (seconds)")
166 | 176 | plt.title("Execution Time")
167 |     | -plt.legend(loc="best")
    | 177 | +_ = plt.legend(loc="best")
    | 178 | +
    | 179 | +# %%
    | 180 | +# This figure compares the time for fitting and prediction of KRR and SVR for
    | 181 | +# different sizes of the training set. Fitting KRR is faster than SVR for
    | 182 | +# medium-sized training sets (less than a few thousand samples); however, for
    | 183 | +# larger training sets SVR scales better. With regard to prediction time, SVR
    | 184 | +# should be faster than KRR for all sizes of the training set because of the
    | 185 | +# learned sparse solution; however, this is not necessarily the case in
    | 186 | +# practice because of implementation details. Note that the degree of sparsity
    | 187 | +# and thus the prediction time depend on the parameters epsilon and C of the SVR.
    | 188 | +
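To make the last point concrete, here is a small sketch (not part of the diff; it reuses X, y and SVR from above, and the C/gamma/epsilon values are illustrative choices): widening the epsilon-insensitive tube leaves more training points inside it, so fewer of them become support vectors and prediction gets cheaper.

    # Illustrative only: the number of support vectors shrinks as epsilon grows.
    for eps in [0.01, 0.1, 0.5]:
        model = SVR(kernel="rbf", C=10, gamma=0.1, epsilon=eps).fit(X[:100], y[:100])
        print(f"epsilon={eps}: {model.support_.shape[0]} support vectors out of 100")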
    | 189 | +# %%
    | 190 | +# Visualize the learning curves
    | 191 | +# -----------------------------
    | 192 | +
    | 193 | +from sklearn.model_selection import learning_curve
168 | 194 |
169 |     | -# Visualize learning curves
170 | 195 | plt.figure()
171 | 196 |
172 | 197 | svr = SVR(kernel="rbf", C=1e1, gamma=0.1)
188 | 213 |     cv=10,
189 | 214 | )
190 | 215 |
191 |     | -plt.plot(train_sizes, -test_scores_svr.mean(1), "o-", color="r", label="SVR")
192 |     | -plt.plot(train_sizes, -test_scores_kr.mean(1), "o-", color="g", label="KRR")
    | 216 | +plt.plot(train_sizes, -test_scores_kr.mean(1), "o--", color="g", label="KRR")
    | 217 | +plt.plot(train_sizes, -test_scores_svr.mean(1), "o--", color="r", label="SVR")
193 | 218 | plt.xlabel("Train size")
194 | 219 | plt.ylabel("Mean Squared Error")
195 | 220 | plt.title("Learning curves")