|
87 | 87 | "R^2 Score: 0.46 (+/- 0.29) [SVM]\n",
|
88 | 88 | "R^2 Score: 0.43 (+/- 0.14) [Lasso]\n",
|
89 | 89 | "R^2 Score: 0.53 (+/- 0.28) [Random Forest]\n",
|
90 |
| - "R^2 Score: 0.58 (+/- 0.23) [StackingCVRegressor]\n" |
| 90 | + "R^2 Score: 0.57 (+/- 0.24) [StackingCVRegressor]\n" |
91 | 91 | ]
|
92 | 92 | }
|
93 | 93 | ],
|
|
109 | 109 | "rf = RandomForestRegressor(n_estimators=5, \n",
|
110 | 110 | " random_state=RANDOM_SEED)\n",
|
111 | 111 | "\n",
|
112 |
| - "# The StackingCVRegressor uses scikit-learn's check_cv\n", |
113 |
| - "# internally, which doesn't support a random seed. Thus\n", |
114 |
| - "# NumPy's random seed need to be specified explicitely for\n", |
115 |
| - "# deterministic behavior\n", |
116 |
| - "np.random.seed(RANDOM_SEED)\n", |
| 112 | + "# Starting from v0.16.0, StackingCVRegressor supports\n", |
| 113 | + "# `random_state` to get deterministic results.\n", |
117 | 114 | "stack = StackingCVRegressor(regressors=(svr, lasso, rf),\n",
|
118 |
| - " meta_regressor=lasso)\n", |
| 115 | + " meta_regressor=lasso,\n", |
| 116 | + " random_state=RANDOM_SEED)\n", |
119 | 117 | "\n",
|
120 | 118 | "print('5-fold cross validation scores:\\n')\n",
|
121 | 119 | "\n",
|
|
141 | 139 | "Neg. MSE Score: -33.34 (+/- 22.36) [SVM]\n",
|
142 | 140 | "Neg. MSE Score: -35.53 (+/- 16.99) [Lasso]\n",
|
143 | 141 | "Neg. MSE Score: -27.25 (+/- 16.76) [Random Forest]\n",
|
144 |
| - "Neg. MSE Score: -25.56 (+/- 18.22) [StackingCVRegressor]\n" |
| 142 | + "Neg. MSE Score: -25.82 (+/- 18.10) [StackingCVRegressor]\n" |
145 | 143 | ]
|
146 | 144 | }
|
147 | 145 | ],
|
148 | 146 | "source": [
|
149 |
| - "# The StackingCVRegressor uses scikit-learn's check_cv\n", |
150 |
| - "# internally, which doesn't support a random seed. Thus\n", |
151 |
| - "# NumPy's random seed need to be specified explicitely for\n", |
152 |
| - "# deterministic behavior\n", |
153 |
| - "np.random.seed(RANDOM_SEED)\n", |
154 | 147 | "stack = StackingCVRegressor(regressors=(svr, lasso, rf),\n",
|
155 | 148 | " meta_regressor=lasso)\n",
|
156 | 149 | "\n",
|
|
186 | 179 | "metadata": {},
|
187 | 180 | "outputs": [
|
188 | 181 | {
|
189 |
| - "name": "stderr", |
| 182 | + "name": "stdout", |
190 | 183 | "output_type": "stream",
|
191 | 184 | "text": [
|
192 |
| - "/Users/guq/miniconda3/envs/python3/lib/python3.7/site-packages/sklearn/model_selection/_search.py:841: DeprecationWarning: The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.\n", |
193 |
| - " DeprecationWarning)\n" |
| 185 | + "Best: 0.679576 using {'lasso__alpha': 1.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.4}\n" |
194 | 186 | ]
|
195 | 187 | },
|
196 | 188 | {
|
197 |
| - "name": "stdout", |
| 189 | + "name": "stderr", |
198 | 190 | "output_type": "stream",
|
199 | 191 | "text": [
|
200 |
| - "Best: 0.674237 using {'lasso__alpha': 1.6, 'meta_regressor__n_estimators': 100, 'ridge__alpha': 0.2}\n" |
| 192 | + "/Users/guq/miniconda3/envs/python3/lib/python3.7/site-packages/sklearn/model_selection/_search.py:841: DeprecationWarning: The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.\n", |
| 193 | + " DeprecationWarning)\n" |
201 | 194 | ]
|
202 | 195 | }
|
203 | 196 | ],
|
|
215 | 208 | "lasso = Lasso(random_state=RANDOM_SEED)\n",
|
216 | 209 | "rf = RandomForestRegressor(random_state=RANDOM_SEED)\n",
|
217 | 210 | "\n",
|
218 |
| - "# The StackingCVRegressor uses scikit-learn's check_cv\n", |
219 |
| - "# internally, which doesn't support a random seed. Thus\n", |
220 |
| - "# NumPy's random seed need to be specified explicitely for\n", |
221 |
| - "# deterministic behavior\n", |
222 |
| - "np.random.seed(RANDOM_SEED)\n", |
223 | 211 | "stack = StackingCVRegressor(regressors=(lasso, ridge),\n",
|
224 | 212 | " meta_regressor=rf, \n",
|
| 213 | + " random_state=RANDOM_SEED,\n", |
225 | 214 | " use_features_in_secondary=True)\n",
|
226 | 215 | "\n",
|
227 | 216 | "params = {'lasso__alpha': [0.1, 1.0, 10.0],\n",
|
|
252 | 241 | "name": "stdout",
|
253 | 242 | "output_type": "stream",
|
254 | 243 | "text": [
|
255 |
| - "0.616 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.05}\n", |
| 244 | + "0.637 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.05}\n", |
256 | 245 | "0.656 +/- 0.08 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.1}\n",
|
257 |
| - "0.653 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.15}\n", |
258 |
| - "0.669 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.2}\n", |
259 |
| - "0.632 +/- 0.08 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.25}\n", |
260 |
| - "0.664 +/- 0.08 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.3}\n", |
261 |
| - "0.632 +/- 0.08 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.35}\n", |
262 |
| - "0.642 +/- 0.08 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.4}\n", |
263 |
| - "0.653 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.45}\n", |
264 |
| - "0.657 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 100, 'ridge__alpha': 0.05}\n", |
265 |
| - "0.650 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 100, 'ridge__alpha': 0.1}\n", |
266 |
| - "0.648 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 100, 'ridge__alpha': 0.15}\n", |
| 246 | + "0.635 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.15}\n", |
| 247 | + "0.647 +/- 0.08 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.2}\n", |
| 248 | + "0.630 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.25}\n", |
| 249 | + "0.628 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.3}\n", |
| 250 | + "0.639 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.35}\n", |
| 251 | + "0.641 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.4}\n", |
| 252 | + "0.653 +/- 0.08 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.45}\n", |
| 253 | + "0.644 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 100, 'ridge__alpha': 0.05}\n", |
| 254 | + "0.642 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 100, 'ridge__alpha': 0.1}\n", |
| 255 | + "0.646 +/- 0.09 {'lasso__alpha': 0.2, 'meta_regressor__n_estimators': 100, 'ridge__alpha': 0.15}\n", |
267 | 256 | "...\n",
|
268 |
| - "Best parameters: {'lasso__alpha': 1.6, 'meta_regressor__n_estimators': 100, 'ridge__alpha': 0.2}\n", |
269 |
| - "Accuracy: 0.67\n" |
| 257 | + "Best parameters: {'lasso__alpha': 1.2, 'meta_regressor__n_estimators': 10, 'ridge__alpha': 0.4}\n", |
| 258 | + "Accuracy: 0.68\n" |
270 | 259 | ]
|
271 | 260 | }
|
272 | 261 | ],
|
|
318 | 307 | "text": [
|
319 | 308 | "## StackingCVRegressor\n",
|
320 | 309 | "\n",
|
321 |
| - "*StackingCVRegressor(regressors, meta_regressor, cv=5, shuffle=True, use_features_in_secondary=False, store_train_meta_features=False, refit=True)*\n", |
| 310 | + "*StackingCVRegressor(regressors, meta_regressor, cv=5, shuffle=True, random_state=None, verbose=0, refit=True, use_features_in_secondary=False, store_train_meta_features=False, n_jobs=None, pre_dispatch='2*n_jobs')*\n", |
322 | 311 | "\n",
|
323 | 312 | "A 'Stacking Cross-Validation' regressor for scikit-learn estimators.\n",
|
324 | 313 | "\n",
|
325 | 314 | "New in mlxtend v0.7.0\n",
|
326 | 315 | "\n",
|
327 |
| - "**Notes**\n", |
328 |
| - "\n", |
329 |
| - "The StackingCVRegressor uses scikit-learn's check_cv\n", |
330 |
| - "internally, which doesn't support a random seed. Thus\n", |
331 |
| - "NumPy's random seed need to be specified explicitely for\n", |
332 |
| - "deterministic behavior, for instance, by setting\n", |
333 |
| - "np.random.seed(RANDOM_SEED)\n", |
334 |
| - "prior to fitting the StackingCVRegressor\n", |
335 |
| - "\n", |
336 | 316 | "**Parameters**\n",
|
337 | 317 | "\n",
|
338 | 318 | "- `regressors` : array-like, shape = [n_regressors]\n",
|
|
357 | 337 | " - An iterable yielding train, test splits.\n",
|
358 | 338 | " For integer/None inputs, it will use `KFold` cross-validation\n",
|
359 | 339 | "\n",
|
360 |
| - "- `use_features_in_secondary` : bool (default: False)\n", |
361 |
| - "\n", |
362 |
| - " If True, the meta-regressor will be trained both on\n", |
363 |
| - " the predictions of the original regressors and the\n", |
364 |
| - " original dataset.\n", |
365 |
| - " If False, the meta-regressor will be trained only on\n", |
366 |
| - " the predictions of the original regressors.\n", |
367 |
| - "\n", |
368 | 340 | "- `shuffle` : bool (default: True)\n",
|
369 | 341 | "\n",
|
370 | 342 | " If True, and the `cv` argument is integer, the training data will\n",
|
371 | 343 | " be shuffled at fitting stage prior to cross-validation. If the `cv`\n",
|
372 | 344 | " argument is a specific cross validation technique, this argument is\n",
|
373 | 345 | " omitted.\n",
|
374 | 346 | "\n",
|
375 |
| - "- `store_train_meta_features` : bool (default: False)\n", |
| 347 | + "- `random_state` : int, RandomState instance or None, optional (default: None)\n", |
376 | 348 | "\n",
|
377 |
| - " If True, the meta-features computed from the training data\n", |
378 |
| - " used for fitting the\n", |
379 |
| - " meta-regressor stored in the `self.train_meta_features_` array,\n", |
380 |
| - " which can be\n", |
381 |
| - " accessed after calling `fit`.\n", |
| 349 | + "    Controls the randomness of the cv splitter. Used when `cv` is\n", |
| 350 | + " integer and `shuffle=True`. New in v0.16.0.\n", |
| 351 | + "\n", |
| 352 | + "- `verbose` : int, optional (default=0)\n", |
| 353 | + "\n", |
| 354 | + "    Controls the verbosity of the building process. New in v0.16.0.\n", |
382 | 355 | "\n",
|
383 | 356 | "- `refit` : bool (default: True)\n",
|
384 | 357 | "\n",
|
|
389 | 362 | " the scikit-learn fit/predict API interface but are not compatible\n",
|
390 | 363 | " to scikit-learn's `clone` function.\n",
|
391 | 364 | "\n",
|
| 365 | + "- `use_features_in_secondary` : bool (default: False)\n", |
| 366 | + "\n", |
| 367 | + " If True, the meta-regressor will be trained both on\n", |
| 368 | + " the predictions of the original regressors and the\n", |
| 369 | + " original dataset.\n", |
| 370 | + " If False, the meta-regressor will be trained only on\n", |
| 371 | + " the predictions of the original regressors.\n", |
| 372 | + "\n", |
| 373 | + "- `store_train_meta_features` : bool (default: False)\n", |
| 374 | + "\n", |
| 375 | + " If True, the meta-features computed from the training data\n", |
| 376 | + " used for fitting the\n", |
| 377 | + " meta-regressor stored in the `self.train_meta_features_` array,\n", |
| 378 | + " which can be\n", |
| 379 | + " accessed after calling `fit`.\n", |
| 380 | + "\n", |
| 381 | + "- `n_jobs` : int or None, optional (default=None)\n", |
| 382 | + "\n", |
| 383 | + " The number of CPUs to use to do the computation.\n", |
| 384 | + "    ``None`` means 1 unless in a `joblib.parallel_backend` context.\n", |
| 385 | + "    ``-1`` means using all processors. See the scikit-learn glossary\n", |
| 386 | + "    entry for `n_jobs` for more details. New in v0.16.0.\n", |
| 387 | + "\n", |
| 388 | + "- `pre_dispatch` : int, or string, optional\n", |
| 389 | + "\n", |
| 390 | + " Controls the number of jobs that get dispatched during parallel\n", |
| 391 | + " execution. Reducing this number can be useful to avoid an\n", |
| 392 | + " explosion of memory consumption when more jobs get dispatched\n", |
| 393 | + " than CPUs can process. This parameter can be:\n", |
| 394 | + " - None, in which case all the jobs are immediately\n", |
| 395 | + " created and spawned. Use this for lightweight and\n", |
| 396 | + " fast-running jobs, to avoid delays due to on-demand\n", |
| 397 | + " spawning of the jobs\n", |
| 398 | + " - An int, giving the exact number of total jobs that are\n", |
| 399 | + " spawned\n", |
| 400 | + " - A string, giving an expression as a function of n_jobs,\n", |
| 401 | + " as in '2*n_jobs'\n", |
| 402 | + " New in v0.16.0.\n", |
| 403 | + "\n", |
392 | 404 | "**Attributes**\n",
|
393 | 405 | "\n",
|
394 | 406 | "- `train_meta_features` : numpy array, shape = [n_samples, n_regressors]\n",
|
|
546 | 558 | "\n",
|
547 | 559 | "- `X` : array-like, shape = (n_samples, n_features)\n",
|
548 | 560 | "\n",
|
549 |
| - " Test samples.\n", |
| 561 | + " Test samples. For some estimators this may be a\n", |
| 562 | + " precomputed kernel matrix instead, shape = (n_samples,\n", |
| 563 | + "    n_samples_fitted), where n_samples_fitted is the number of\n", |
| 564 | + " samples used in the fitting for the estimator.\n", |
550 | 565 | "\n",
|
551 | 566 | "\n",
|
552 | 567 | "- `y` : array-like, shape = (n_samples) or (n_samples, n_outputs)\n",
|
|
570 | 585 | "\n",
|
571 | 586 | "Set the parameters of this estimator.\n",
|
572 | 587 | "\n",
|
573 |
| - "The method works on simple estimators as well as on nested objects\n", |
574 |
| - "(such as pipelines). The latter have parameters of the form\n", |
575 |
| - "``<component>__<parameter>`` so that it's possible to update each\n", |
576 |
| - "component of a nested object.\n", |
| 588 | + "Valid parameter keys can be listed with ``get_params()``.\n", |
577 | 589 | "\n",
|
578 | 590 | "**Returns**\n",
|
579 | 591 | "\n",
|
580 | 592 | "self\n",
|
581 | 593 | "\n",
|
| 594 | + "### Properties\n", |
| 595 | + "\n", |
| 596 | + "<hr>\n", |
| 597 | + "\n", |
| 598 | + "*named_regressors*\n", |
| 599 | + "\n", |
| 600 | + "**Returns**\n", |
| 601 | + "\n", |
| 602 | + "List of named estimator tuples, like [('svr', SVR(...))]\n", |
| 603 | + "\n", |
582 | 604 | "\n"
|
583 | 605 | ]
|
584 | 606 | }
|
|
0 commit comments