drop_last_proba feature for stacking classifiers #532

Merged 2 commits on May 12, 2019
5 changes: 3 additions & 2 deletions docs/sources/CHANGELOG.md
@@ -18,6 +18,8 @@ The CHANGELOG for the current development version is available at
##### New Features

- `StackingCVClassifier` and `StackingCVRegressor` now support a `random_state` parameter, which, together with `shuffle`, controls the randomness of the cv splitting. ([#523](https://github.com/rasbt/mlxtend/pull/523) via [Qiang Gu](https://github.com/qiagu))
- `StackingCVClassifier` and `StackingCVRegressor` now have a new `drop_last_proba` parameter. If `True`, it drops the last "probability" column in the feature set, because that column is redundant: p(y_c) = 1 - (p(y_1) + p(y_2) + ... + p(y_{c-1})). This can be useful for meta-classifiers that are sensitive to perfectly collinear features. ([#532](https://github.com/rasbt/mlxtend/pull/532))
- Other stacking estimators, including `StackingClassifier`, `StackingCVClassifier` and `StackingRegressor`, now support grid search over the `regressors` and even a single base regressor. ([#522](https://github.com/rasbt/mlxtend/pull/522) via [Qiang Gu](https://github.com/qiagu))
- Adds multiprocessing support to `StackingCVClassifier`. ([#522](https://github.com/rasbt/mlxtend/pull/522) via [Qiang Gu](https://github.com/qiagu))
- Adds multiprocessing support to `StackingCVRegressor`. ([#512](https://github.com/rasbt/mlxtend/pull/512) via [Qiang Gu](https://github.com/qiagu))
@@ -27,9 +29,8 @@ The CHANGELOG for the current development version is available at

##### Changes

- - The same change mentioned below is now applied to other stacking estimators, including `StackingClassifier`, `StackingCVClassifier` and `StackingRegressor`. ([#522](https://github.com/rasbt/mlxtend/pull/522) via [Qiang Gu](https://github.com/qiagu))
  - Due to new features, restructuring, and better scikit-learn support (for `GridSearchCV`, etc.), the `StackingCVRegressor`'s meta-regressor is now accessed via `'meta_regressor__*'` in the parameter grid. E.g., if a `RandomForestRegressor` was previously tuned as the meta-regressor via `'randomforestregressor__n_estimators'`, this has now changed to `'meta_regressor__n_estimators'`. ([#515](https://github.com/rasbt/mlxtend/pull/515) via [Qiang Gu](https://github.com/qiagu))

+ - The same change mentioned above is now applied to other stacking estimators, including `StackingClassifier`, `StackingCVClassifier` and `StackingRegressor`. ([#522](https://github.com/rasbt/mlxtend/pull/522) via [Qiang Gu](https://github.com/qiagu))

##### Bug Fixes

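Before the code diffs below, a minimal usage sketch of the new parameter (illustrative only, not part of this PR's diff), assuming scikit-learn's iris dataset: with two base classifiers and three classes, `use_probas=True` yields 2 × 3 = 6 meta-features, while `drop_last_proba=True` trims the redundant last column of each classifier's block, leaving 2 × 2 = 4.

```python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from mlxtend.classifier import StackingClassifier

X, y = load_iris(return_X_y=True)
lr = LogisticRegression(solver='liblinear', multi_class='ovr')

# Meta-features are built from predicted probabilities (use_probas=True);
# drop_last_proba=True removes the redundant last column per classifier.
sclf = StackingClassifier(classifiers=[lr, lr],
                          meta_classifier=lr,
                          use_probas=True,
                          drop_last_proba=True)
sclf.fit(X, y)

# 2 classifiers x (3 - 1) columns = 4 meta-features; with
# drop_last_proba=False this would be 2 x 3 = 6.
print(sclf.predict_meta_features(X[:2]).shape)  # (2, 4)
```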
25 changes: 13 additions & 12 deletions docs/sources/user_guide/classifier/StackingCVClassifier.ipynb

Large diffs are not rendered by default.

27 changes: 15 additions & 12 deletions docs/sources/user_guide/classifier/StackingClassifier.ipynb

Large diffs are not rendered by default.

21 changes: 17 additions & 4 deletions mlxtend/classifier/stacking_classification.py
@@ -37,8 +37,15 @@ class StackingClassifier(_BaseXComposition, ClassifierMixin,
use_probas : bool (default: False)
    If True, trains meta-classifier based on predicted probabilities
    instead of class labels.
drop_last_proba : bool (default: False)
    If `True`, drops the last "probability" column in the feature set,
    because it is redundant:
    p(y_c) = 1 - (p(y_1) + p(y_2) + ... + p(y_{c-1})).
    This can be useful for meta-classifiers that are sensitive to
    perfectly collinear features. Only relevant if `use_probas=True`.
average_probas : bool (default: False)
-    Averages the probabilities as meta features if True.
+    Averages the probabilities as meta features if `True`.
+    Only relevant if `use_probas=True`.
verbose : int, optional (default=0)
    Controls the verbosity of the building process.
    - `verbose=0` (default): Prints nothing
@@ -86,14 +93,16 @@ class StackingClassifier(_BaseXComposition, ClassifierMixin,

"""
def __init__(self, classifiers, meta_classifier,
-            use_probas=False, average_probas=False, verbose=0,
+            use_probas=False, drop_last_proba=False,
+            average_probas=False, verbose=0,
             use_features_in_secondary=False,
             store_train_meta_features=False,
             use_clones=True):

    self.classifiers = classifiers
    self.meta_classifier = meta_classifier
    self.use_probas = use_probas
    self.drop_last_proba = drop_last_proba
    self.average_probas = average_probas
    self.verbose = verbose
    self.use_features_in_secondary = use_features_in_secondary
@@ -205,8 +214,12 @@ def predict_meta_features(self, X):
"""
check_is_fitted(self, 'clfs_')
if self.use_probas:
probas = np.asarray([clf.predict_proba(X)
for clf in self.clfs_])
if self.drop_last_proba:
probas = np.asarray([clf.predict_proba(X)[:, :-1]
for clf in self.clfs_])
else:
probas = np.asarray([clf.predict_proba(X)
for clf in self.clfs_])
if self.average_probas:
vals = np.average(probas, axis=0)
else:
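The identity cited in the docstring above is easy to check numerically: every row returned by `predict_proba` sums to 1, so the last column is fully determined by the others and dropping it loses no information. A small standalone illustration (not from the diff):

```python
import numpy as np

# Two example probability rows over three classes; each row sums to 1.
proba = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.3, 0.6]])

# p(y_c) = 1 - (p(y_1) + ... + p(y_{c-1})): reconstruct the last column.
reconstructed_last = 1.0 - proba[:, :-1].sum(axis=1)
assert np.allclose(reconstructed_last, proba[:, -1])

# Dropping the last column, as drop_last_proba does, therefore removes a
# perfectly collinear feature without discarding information.
trimmed = proba[:, :-1]
print(trimmed.shape)  # (2, 2)
```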
17 changes: 15 additions & 2 deletions mlxtend/classifier/stacking_cv_classification.py
@@ -42,6 +42,12 @@ class StackingCVClassifier(_BaseXComposition, ClassifierMixin,
use_probas : bool (default: False)
    If True, trains meta-classifier based on predicted probabilities
    instead of class labels.
drop_last_proba : bool (default: False)
    If `True`, drops the last "probability" column in the feature set,
    because it is redundant:
    p(y_c) = 1 - (p(y_1) + p(y_2) + ... + p(y_{c-1})).
    This can be useful for meta-classifiers that are sensitive to
    perfectly collinear features. Only relevant if `use_probas=True`.
cv : int, cross-validation generator or an iterable, optional (default: 2)
    Determines the cross-validation splitting strategy.
    Possible inputs for cv are:
@@ -131,7 +137,8 @@ class StackingCVClassifier(_BaseXComposition, ClassifierMixin,

"""
def __init__(self, classifiers, meta_classifier,
-            use_probas=False, cv=2, shuffle=True,
+            use_probas=False, drop_last_proba=False,
+            cv=2, shuffle=True,
             random_state=None, stratify=True, verbose=0,
             use_features_in_secondary=False,
             store_train_meta_features=False,
@@ -141,6 +148,7 @@ def __init__(self, classifiers, meta_classifier,
    self.classifiers = classifiers
    self.meta_classifier = meta_classifier
    self.use_probas = use_probas
    self.drop_last_proba = drop_last_proba
    self.cv = cv
    self.shuffle = shuffle
    self.random_state = random_state
@@ -232,6 +240,8 @@ def fit(self, X, y, groups=None, sample_weight=None):

    if not self.use_probas:
        prediction = prediction[:, np.newaxis]
    elif self.drop_last_proba:
        prediction = prediction[:, :-1]

    if meta_features is None:
        meta_features = prediction
@@ -302,7 +312,10 @@ def predict_meta_features(self, X):
    if not self.use_probas:
        prediction = model.predict(X)[:, np.newaxis]
    else:
-       prediction = model.predict_proba(X)
+       if self.drop_last_proba:
+           prediction = model.predict_proba(X)[:, :-1]
+       else:
+           prediction = model.predict_proba(X)

    per_model_preds.append(prediction)

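Note that the trim is applied both in `fit` (to the out-of-fold predictions) and in `predict_meta_features`, so the meta-classifier sees the same feature layout at train and predict time. A sketch of verifying this (illustrative; it assumes the `store_train_meta_features` option visible in the constructor context above and the `train_meta_features_` attribute it populates):

```python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from mlxtend.classifier import StackingCVClassifier

X, y = load_iris(return_X_y=True)
lr = LogisticRegression(solver='liblinear', multi_class='ovr')

sclf = StackingCVClassifier(classifiers=[lr, lr],
                            meta_classifier=lr,
                            use_probas=True,
                            drop_last_proba=True,
                            random_state=123,
                            store_train_meta_features=True)
sclf.fit(X, y)

# Out-of-fold meta-features collected during fit and meta-features built at
# predict time should both have 2 * (3 - 1) = 4 columns.
assert sclf.train_meta_features_.shape[1] == 4
assert sclf.predict_meta_features(X[:2]).shape == (2, 4)
```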
33 changes: 33 additions & 0 deletions mlxtend/classifier/tests/test_stacking_classifier.py
@@ -195,6 +195,38 @@ def test_StackingClassifier_avg_vs_concat():
    np.array_equal(r2[0][:3], r2[0][3:])


def test_StackingClassifier_drop_last_proba():
    np.random.seed(123)
    lr1 = LogisticRegression(solver='liblinear',
                             multi_class='ovr')
    sclf1 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               drop_last_proba=False,
                               meta_classifier=lr1)

    sclf1.fit(X, y)
    r1 = sclf1.predict_meta_features(X[:2])
    assert r1.shape == (2, 6)

    sclf2 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               drop_last_proba=True,
                               meta_classifier=lr1)

    sclf2.fit(X, y)
    r2 = sclf2.predict_meta_features(X[:2])
    assert r2.shape == (2, 4), r2.shape

    sclf3 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               drop_last_proba=True,
                               meta_classifier=lr1)

    sclf3.fit(X[0:100], y[0:100])  # only 2 classes
    r3 = sclf3.predict_meta_features(X[:2])
    assert r3.shape == (2, 2), r3.shape


def test_multivariate_class():
    np.random.seed(123)
    meta = KNeighborsClassifier()
@@ -405,6 +437,7 @@ def test_get_params():
    got = sorted(list({s.split('__')[0] for s in sclf.get_params().keys()}))
    expect = ['average_probas',
              'classifiers',
              'drop_last_proba',
              'gaussiannb',
              'kneighborsclassifier',
              'meta_classifier',
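Because `drop_last_proba` is a plain constructor parameter surfaced through `get_params()` (as the `test_get_params` updates above and below assert), it can be tuned like any other hyperparameter. A sketch with scikit-learn's `GridSearchCV` (illustrative only, not part of the diff):

```python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from mlxtend.classifier import StackingCVClassifier

X, y = load_iris(return_X_y=True)
lr = LogisticRegression(solver='liblinear', multi_class='ovr')

sclf = StackingCVClassifier(classifiers=[lr, lr],
                            meta_classifier=lr,
                            use_probas=True,
                            random_state=42)

# Treat the new flag as a searchable hyperparameter.
grid = GridSearchCV(estimator=sclf,
                    param_grid={'drop_last_proba': [True, False]},
                    cv=3)
grid.fit(X, y)
print(grid.best_params_)
```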
33 changes: 33 additions & 0 deletions mlxtend/classifier/tests/test_stacking_cv_classifier.py
@@ -341,6 +341,7 @@ def test_get_params():

    expect = ['classifiers',
              'cv',
              'drop_last_proba',
              'gaussiannb',
              'kneighborsclassifier',
              'meta_classifier',
@@ -479,6 +480,38 @@ def test_sparse_inputs_with_features_in_secondary():
    round(stclf.score(X_train, y_train), 2)


def test_StackingClassifier_drop_last_proba():
    np.random.seed(123)
    lr1 = LogisticRegression(solver='liblinear',
                             multi_class='ovr')
    sclf1 = StackingCVClassifier(classifiers=[lr1, lr1],
                                 use_probas=True,
                                 drop_last_proba=False,
                                 meta_classifier=lr1)

    sclf1.fit(X_iris, y_iris)
    r1 = sclf1.predict_meta_features(X_iris[:2])
    assert r1.shape == (2, 6)

    sclf2 = StackingCVClassifier(classifiers=[lr1, lr1],
                                 use_probas=True,
                                 drop_last_proba=True,
                                 meta_classifier=lr1)

    sclf2.fit(X_iris, y_iris)
    r2 = sclf2.predict_meta_features(X_iris[:2])
    assert r2.shape == (2, 4), r2.shape

    sclf3 = StackingCVClassifier(classifiers=[lr1, lr1],
                                 use_probas=True,
                                 drop_last_proba=True,
                                 meta_classifier=lr1)

    sclf3.fit(X_iris[0:100], y_iris[0:100])  # only 2 classes
    r3 = sclf3.predict_meta_features(X_iris[:2])
    assert r3.shape == (2, 2), r3.shape


def test_works_with_df_if_fold_indexes_missing():
"""This is a regression test to make sure fitting will still work even if
training data has ids that cannot be indexed using the indexes from the cv
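A closing note on the binary edge case exercised by `sclf3` in both test files: with only two classes, `drop_last_proba=True` leaves a single column per base classifier, p(y_1), since p(y_2) = 1 - p(y_1), which is why two classifiers produce the `(2, 2)` shape. The shape arithmetic, as a sketch (illustrative only):

```python
# Meta-feature width for use_probas=True stacking:
#   n_classifiers * n_classes        if drop_last_proba=False
#   n_classifiers * (n_classes - 1)  if drop_last_proba=True
def meta_feature_width(n_classifiers, n_classes, drop_last_proba):
    cols = n_classes - 1 if drop_last_proba else n_classes
    return n_classifiers * cols

assert meta_feature_width(2, 3, False) == 6  # r1 in the tests
assert meta_feature_width(2, 3, True) == 4   # r2
assert meta_feature_width(2, 2, True) == 2   # r3, the binary case
```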