rasbt · rasbt · Dec 1, 2017 · Nov 30, 2017 · Nov 30, 2017 · Nov 30, 2017
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -15,6 +15,9 @@ The CHANGELOG for the current development version is available at
 ##### New Features
 
 - New `max_len` parameter for the frequent itemset generation via the `apriori` function to allow for early stopping. ([#270](https://github.com/rasbt/mlxtend/pull/270))
+- New `store_train_meta_features` parameter for StackingCVRegressor. If True, the train meta-features computed during `fit` are stored in `self.train_meta_features_`.
+    New `predict_meta_features` method for StackingCVRegressor, which returns the meta-features of test data. ([#294](https://github.com/rasbt/mlxtend/pull/294)
+    via [takashioya](https://github.com/takashioya))
 
 ##### Changes
 

diff --git a/mlxtend/regressor/stacking_cv_regression.py b/mlxtend/regressor/stacking_cv_regression.py
@@ -66,10 +66,22 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
         be shuffled at fitting stage prior to cross-validation. If the `cv`
         argument is a specific cross validation technique, this argument is
         omitted.
+    store_train_meta_features : bool (default: False)
+        If True, the meta-features computed from the training data and used to
+        fit the meta-regressor are stored in `self.train_meta_features_`,
+        which can be accessed after calling `fit`.
+
+    Attributes
+    ----------
+    train_meta_features_ : numpy array, shape = [n_samples, len(self.regressors)]
+        Meta-features for the training data (n_samples training samples, one
+        column per regressor). Set by `fit` if `store_train_meta_features` is True.
+
     """
     def __init__(self, regressors, meta_regressor, cv=5,
                  shuffle=True,
-                 use_features_in_secondary=False):
+                 use_features_in_secondary=False,
+                 store_train_meta_features=False):
 
         self.regressors = regressors
         self.meta_regressor = meta_regressor
@@ -82,6 +94,7 @@ def __init__(self, regressors, meta_regressor, cv=5,
         self.cv = cv
         self.shuffle = shuffle
         self.use_features_in_secondary = use_features_in_secondary
+        self.store_train_meta_features = store_train_meta_features
 
     def fit(self, X, y, groups=None):
         """ Fit ensemble regressors and the meta-regressor.
@@ -137,6 +150,10 @@ def fit(self, X, y, groups=None):
                 y_pred = instance.predict(X[holdout_idx])
                 meta_features[holdout_idx, i] = y_pred
 
+        # save meta-features for training data
+        if self.store_train_meta_features:
+            self.train_meta_features_ = meta_features
+
         # Train meta-model on the out-of-fold predictions
         if self.use_features_in_secondary:
             self.meta_regr_.fit(np.hstack((X, meta_features)), y)
@@ -163,6 +180,24 @@ def predict(self, X):
         else:
             return self.meta_regr_.predict(meta_features)
 
+    def predict_meta_features(self, X):
+        """ Compute the meta-features of X from the regressors in `self.regr_`.
+
+        Parameters
+        ----------
+        X : numpy array, shape = [n_samples, n_features]
+            Input vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        -------
+        meta-features : numpy array, shape = [n_samples, len(self.regressors)]
+            One column per regressor in `self.regr_`, holding its predictions.
+
+        """
+        base_predictions = [model.predict(X) for model in self.regr_]
+        return np.column_stack(base_predictions)
+
     def get_params(self, deep=True):
         #
         # Return estimator parameter names for GridSearch support.

diff --git a/mlxtend/regressor/tests/test_cv_stacking_regression.py b/mlxtend/regressor/tests/test_cv_stacking_regression.py
@@ -12,7 +12,7 @@
 from sklearn.linear_model import Ridge
 from sklearn.svm import SVR
 import numpy as np
-from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import GridSearchCV, train_test_split
 
 
 # Some test data
@@ -120,7 +120,8 @@ def test_get_params():
               'regressors',
               'ridge',
               'shuffle',
-              'use_features_in_secondary']
+              'store_train_meta_features',
+              'use_features_in_secondary',]
     assert got == expect, got
 
 
@@ -140,3 +141,28 @@ def test_regressor_gridsearch():
     grid.fit(X1, y)
 
     assert len(grid.best_params_['regressors']) == 3
+
+
+def test_predict_meta_features():
+    # One meta-feature column per base regressor, one row per test sample.
+    lr = LinearRegression()
+    ridge = Ridge(random_state=1)
+    stregr = StackingCVRegressor(regressors=[lr, ridge],
+                                 meta_regressor=SVR(kernel='rbf'))
+    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
+    stregr.fit(X_train, y_train)
+    test_meta_features = stregr.predict_meta_features(X_test)
+    assert test_meta_features.shape == (X_test.shape[0], 2)
+
+
+def test_train_meta_features_():
+    # With store_train_meta_features=True, fit keeps the training
+    # meta-features in train_meta_features_ (one row per training sample).
+    base_regressors = [LinearRegression(), Ridge(random_state=1)]
+    stack = StackingCVRegressor(regressors=base_regressors,
+                                meta_regressor=SVR(kernel='rbf'),
+                                store_train_meta_features=True)
+    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
+    stack.fit(X_train, y_train)
+    stored = stack.train_meta_features_
+    assert stored.shape[0] == X_train.shape[0]