sklearn/examples/compose/plot_feature_union.py

63 lines
1.9 KiB
Python

"""
=================================================
Concatenating multiple feature extraction methods
=================================================
In many real-world examples, there are many ways to extract features from a
dataset. Often it is beneficial to combine several methods to obtain good
performance. This example shows how to use ``FeatureUnion`` to combine
features obtained by PCA and univariate selection.
Combining features using this transformer has the benefit that it allows
cross validation and grid searches over the whole process.
The combination used in this example is not particularly helpful on this
dataset and is only used to illustrate the usage of FeatureUnion.
"""
# Author: Andreas Mueller <amueller@ais.uni-bonn.de>
#
# License: BSD 3 clause
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.svm import SVC
iris = load_iris()
X, y = iris.data, iris.target
# This dataset is way too high-dimensional. Better do PCA:
pca = PCA(n_components=2)
# Maybe some original features were good, too?
selection = SelectKBest(k=1)
# Build estimator from PCA and Univariate selection:
combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
# Use combined features to transform dataset:
X_features = combined_features.fit(X, y).transform(X)
print("Combined space has", X_features.shape[1], "features")
svm = SVC(kernel="linear")
# Do grid search over k, n_components and C:
pipeline = Pipeline([("features", combined_features), ("svm", svm)])
param_grid = dict(
features__pca__n_components=[1, 2, 3],
features__univ_select__k=[1, 2],
svm__C=[0.1, 1, 10],
)
grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
grid_search.fit(X, y)
print(grid_search.best_estimator_)