"""
|
|
===============================
|
|
Plot classification probability
|
|
===============================
|
|
|
|
Plot the classification probability for different classifiers. We use a 3 class
|
|
dataset, and we classify it with a Support Vector classifier, L1 and L2
|
|
penalized logistic regression (multinomial multiclass), a One-Vs-Rest version with
|
|
logistic regression, and Gaussian process classification.
|
|
|
|
Linear SVC is not a probabilistic classifier by default but it has a built-in
|
|
calibration option enabled in this example (`probability=True`).
|
|
|
|
The logistic regression with One-Vs-Rest is not a multiclass classifier out of
|
|
the box. As a result it has more trouble in separating class 2 and 3 than the
|
|
other estimators.
|
|
|
|
"""
|
|
|
|
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# License: BSD 3 clause

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm

from sklearn import datasets
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

iris = datasets.load_iris()
X = iris.data[:, 0:2]  # we only take the first two features for visualization
y = iris.target

n_features = X.shape[1]

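# `C` sets the regularization of the linear models (larger values mean weaker
# regularization); the anisotropic RBF kernel, with one length-scale per
# feature, is used only by the Gaussian process classifier.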
C = 10
kernel = 1.0 * RBF([1.0, 1.0])  # for GPC

# Create different classifiers.
classifiers = {
    "L1 logistic": LogisticRegression(C=C, penalty="l1", solver="saga", max_iter=10000),
    "L2 logistic (Multinomial)": LogisticRegression(
        C=C, penalty="l2", solver="saga", max_iter=10000
    ),
    "L2 logistic (OvR)": OneVsRestClassifier(
        LogisticRegression(C=C, penalty="l2", solver="saga", max_iter=10000)
    ),
    # `probability=True` calibrates the SVC (Platt scaling fitted with internal
    # cross-validation) so that it exposes `predict_proba`.
    "Linear SVC": SVC(kernel="linear", C=C, probability=True, random_state=0),
    "GPC": GaussianProcessClassifier(kernel),
}

n_classifiers = len(classifiers)

fig, axes = plt.subplots(
    nrows=n_classifiers,
    ncols=len(iris.target_names),
    figsize=(3 * 2, n_classifiers * 2),
)
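# One row of panels per classifier, one column per class: each panel maps the
# predicted probability of that class over the two-feature plane.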
for classifier_idx, (name, classifier) in enumerate(classifiers.items()):
    y_pred = classifier.fit(X, y).predict(X)
    accuracy = accuracy_score(y, y_pred)
    print(f"Accuracy (train) for {name}: {accuracy:0.1%}")
    for label in np.unique(y):
        # plot the probability estimate provided by the classifier
        disp = DecisionBoundaryDisplay.from_estimator(
            classifier,
            X,
            response_method="predict_proba",
            class_of_interest=label,
            ax=axes[classifier_idx, label],
            vmin=0,
            vmax=1,
        )
        axes[classifier_idx, label].set_title(f"Class {label}")
        # plot data predicted to belong to given class
        mask_y_pred = y_pred == label
        axes[classifier_idx, label].scatter(
            X[mask_y_pred, 0], X[mask_y_pred, 1], marker="o", c="w", edgecolor="k"
        )
        axes[classifier_idx, label].set(xticks=(), yticks=())
    axes[classifier_idx, 0].set_ylabel(name)
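
# A single horizontal colorbar, shared by all panels, maps colors to
# probabilities in [0, 1].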
ax = plt.axes([0.15, 0.04, 0.7, 0.02])
plt.title("Probability")
_ = plt.colorbar(
    cm.ScalarMappable(norm=None, cmap="viridis"), cax=ax, orientation="horizontal"
)

plt.show()
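
# As a quick check (an illustrative addition, not part of the original
# example), inspect the probabilities that the calibrated Linear SVC and the
# One-Vs-Rest logistic regression assign to the first sample: with
# `probability=True` the SVC exposes `predict_proba`, and each row sums to
# one across the three classes.
print(classifiers["Linear SVC"].predict_proba(X[:1]).round(3))
print(classifiers["L2 logistic (OvR)"].predict_proba(X[:1]).round(3))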