sklearn/examples/cluster/plot_face_compress.py
"""
===========================
Vector Quantization Example
===========================

This example shows how one can use :class:`~sklearn.preprocessing.KBinsDiscretizer`
to perform vector quantization on a toy image, the raccoon face.
"""
# Authors: Gael Varoquaux
# Jaques Grobler
# License: BSD 3 clause
# %%
# Original image
# --------------
#
# We start by loading the raccoon face image from SciPy. We will additionally check
# some information about the image, such as its shape and the data type used to
# store it.
#
# Note that, depending on the SciPy version, we have to adapt the import since the
# function returning the image is not located in the same module. Also, SciPy >= 1.10
# requires the package `pooch` to be installed.
try:  # SciPy >= 1.10
    from scipy.datasets import face
except ImportError:
    from scipy.misc import face

raccoon_face = face(gray=True)
print(f"The dimension of the image is {raccoon_face.shape}")
print(f"The data used to encode the image is of type {raccoon_face.dtype}")
print(f"The number of bytes taken in RAM is {raccoon_face.nbytes}")
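# %%
# As a quick cross-check, this number of bytes is simply the number of pixels
# multiplied by the size of a single element:
print(
    "Expected number of bytes:",
    raccoon_face.shape[0] * raccoon_face.shape[1] * raccoon_face.itemsize,
)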
# %%
# Thus the image is a 2D array of 768 pixels in height and 1024 pixels in width. Each
# value is an 8-bit unsigned integer, which means that the image is encoded using 8
# bits per pixel. The total memory usage of the image is 786 kilobytes (1 byte equals
# 8 bits).
#
# Using 8-bit unsigned integer means that the image is encoded using 256 different
# shades of gray, at most. We can check the distribution of these values.
import matplotlib.pyplot as plt
fig, ax = plt.subplots(ncols=2, figsize=(12, 4))
ax[0].imshow(raccoon_face, cmap=plt.cm.gray)
ax[0].axis("off")
ax[0].set_title("Rendering of the image")
ax[1].hist(raccoon_face.ravel(), bins=256)
ax[1].set_xlabel("Pixel value")
ax[1].set_ylabel("Count of pixels")
ax[1].set_title("Distribution of the pixel values")
_ = fig.suptitle("Original image of a raccoon face")
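# %%
# Since the image is stored with 8-bit unsigned integers, at most 256 distinct
# gray levels can appear. As a complement to the histogram, we can count how many
# are actually present in this particular image:
import numpy as np

print(f"Number of distinct gray levels: {np.unique(raccoon_face).size}")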
# %%
# Compression via vector quantization
# -----------------------------------
#
# The idea behind compression via vector quantization is to reduce the number of
# gray levels used to represent an image. For instance, we can use 8 values instead
# of 256. With 8 values, a single pixel can be encoded with 3 bits instead of 8,
# which would reduce the memory usage by a factor of about 2.7 (8 bits / 3 bits).
# We will come back to this memory usage later.
#
# Encoding strategy
# """""""""""""""""
#
# The compression can be done using a
# :class:`~sklearn.preprocessing.KBinsDiscretizer`. We need to choose a strategy
# to define the 8 gray values to sub-sample. The simplest strategy is to define
# them as equally spaced, which corresponds to setting `strategy="uniform"`. From
# the previous histogram, we know that this strategy is certainly not optimal.
from sklearn.preprocessing import KBinsDiscretizer

n_bins = 8
encoder = KBinsDiscretizer(
    n_bins=n_bins,
    encode="ordinal",
    strategy="uniform",
    random_state=0,
)
compressed_raccoon_uniform = encoder.fit_transform(raccoon_face.reshape(-1, 1)).reshape(
    raccoon_face.shape
)
fig, ax = plt.subplots(ncols=2, figsize=(12, 4))
ax[0].imshow(compressed_raccoon_uniform, cmap=plt.cm.gray)
ax[0].axis("off")
ax[0].set_title("Rendering of the image")
ax[1].hist(compressed_raccoon_uniform.ravel(), bins=256)
ax[1].set_xlabel("Pixel value")
ax[1].set_ylabel("Count of pixels")
ax[1].set_title("Sub-sampled distribution of the pixel values")
_ = fig.suptitle("Raccoon face compressed using 3 bits and a uniform strategy")
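# %%
# Note that the compressed array stores the ordinal bin codes rather than gray
# levels. As a quick check, we can verify that only 8 distinct values remain:
print(f"Distinct values after compression: {np.unique(compressed_raccoon_uniform)}")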
# %%
# Qualitatively, we can spot some small regions where we see the effect of the
# compression (e.g. the leaves in the bottom right corner). Overall, though, the
# resulting image still looks good.
#
# We observe that the distribution of pixel values has been mapped to 8 different
# values. We can check the correspondence between these values and the original
# pixel values.
bin_edges = encoder.bin_edges_[0]
bin_center = bin_edges[:-1] + (bin_edges[1:] - bin_edges[:-1]) / 2
bin_center
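# %%
# To map the 8 ordinal codes back to gray levels, one simple option (a small
# decoding sketch) is to index the bin centers with the codes;
# `encoder.inverse_transform` should give the same result. We will reuse this
# decoded image below when comparing strategies.
decoded_uniform = bin_center[compressed_raccoon_uniform.astype(int)]
print(f"Gray levels used by the decoded image: {np.unique(decoded_uniform)}")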
# %%
_, ax = plt.subplots()
ax.hist(raccoon_face.ravel(), bins=256)
color = "tab:orange"
for center in bin_center:
    ax.axvline(center, color=color)
    ax.text(center - 10, ax.get_ybound()[1] + 100, f"{center:.1f}", color=color)
# %%
# As previously stated, the uniform sampling strategy is not optimal. Notice, for
# instance, that the pixels mapped to the value 7 encode a rather small amount of
# information, whereas the value 3 accounts for a large number of pixels. We can
# instead use a clustering strategy such as k-means to find a better mapping.
encoder = KBinsDiscretizer(
    n_bins=n_bins,
    encode="ordinal",
    strategy="kmeans",
    random_state=0,
)
compressed_raccoon_kmeans = encoder.fit_transform(raccoon_face.reshape(-1, 1)).reshape(
    raccoon_face.shape
)
fig, ax = plt.subplots(ncols=2, figsize=(12, 4))
ax[0].imshow(compressed_raccoon_kmeans, cmap=plt.cm.gray)
ax[0].axis("off")
ax[0].set_title("Rendering of the image")
ax[1].hist(compressed_raccoon_kmeans.ravel(), bins=256)
ax[1].set_xlabel("Pixel value")
ax[1].set_ylabel("Number of pixels")
ax[1].set_title("Distribution of the pixel values")
_ = fig.suptitle("Raccoon face compressed using 3 bits and a K-means strategy")
# %%
bin_edges = encoder.bin_edges_[0]
bin_center = bin_edges[:-1] + (bin_edges[1:] - bin_edges[:-1]) / 2
bin_center
# %%
_, ax = plt.subplots()
ax.hist(raccoon_face.ravel(), bins=256)
color = "tab:orange"
for center in bin_center:
    ax.axvline(center, color=color)
    ax.text(center - 10, ax.get_ybound()[1] + 100, f"{center:.1f}", color=color)
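# %%
# As a rough quantitative comparison of the two strategies, we can decode both
# compressed images back to gray levels (reusing `decoded_uniform` from above)
# and compute their mean squared reconstruction errors against the original image:
decoded_kmeans = bin_center[compressed_raccoon_kmeans.astype(int)]
mse_uniform = np.mean((raccoon_face - decoded_uniform) ** 2)
mse_kmeans = np.mean((raccoon_face - decoded_kmeans) ** 2)
print(f"MSE with the uniform strategy: {mse_uniform:.2f}")
print(f"MSE with the k-means strategy: {mse_kmeans:.2f}")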
# %%
# The counts in the bins are now more balanced and their centers are no longer
# equally spaced. Note that we could enforce the same number of pixels per bin
# by using `strategy="quantile"` instead of `strategy="kmeans"`.
#
# Memory footprint
# """"""""""""""""
#
# We previously stated that the memory usage should be reduced by a factor of
# about 2.7. Let's verify it.
print(f"The number of bytes taken in RAM is {compressed_raccoon_kmeans.nbytes}")
print(f"Compression ratio: {compressed_raccoon_kmeans.nbytes / raccoon_face.nbytes}")
# %%
# It is quite surprising to see that our compressed image takes 8 times more
# memory than the original image. This is indeed the opposite of what we
# expected. The reason lies mainly in the data type used to encode the image.
print(f"Type of the compressed image: {compressed_raccoon_kmeans.dtype}")
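# %%
# One quick way to confirm this factor of 8 is to compare the item size of the
# two arrays:
print(f"Item size of the original image: {raccoon_face.itemsize} byte")
print(f"Item size of the compressed image: {compressed_raccoon_kmeans.itemsize} bytes")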
# %%
# Indeed, the output of the :class:`~sklearn.preprocessing.KBinsDiscretizer` is
# an array of 64-bit floats, which takes 8 times more memory per pixel than the
# original 8-bit unsigned integers. Yet this 64-bit representation is only used
# to encode 8 distinct values. To save memory, we would have to cast the
# compressed image to an array of smaller integers, for instance with the method
# `numpy.ndarray.astype`. However, a 3-bit integer representation does not exist,
# so to encode the 8 values we would have to use the 8-bit unsigned integer
# representation as well.
#
# In practice, observing a memory gain would require the original image to be in
# a 64-bit float representation.
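# %%
# As a rough sketch of the cast mentioned above, we can convert the codes to
# 8-bit unsigned integers (the smallest available integer type that can hold our
# 8 values). As discussed, this only brings us back to the footprint of the
# original image, while a hypothetical 3-bit encoding would be about 2.7 times
# smaller:
compressed_as_uint8 = compressed_raccoon_kmeans.astype(np.uint8)
n_bytes_3bit = compressed_as_uint8.size * 3 // 8
print(f"Number of bytes after casting to uint8: {compressed_as_uint8.nbytes}")
print(f"Theoretical number of bytes with 3 bits per pixel: {n_bytes_3bit}")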