"""
===========================
Vector Quantization Example
===========================

This example shows how one can use :class:`~sklearn.preprocessing.KBinsDiscretizer`
to perform vector quantization on a toy image, the raccoon face.
"""

# Authors: Gael Varoquaux
#          Jaques Grobler
# License: BSD 3 clause

# %%
# Original image
# --------------
#
# We start by loading the raccoon face image from SciPy. We will additionally
# check some information about the image, such as its shape and the data type
# used to store it.
#
# Note that depending on the SciPy version, we have to adapt the import since
# the function returning the image is not located in the same module. Also,
# SciPy >= 1.10 requires the package `pooch` to be installed.
try:  # SciPy >= 1.10
    from scipy.datasets import face
except ImportError:
    from scipy.misc import face

raccoon_face = face(gray=True)

print(f"The dimension of the image is {raccoon_face.shape}")
print(f"The data used to encode the image is of type {raccoon_face.dtype}")
print(f"The number of bytes taken in RAM is {raccoon_face.nbytes}")

# %%
# Thus the image is a 2D array of 768 pixels in height and 1024 pixels in
# width. Each value is an 8-bit unsigned integer, which means that the image is
# encoded using 8 bits per pixel. The total memory usage of the image is about
# 786 kilobytes (1 byte equals 8 bits).
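
# %%
# As a quick check of that arithmetic (a minimal sketch, not part of the
# example's logic): 768 * 1024 pixels at one byte per pixel.
print(f"Expected number of bytes: {768 * 1024}")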

# %%
# Using an 8-bit unsigned integer means that the image is encoded using, at
# most, 256 different shades of gray. We can check the distribution of these
# values.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(ncols=2, figsize=(12, 4))

ax[0].imshow(raccoon_face, cmap=plt.cm.gray)
ax[0].axis("off")
ax[0].set_title("Rendering of the image")
ax[1].hist(raccoon_face.ravel(), bins=256)
ax[1].set_xlabel("Pixel value")
ax[1].set_ylabel("Count of pixels")
ax[1].set_title("Distribution of the pixel values")
_ = fig.suptitle("Original image of a raccoon face")

# %%
# Compression via vector quantization
# -----------------------------------
#
# The idea behind compression via vector quantization is to reduce the number
# of gray levels used to represent an image. For instance, we can use 8 values
# instead of 256. Therefore, we could use 3 bits instead of 8 bits to encode a
# single pixel and thus reduce the memory usage by a factor of approximately
# 2.7, as computed below. We will come back to this memory usage later.
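
# %%
# As a quick sanity check (a minimal sketch, not part of the compression
# itself), the theoretical reduction factor of moving from 8 bits to 3 bits
# per pixel is simply the ratio of the two bit widths:
print(f"Theoretical memory reduction factor: {8 / 3:.1f}")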

# %%
# Encoding strategy
# """""""""""""""""
#
# The compression can be done using a
# :class:`~sklearn.preprocessing.KBinsDiscretizer`. We need to choose a
# strategy to define the 8 gray values to sub-sample. The simplest strategy is
# to define them equally spaced, which corresponds to setting
# `strategy="uniform"`. From the previous histogram, we know that this strategy
# is certainly not optimal.

from sklearn.preprocessing import KBinsDiscretizer

n_bins = 8
encoder = KBinsDiscretizer(
    n_bins=n_bins,
    encode="ordinal",
    strategy="uniform",
    random_state=0,
)
compressed_raccoon_uniform = encoder.fit_transform(raccoon_face.reshape(-1, 1)).reshape(
    raccoon_face.shape
)

fig, ax = plt.subplots(ncols=2, figsize=(12, 4))
ax[0].imshow(compressed_raccoon_uniform, cmap=plt.cm.gray)
ax[0].axis("off")
ax[0].set_title("Rendering of the image")
ax[1].hist(compressed_raccoon_uniform.ravel(), bins=256)
ax[1].set_xlabel("Pixel value")
ax[1].set_ylabel("Count of pixels")
ax[1].set_title("Sub-sampled distribution of the pixel values")
_ = fig.suptitle("Raccoon face compressed using 3 bits and a uniform strategy")

# %%
# Qualitatively, we can spot some small regions where we see the effect of the
# compression (e.g. the leaves in the bottom right corner). Overall, though,
# the resulting image still looks good.
#
# We observe that the distribution of pixel values has been mapped to 8
# different values. We can check the correspondence between these values and
# the original pixel values.

bin_edges = encoder.bin_edges_[0]
bin_center = bin_edges[:-1] + (bin_edges[1:] - bin_edges[:-1]) / 2
bin_center

# %%
_, ax = plt.subplots()
ax.hist(raccoon_face.ravel(), bins=256)
color = "tab:orange"
for center in bin_center:
    ax.axvline(center, color=color)
    ax.text(center - 10, ax.get_ybound()[1] + 100, f"{center:.1f}", color=color)

# %%
# As previously stated, the uniform sampling strategy is not optimal. Notice,
# for instance, that the pixels mapped to the value 7 account for a rather
# small number of pixels, whereas the value 3 represents a large share of the
# counts. We can instead use a clustering strategy such as k-means to find a
# more optimal mapping.

encoder = KBinsDiscretizer(
    n_bins=n_bins,
    encode="ordinal",
    strategy="kmeans",
    random_state=0,
)
compressed_raccoon_kmeans = encoder.fit_transform(raccoon_face.reshape(-1, 1)).reshape(
    raccoon_face.shape
)

fig, ax = plt.subplots(ncols=2, figsize=(12, 4))
ax[0].imshow(compressed_raccoon_kmeans, cmap=plt.cm.gray)
ax[0].axis("off")
ax[0].set_title("Rendering of the image")
ax[1].hist(compressed_raccoon_kmeans.ravel(), bins=256)
ax[1].set_xlabel("Pixel value")
ax[1].set_ylabel("Count of pixels")
ax[1].set_title("Distribution of the pixel values")
_ = fig.suptitle("Raccoon face compressed using 3 bits and a K-means strategy")

# %%
bin_edges = encoder.bin_edges_[0]
bin_center = bin_edges[:-1] + (bin_edges[1:] - bin_edges[:-1]) / 2
bin_center

# %%
_, ax = plt.subplots()
ax.hist(raccoon_face.ravel(), bins=256)
color = "tab:orange"
for center in bin_center:
    ax.axvline(center, color=color)
    ax.text(center - 10, ax.get_ybound()[1] + 100, f"{center:.1f}", color=color)

# %%
# The counts in the bins are now more balanced and their centers are no longer
# equally spaced. Note that we could enforce the same number of pixels per bin
# by using `strategy="quantile"` instead of `strategy="kmeans"`, as sketched
# below.
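
# %%
# A minimal sketch of the quantile alternative, reusing the names defined
# above (the variable `encoder_quantile` is introduced here just for
# illustration):
encoder_quantile = KBinsDiscretizer(
    n_bins=n_bins,
    encode="ordinal",
    strategy="quantile",
    random_state=0,
)
# Fitting on the flattened pixels places the bin edges so that each bin
# receives approximately the same number of pixels.
encoder_quantile.fit(raccoon_face.reshape(-1, 1))
print(f"Bin edges with the quantile strategy: {encoder_quantile.bin_edges_[0]}")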

# %%
# Memory footprint
# """"""""""""""""
#
# We previously stated that we should use roughly 2.7 times less memory. Let's
# verify it.

print(f"The number of bytes taken in RAM is {compressed_raccoon_kmeans.nbytes}")
|
||
|
print(f"Compression ratio: {compressed_raccoon_kmeans.nbytes / raccoon_face.nbytes}")
|
||
|
|
||
|
# %%
# It is quite surprising to see that our compressed image takes 8 times more
# memory than the original image, the opposite of what we expected. The reason
# is mainly due to the data type used to encode the image.

print(f"Type of the compressed image: {compressed_raccoon_kmeans.dtype}")
|
||
|
|
||
|
# %%
# Indeed, the output of :class:`~sklearn.preprocessing.KBinsDiscretizer` is an
# array of 64-bit floats, which takes 8 times more memory per pixel than the
# original 8-bit unsigned integers. However, we use this 64-bit float
# representation to encode only 8 values. We would save memory only by casting
# the compressed image to a smaller integer type, e.g. with the method
# `numpy.ndarray.astype`. However, a 3-bit integer representation does not
# exist, so to encode the 8 values we would have to fall back on the 8-bit
# unsigned integer representation as well, as sketched below.
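
# %%
# A minimal sketch of that cast (the variable `compressed_raccoon_uint8` is
# introduced here just for illustration): casting back to 8-bit unsigned
# integers brings us back to the memory footprint of the original image, not
# below it.
import numpy as np

compressed_raccoon_uint8 = compressed_raccoon_kmeans.astype(np.uint8)
print(f"The number of bytes taken in RAM is {compressed_raccoon_uint8.nbytes}")
print(f"Compression ratio: {compressed_raccoon_uint8.nbytes / raccoon_face.nbytes}")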

# %%
# In practice, observing a memory gain would require the original image to be
# in a 64-bit float representation.