#! /usr/bin/env python
#
# Copyright (C) 2007-2009 Cournapeau David <cournape@gmail.com>
#               2010 Fabian Pedregosa <fabian.pedregosa@inria.fr>
# License: 3-clause BSD
import importlib
import os
import platform
import shutil
import sys
import traceback
from os.path import join

from setuptools import Command, Extension, setup
from setuptools.command.build_ext import build_ext

try:
    import builtins
except ImportError:
    # Python 2 compat: just to be able to declare that Python >=3.9 is needed.
    import __builtin__ as builtins

# This is a bit (!) hackish: we are setting a global variable so that the main
# sklearn __init__ can detect if it is being loaded by the setup routine, to
# avoid attempting to load components that aren't built yet.
# TODO: can this be simplified or removed since the switch to setuptools
# away from numpy.distutils?
builtins.__SKLEARN_SETUP__ = True

DISTNAME = "scikit-learn"
DESCRIPTION = "A set of python modules for machine learning and data mining"
with open("README.rst") as f:
    LONG_DESCRIPTION = f.read()
MAINTAINER = "scikit-learn developers"
MAINTAINER_EMAIL = "scikit-learn@python.org"
URL = "https://scikit-learn.org"
DOWNLOAD_URL = "https://pypi.org/project/scikit-learn/#files"
LICENSE = "new BSD"
PROJECT_URLS = {
    "Bug Tracker": "https://github.com/scikit-learn/scikit-learn/issues",
    "Documentation": "https://scikit-learn.org/stable/documentation.html",
    "Source Code": "https://github.com/scikit-learn/scikit-learn",
}

# We can actually import a restricted version of sklearn that
# does not need the compiled code
import sklearn  # noqa
import sklearn._min_dependencies as min_deps  # noqa
from sklearn._build_utils import _check_cython_version  # noqa
from sklearn.externals._packaging.version import parse as parse_version  # noqa

VERSION = sklearn.__version__


# Custom clean command to remove build artifacts
class CleanCommand(Command):
    description = "Remove build artifacts from the source tree"

    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        # Remove .c files if we are not within an sdist package
        cwd = os.path.abspath(os.path.dirname(__file__))
        remove_c_files = not os.path.exists(os.path.join(cwd, "PKG-INFO"))
        if remove_c_files:
            print("Will remove generated .c files")
        if os.path.exists("build"):
            shutil.rmtree("build")
        for dirpath, dirnames, filenames in os.walk("sklearn"):
            for filename in filenames:
                root, extension = os.path.splitext(filename)

                if extension in [".so", ".pyd", ".dll", ".pyc"]:
                    os.unlink(os.path.join(dirpath, filename))

                if remove_c_files and extension in [".c", ".cpp"]:
                    pyx_file = str.replace(filename, extension, ".pyx")
                    if os.path.exists(os.path.join(dirpath, pyx_file)):
                        os.unlink(os.path.join(dirpath, filename))

                if remove_c_files and extension == ".tp":
                    if os.path.exists(os.path.join(dirpath, root)):
                        os.unlink(os.path.join(dirpath, root))

            for dirname in dirnames:
                if dirname == "__pycache__":
                    shutil.rmtree(os.path.join(dirpath, dirname))
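

# Registered as the `clean` command through the `cmdclass` mapping below, so a
# typical invocation is `python setup.py clean`. Note that generated .c/.cpp
# files are only removed when a matching .pyx source sits next to them.
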
# Custom build_ext command to set OpenMP compile flags depending on os and
# compiler. Also makes it possible to set the parallelism level via an
# environment variable (useful for the wheel building CI).
# build_ext has to be imported after setuptools
class build_ext_subclass(build_ext):
    def finalize_options(self):
        build_ext.finalize_options(self)
        if self.parallel is None:
            # Do not override self.parallel if already defined by
            # command-line flag (--parallel or -j)
            parallel = os.environ.get("SKLEARN_BUILD_PARALLEL")
            if parallel:
                self.parallel = int(parallel)
        if self.parallel:
            print("setting parallel=%d " % self.parallel)

    def build_extensions(self):
        from sklearn._build_utils.openmp_helpers import get_openmp_flag

        # Always use NumPy 1.7 C API for all compiled extensions.
        # See: https://numpy.org/doc/stable/reference/c-api/deprecations.html
        DEFINE_MACRO_NUMPY_C_API = (
            "NPY_NO_DEPRECATED_API",
            "NPY_1_7_API_VERSION",
        )
        for ext in self.extensions:
            ext.define_macros.append(DEFINE_MACRO_NUMPY_C_API)

        if sklearn._OPENMP_SUPPORTED:
            openmp_flag = get_openmp_flag()

            for e in self.extensions:
                e.extra_compile_args += openmp_flag
                e.extra_link_args += openmp_flag

        build_ext.build_extensions(self)

    def run(self):
        # Specifying `build_clib` allows running `python setup.py develop`
        # fully from a fresh clone.
        self.run_command("build_clib")
        build_ext.run(self)


cmdclass = {
    "clean": CleanCommand,
    "build_ext": build_ext_subclass,
}
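# Passed to setup() below; these subclasses replace the stock `clean` and
# `build_ext` commands.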


def check_package_status(package, min_version):
    """
    Check whether the given package is installed and up-to-date.

    Raises an informative ImportError if the package is missing or older than
    ``min_version`` (the reported version string is empty if the package is
    not installed).
    """
    package_status = {}
    try:
        module = importlib.import_module(package)
        package_version = module.__version__
        package_status["up_to_date"] = parse_version(package_version) >= parse_version(
            min_version
        )
        package_status["version"] = package_version
    except ImportError:
        traceback.print_exc()
        package_status["up_to_date"] = False
        package_status["version"] = ""

    req_str = "scikit-learn requires {} >= {}.\n".format(package, min_version)

    instructions = (
        "Installation instructions are available on the "
        "scikit-learn website: "
        "https://scikit-learn.org/stable/install.html\n"
    )

    if package_status["up_to_date"] is False:
        if package_status["version"]:
            raise ImportError(
                "Your installation of {} {} is out-of-date.\n{}{}".format(
                    package, package_status["version"], req_str, instructions
                )
            )
        else:
            raise ImportError(
                "{} is not installed.\n{}{}".format(package, req_str, instructions)
            )
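

# For example, check_package_status("numpy", "1.22.0") raises an ImportError
# with install instructions if NumPy is missing or older than 1.22.0 (the
# version number here is illustrative; the real minima come from
# sklearn._min_dependencies below).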


extension_config = {
    "__check_build": [
        {"sources": ["_check_build.pyx"]},
    ],
    "": [
        {"sources": ["_isotonic.pyx"]},
    ],
    "_loss": [
        {"sources": ["_loss.pyx.tp"]},
    ],
    "cluster": [
        {"sources": ["_dbscan_inner.pyx"], "language": "c++"},
        {"sources": ["_hierarchical_fast.pyx"], "language": "c++", "include_np": True},
        {"sources": ["_k_means_common.pyx"], "include_np": True},
        {"sources": ["_k_means_lloyd.pyx"], "include_np": True},
        {"sources": ["_k_means_elkan.pyx"], "include_np": True},
        {"sources": ["_k_means_minibatch.pyx"], "include_np": True},
    ],
    "cluster._hdbscan": [
        {"sources": ["_linkage.pyx"], "include_np": True},
        {"sources": ["_reachability.pyx"], "include_np": True},
        {"sources": ["_tree.pyx"], "include_np": True},
    ],
    "datasets": [
        {
            "sources": ["_svmlight_format_fast.pyx"],
            "include_np": True,
            "compile_for_pypy": False,
        }
    ],
    "decomposition": [
        {"sources": ["_online_lda_fast.pyx"]},
        {"sources": ["_cdnmf_fast.pyx"], "include_np": True},
    ],
    "ensemble": [
        {"sources": ["_gradient_boosting.pyx"], "include_np": True},
    ],
    "ensemble._hist_gradient_boosting": [
        {"sources": ["_gradient_boosting.pyx"]},
        {"sources": ["histogram.pyx"]},
        {"sources": ["splitting.pyx"]},
        {"sources": ["_binning.pyx"]},
        {"sources": ["_predictor.pyx"]},
        {"sources": ["_bitset.pyx"]},
        {"sources": ["common.pyx"]},
    ],
    "feature_extraction": [
        {"sources": ["_hashing_fast.pyx"], "language": "c++", "include_np": True},
    ],
    "linear_model": [
        {"sources": ["_cd_fast.pyx"]},
        {"sources": ["_sgd_fast.pyx.tp"]},
        {"sources": ["_sag_fast.pyx.tp"]},
    ],
    "manifold": [
        {"sources": ["_utils.pyx"]},
        {"sources": ["_barnes_hut_tsne.pyx"], "include_np": True},
    ],
    "metrics": [
        {"sources": ["_pairwise_fast.pyx"]},
        {
            "sources": ["_dist_metrics.pyx.tp", "_dist_metrics.pxd.tp"],
            "include_np": True,
        },
    ],
    "metrics.cluster": [
        {"sources": ["_expected_mutual_info_fast.pyx"]},
    ],
    "metrics._pairwise_distances_reduction": [
        {
            "sources": ["_datasets_pair.pyx.tp", "_datasets_pair.pxd.tp"],
            "language": "c++",
            "include_np": True,
            "extra_compile_args": ["-std=c++11"],
        },
        {
            "sources": ["_middle_term_computer.pyx.tp", "_middle_term_computer.pxd.tp"],
            "language": "c++",
            "extra_compile_args": ["-std=c++11"],
        },
        {
            "sources": ["_base.pyx.tp", "_base.pxd.tp"],
            "language": "c++",
            "include_np": True,
            "extra_compile_args": ["-std=c++11"],
        },
        {
            "sources": ["_argkmin.pyx.tp", "_argkmin.pxd.tp"],
            "language": "c++",
            "include_np": True,
            "extra_compile_args": ["-std=c++11"],
        },
        {
            "sources": ["_argkmin_classmode.pyx.tp"],
            "language": "c++",
            "include_np": True,
            "extra_compile_args": ["-std=c++11"],
        },
        {
            "sources": ["_radius_neighbors.pyx.tp", "_radius_neighbors.pxd.tp"],
            "language": "c++",
            "include_np": True,
            "extra_compile_args": ["-std=c++11"],
        },
        {
            "sources": ["_radius_neighbors_classmode.pyx.tp"],
            "language": "c++",
            "include_np": True,
            "extra_compile_args": ["-std=c++11"],
        },
    ],
    "preprocessing": [
        {"sources": ["_csr_polynomial_expansion.pyx"]},
        {
            "sources": ["_target_encoder_fast.pyx"],
            "language": "c++",
            "extra_compile_args": ["-std=c++11"],
        },
    ],
    "neighbors": [
        {"sources": ["_binary_tree.pxi.tp"], "include_np": True},
        {"sources": ["_ball_tree.pyx.tp"], "include_np": True},
        {"sources": ["_kd_tree.pyx.tp"], "include_np": True},
        {"sources": ["_partition_nodes.pyx"], "language": "c++", "include_np": True},
        {"sources": ["_quad_tree.pyx"], "include_np": True},
    ],
    "svm": [
        {
            "sources": ["_newrand.pyx"],
            "include_dirs": [join("src", "newrand")],
            "language": "c++",
            # Use C++11 random number generator fix
            "extra_compile_args": ["-std=c++11"],
        },
        {
            "sources": ["_libsvm.pyx"],
            "depends": [
                join("src", "libsvm", "libsvm_helper.c"),
                join("src", "libsvm", "libsvm_template.cpp"),
                join("src", "libsvm", "svm.cpp"),
                join("src", "libsvm", "svm.h"),
                join("src", "newrand", "newrand.h"),
            ],
            "include_dirs": [
                join("src", "libsvm"),
                join("src", "newrand"),
            ],
            "libraries": ["libsvm-skl"],
            "extra_link_args": ["-lstdc++"],
        },
        {
            "sources": ["_liblinear.pyx"],
            "libraries": ["liblinear-skl"],
            "include_dirs": [
                join("src", "liblinear"),
                join("src", "newrand"),
                join("..", "utils"),
            ],
            "depends": [
                join("src", "liblinear", "tron.h"),
                join("src", "liblinear", "linear.h"),
                join("src", "liblinear", "liblinear_helper.c"),
                join("src", "newrand", "newrand.h"),
            ],
            "extra_link_args": ["-lstdc++"],
        },
        {
            "sources": ["_libsvm_sparse.pyx"],
            "libraries": ["libsvm-skl"],
            "include_dirs": [
                join("src", "libsvm"),
                join("src", "newrand"),
            ],
            "depends": [
                join("src", "libsvm", "svm.h"),
                join("src", "newrand", "newrand.h"),
                join("src", "libsvm", "libsvm_sparse_helper.c"),
            ],
            "extra_link_args": ["-lstdc++"],
        },
    ],
    "tree": [
        {
            "sources": ["_tree.pyx"],
            "language": "c++",
            "include_np": True,
            "optimization_level": "O3",
        },
        {"sources": ["_splitter.pyx"], "include_np": True, "optimization_level": "O3"},
        {"sources": ["_criterion.pyx"], "include_np": True, "optimization_level": "O3"},
        {"sources": ["_utils.pyx"], "include_np": True, "optimization_level": "O3"},
    ],
    "utils": [
        {"sources": ["sparsefuncs_fast.pyx"]},
        {"sources": ["_cython_blas.pyx"]},
        {"sources": ["arrayfuncs.pyx"]},
        {
            "sources": ["murmurhash.pyx", join("src", "MurmurHash3.cpp")],
            "include_dirs": ["src"],
        },
        {"sources": ["_fast_dict.pyx"], "language": "c++"},
        {"sources": ["_openmp_helpers.pyx"]},
        {"sources": ["_seq_dataset.pyx.tp", "_seq_dataset.pxd.tp"]},
        {"sources": ["_weight_vector.pyx.tp", "_weight_vector.pxd.tp"]},
        {"sources": ["_random.pyx"]},
        {"sources": ["_typedefs.pyx"]},
        {"sources": ["_heap.pyx"]},
        {"sources": ["_sorting.pyx"]},
        {"sources": ["_vector_sentinel.pyx"], "language": "c++", "include_np": True},
        {"sources": ["_isfinite.pyx"]},
    ],
}
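
# Each entry above maps a submodule to a list of extension specs. Keys used in
# this file: "sources" (paths relative to the submodule directory; ".tp" files
# are Tempita templates), "language", "include_np" (append the NumPy include
# directory), "include_dirs", "depends", "libraries", "extra_compile_args",
# "extra_link_args", "optimization_level" and "compile_for_pypy". They are
# consumed by configure_extension_modules() below.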

# Paths in `libraries` must be relative to the root directory because `libraries` is
# passed directly to `setup`
libraries = [
    (
        "libsvm-skl",
        {
            "sources": [
                join("sklearn", "svm", "src", "libsvm", "libsvm_template.cpp"),
            ],
            "depends": [
                join("sklearn", "svm", "src", "libsvm", "svm.cpp"),
                join("sklearn", "svm", "src", "libsvm", "svm.h"),
                join("sklearn", "svm", "src", "newrand", "newrand.h"),
            ],
            # Use C++11 to use the random number generator fix
            "extra_compiler_args": ["-std=c++11"],
            "extra_link_args": ["-lstdc++"],
        },
    ),
    (
        "liblinear-skl",
        {
            "sources": [
                join("sklearn", "svm", "src", "liblinear", "linear.cpp"),
                join("sklearn", "svm", "src", "liblinear", "tron.cpp"),
            ],
            "depends": [
                join("sklearn", "svm", "src", "liblinear", "linear.h"),
                join("sklearn", "svm", "src", "liblinear", "tron.h"),
                join("sklearn", "svm", "src", "newrand", "newrand.h"),
            ],
            # Use C++11 to use the random number generator fix
            "extra_compiler_args": ["-std=c++11"],
            "extra_link_args": ["-lstdc++"],
        },
    ),
]


def configure_extension_modules():
    # Skip cythonization as we do not want to include the generated
    # C/C++ files in the release tarballs as they are not necessarily
    # forward compatible with future versions of Python for instance.
    if "sdist" in sys.argv or "--help" in sys.argv:
        return []

    import numpy

    from sklearn._build_utils import cythonize_extensions, gen_from_templates

    is_pypy = platform.python_implementation() == "PyPy"
    np_include = numpy.get_include()
    default_optimization_level = "O2"

    if os.name == "posix":
        default_libraries = ["m"]
    else:
        default_libraries = []

    default_extra_compile_args = []
    build_with_debug_symbols = (
        os.environ.get("SKLEARN_BUILD_ENABLE_DEBUG_SYMBOLS", "0") != "0"
    )
    if os.name == "posix":
        if build_with_debug_symbols:
            default_extra_compile_args.append("-g")
        else:
            # Setting -g0 will strip symbols, reducing the binary size of extensions
            default_extra_compile_args.append("-g0")

    cython_exts = []
    for submodule, extensions in extension_config.items():
        submodule_parts = submodule.split(".")
        parent_dir = join("sklearn", *submodule_parts)
        for extension in extensions:
            if is_pypy and not extension.get("compile_for_pypy", True):
                continue

            # Generate files with Tempita
            tempita_sources = []
            sources = []
            for source in extension["sources"]:
                source = join(parent_dir, source)
                new_source_path, path_ext = os.path.splitext(source)

                if path_ext != ".tp":
                    sources.append(source)
                    continue

                # `source` is a Tempita file
                tempita_sources.append(source)

                # Only include source files that are pyx files
                if os.path.splitext(new_source_path)[-1] == ".pyx":
                    sources.append(new_source_path)

            gen_from_templates(tempita_sources)
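            # e.g. "_dist_metrics.pyx.tp" is rendered to "_dist_metrics.pyx",
            # which is then cythonized like a hand-written .pyx file.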

            # Do not proceed if we only have Tempita files that should not be
            # compiled on their own, such as a .pxi.tp template; in that case
            # `sources` is empty.
            if not sources:
                continue

            # By convention, our extensions always use the name of the first source
            source_name = os.path.splitext(os.path.basename(sources[0]))[0]
            if submodule:
                name_parts = ["sklearn", submodule, source_name]
            else:
                name_parts = ["sklearn", source_name]
            name = ".".join(name_parts)

            # Make paths start from the root directory
            include_dirs = [
                join(parent_dir, include_dir)
                for include_dir in extension.get("include_dirs", [])
            ]
            if extension.get("include_np", False):
                include_dirs.append(np_include)

            depends = [
                join(parent_dir, depend) for depend in extension.get("depends", [])
            ]

            extra_compile_args = (
                extension.get("extra_compile_args", []) + default_extra_compile_args
            )
            optimization_level = extension.get(
                "optimization_level", default_optimization_level
            )
            if os.name == "posix":
                extra_compile_args.append(f"-{optimization_level}")
            else:
                extra_compile_args.append(f"/{optimization_level}")

            libraries_ext = extension.get("libraries", []) + default_libraries

            new_ext = Extension(
                name=name,
                sources=sources,
                language=extension.get("language", None),
                include_dirs=include_dirs,
                libraries=libraries_ext,
                depends=depends,
                extra_link_args=extension.get("extra_link_args", None),
                extra_compile_args=extra_compile_args,
            )
            cython_exts.append(new_ext)

    return cythonize_extensions(cython_exts)
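

# By the naming convention above, e.g. the spec
# {"sources": ["_k_means_lloyd.pyx"], "include_np": True} under "cluster"
# becomes an Extension named "sklearn.cluster._k_means_lloyd", built from
# sklearn/cluster/_k_means_lloyd.pyx with the NumPy headers on its include
# path.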


def setup_package():
    python_requires = ">=3.9"
    required_python_version = (3, 9)

    metadata = dict(
        name=DISTNAME,
        maintainer=MAINTAINER,
        maintainer_email=MAINTAINER_EMAIL,
        description=DESCRIPTION,
        license=LICENSE,
        url=URL,
        download_url=DOWNLOAD_URL,
        project_urls=PROJECT_URLS,
        version=VERSION,
        long_description=LONG_DESCRIPTION,
        classifiers=[
            "Intended Audience :: Science/Research",
            "Intended Audience :: Developers",
            "License :: OSI Approved :: BSD License",
            "Programming Language :: C",
            "Programming Language :: Python",
            "Topic :: Software Development",
            "Topic :: Scientific/Engineering",
            "Development Status :: 5 - Production/Stable",
            "Operating System :: Microsoft :: Windows",
            "Operating System :: POSIX",
            "Operating System :: Unix",
            "Operating System :: MacOS",
            "Programming Language :: Python :: 3",
            "Programming Language :: Python :: 3.9",
            "Programming Language :: Python :: 3.10",
            "Programming Language :: Python :: 3.11",
            "Programming Language :: Python :: 3.12",
            "Programming Language :: Python :: Implementation :: CPython",
            "Programming Language :: Python :: Implementation :: PyPy",
        ],
        cmdclass=cmdclass,
        python_requires=python_requires,
        install_requires=min_deps.tag_to_packages["install"],
        package_data={
            "": ["*.csv", "*.gz", "*.txt", "*.pxd", "*.rst", "*.jpg", "*.css"]
        },
        zip_safe=False,  # the package cannot run from a zipped .egg file
        extras_require={
            key: min_deps.tag_to_packages[key]
            for key in ["examples", "docs", "tests", "benchmark"]
        },
    )

    commands = [arg for arg in sys.argv[1:] if not arg.startswith("-")]
    if not all(
        command in ("egg_info", "dist_info", "clean", "check") for command in commands
    ):
        if sys.version_info < required_python_version:
            required_version = "%d.%d" % required_python_version
            raise RuntimeError(
                "Scikit-learn requires Python %s or later. The current"
                " Python version is %s installed in %s."
                % (required_version, platform.python_version(), sys.executable)
            )

        check_package_status("numpy", min_deps.NUMPY_MIN_VERSION)
        check_package_status("scipy", min_deps.SCIPY_MIN_VERSION)

        _check_cython_version()
        metadata["ext_modules"] = configure_extension_modules()
        metadata["libraries"] = libraries
    setup(**metadata)


if __name__ == "__main__":
    setup_package()