diff --git a/.gitignore b/.gitignore index ad42b86..56c6c12 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ __pycache__/ # Mac files .idea +.DS_Store # Distribution / packaging .Python @@ -50,6 +51,7 @@ nosetests.xml coverage.xml *.cover .hypothesis/ +notebooks/ # Translations *.mo @@ -110,3 +112,6 @@ venv.bak/ # Temporary install .unipls.egg-info + +# Claude config files +.claude/ \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt index 4e57206..b561f4b 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,7 @@ BSD 3-Clause License Copyright (c) 2018, Andreas Baum and Laurent Vermue +Copyright (c) 2026, Lukas Kopecky, Imperial College London All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.rst b/README.rst index efa82d9..0ba0bcf 100644 --- a/README.rst +++ b/README.rst @@ -1,14 +1,12 @@ Multiblock Partial Least Squares Package ======================================== -.. image:: https://img.shields.io/pypi/v/mbpls.svg - :target: https://pypi.python.org/pypi/mbpls + +.. image:: https://img.shields.io/pypi/v/multiblock-pls.svg + :target: https://pypi.org/project/multiblock-pls/ :alt: Pypi Version -.. image:: https://travis-ci.com/DTUComputeStatisticsAndDataAnalysis/MBPLS.svg?branch=master - :target: https://travis-ci.com/DTUComputeStatisticsAndDataAnalysis/MBPLS - :alt: Build Status -.. image:: https://img.shields.io/pypi/l/mbpls.svg - :target: https://pypi.python.org/pypi/mbpls/ +.. image:: https://img.shields.io/pypi/l/multiblock-pls.svg + :target: https://pypi.python.org/pypi/multiblock-pls :alt: License .. 
image:: https://readthedocs.org/projects/mbpls/badge/?version=latest :target: https://mbpls.readthedocs.io/en/latest/?badge=latest @@ -17,6 +15,11 @@ Multiblock Partial Least Squares Package :target: https://doi.org/10.21105/joss.01190 :alt: JOSS Paper DOI + +*This is a newly maintained version of the MBPLS software originally developed by Andreas Baum and Laurent Vermue +(homepage: https://github.com/DTUComputeStatisticsAndDataAnalysis/MBPLS/). This maintained version has been updated to be compatible with Python 3.9 and later. +Lukas Kopecky, April 2026.* + An easy to use Python package for (Multiblock) Partial Least Squares prediction modelling of univariate or multivariate outcomes. Four state of the art algorithms have been implemented and optimized for robust @@ -27,7 +30,7 @@ toolbox. The documentation is available at https://mbpls.readthedocs.io and elaborate (real-world) Jupyter Notebook examples can be found at -https://github.com/DTUComputeStatisticsAndDataAnalysis/MBPLS/tree/master/examples +https://github.com/kopeckylukas/MB-PLS/tree/master/examples This package can be cited using the following reference. @@ -41,7 +44,7 @@ Installation - | Install the package for Python3 using the following command. Some dependencies might require an upgrade (scikit-learn, numpy and scipy). 
- | ``$ pip install mbpls`` + | ``$ pip install multiblock-pls`` - | Now you can import the MBPLS class by typing | ``from mbpls.mbpls import MBPLS`` diff --git a/mbpls/__init__.py b/mbpls/__init__.py index 721825e..0cd2072 100644 --- a/mbpls/__init__.py +++ b/mbpls/__init__.py @@ -22,4 +22,4 @@ __all__ = ["mbpls", "data"] -__version__ = "1.0.4" \ No newline at end of file +__version__ = "1.1.0" \ No newline at end of file diff --git a/mbpls/mbpls.py b/mbpls/mbpls.py index b4dcd73..eba11cf 100644 --- a/mbpls/mbpls.py +++ b/mbpls/mbpls.py @@ -290,7 +290,7 @@ def fit(self, X, Y): self.method = 'NIPALS' global U_, T_, R_ - Y = check_array(Y, dtype=np.float64, ensure_2d=False, force_all_finite=not self.sparse_data, copy=self.copy) + Y = check_array(Y, dtype=np.float64, ensure_2d=False, ensure_all_finite=not self.sparse_data, copy=self.copy) if self.sparse_data is True: self.sparse_Y_info_ = {} self.sparse_Y_info_['Y'] = self.check_sparsity_level(Y) @@ -308,14 +308,14 @@ def fit(self, X, Y): # Check dimensions check_consistent_length(X[block], Y) X[block] = check_array(X[block], dtype=np.float64, copy=self.copy, - force_all_finite=not self.sparse_data) + ensure_all_finite=not self.sparse_data) if self.sparse_data is True: self.sparse_X_info_[block] = self.check_sparsity_level(X[block]) X[block] = self.x_scalers_[block].fit_transform(X[block]) else: self.x_scalers_.append(StandardScaler(with_mean=True, with_std=True)) # Check dimensions - X = check_array(X, dtype=np.float64, copy=self.copy, force_all_finite=not self.sparse_data) + X = check_array(X, dtype=np.float64, copy=self.copy, ensure_all_finite=not self.sparse_data) if self.sparse_data is True: self.sparse_X_info_ = {} self.sparse_X_info_[0] = self.check_sparsity_level(X) @@ -334,12 +334,12 @@ def fit(self, X, Y): # Check dimensions check_consistent_length(X[block], Y) X[block] = check_array(X[block], dtype=np.float64, copy=self.copy, - force_all_finite=not self.sparse_data) + ensure_all_finite=not 
self.sparse_data) if self.sparse_data is True: self.sparse_X_info_[block] = self.check_sparsity_level(X[block]) else: # Check dimensions - X = check_array(X, dtype=np.float64, copy=self.copy, force_all_finite=not self.sparse_data) + X = check_array(X, dtype=np.float64, copy=self.copy, ensure_all_finite=not self.sparse_data) if self.sparse_data is True: self.sparse_X_info_ = {} self.sparse_X_info_[0] = self.check_sparsity_level(X) @@ -358,7 +358,7 @@ def fit(self, X, Y): self.W_ = [] self.W_non_normal_ = [] - if self.method is not 'SIMPLS': + if self.method != 'SIMPLS': self.A_ = np.empty((self.num_blocks_, 0)) self.A_corrected_ = np.empty((self.num_blocks_, 0)) self.T_ = [] @@ -372,7 +372,7 @@ def fit(self, X, Y): for block in range(self.num_blocks_): self.W_.append(np.empty((X[block].shape[1], 0))) self.W_non_normal_.append(np.empty((X[block].shape[1], 0))) - if self.method is not 'SIMPLS': + if self.method != 'SIMPLS': self.T_.append(np.empty((X[block].shape[0], 0))) # Concatenate X blocks @@ -1091,13 +1091,13 @@ def transform(self, X, Y=None, return_block_scores=False, copy=True): X = deepcopy(X) for block in range(len(X)): # Check dimensions - X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy) + X[block] = check_array(X[block], dtype=np.float64, ensure_all_finite=not self.sparse_data, copy=copy) if self.sparse_data: sparse_X_info_[block] = self.check_sparsity_level(X[block]) X[block] = self.x_scalers_[block].transform(X[block]) else: # Check dimensions - X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy) + X = check_array(X, dtype=np.float64, ensure_all_finite=not self.sparse_data, copy=copy) if self.sparse_data: sparse_X_info_[0] = self.check_sparsity_level(X) X = [self.x_scalers_[0].transform(X)] @@ -1117,14 +1117,14 @@ def transform(self, X, Y=None, return_block_scores=False, copy=True): Ts_ = X_comp.dot(self.R_) if Y is not None: - Y = check_array(Y, dtype=np.float64, 
ensure_2d=False, force_all_finite=not self.sparse_data, copy=copy) + Y = check_array(Y, dtype=np.float64, ensure_2d=False, ensure_all_finite=not self.sparse_data, copy=copy) if self.sparse_data: sparse_Y_info_['Y'] = self.check_sparsity_level(Y) if Y.ndim == 1: Y = Y.reshape(-1, 1) Y = self.y_scaler_.transform(Y) # Here the block scores are calculated iteratively for new blocks - if self.method is not 'SIMPLS': + if self.method != 'SIMPLS': T_ = [] for block in range(self.num_blocks_): T_.append(np.empty((X[block].shape[0], 0))) @@ -1182,7 +1182,7 @@ def transform(self, X, Y=None, return_block_scores=False, copy=True): U_ = Y.dot(self.V_) / np.linalg.norm(Y.dot(self.V_), axis=0) return Ts_, U_ else: - if self.method is not 'SIMPLS': + if self.method != 'SIMPLS': # Here the block scores are calculated iteratively for new blocks T_ = [] for block in range(self.num_blocks_): @@ -1225,12 +1225,12 @@ def transform(self, X, Y=None, return_block_scores=False, copy=True): X = deepcopy(X) for block in range(len(X)): # Check dimensions - X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy) + X[block] = check_array(X[block], dtype=np.float64, ensure_all_finite=not self.sparse_data, copy=copy) if self.sparse_data: sparse_X_info_[block] = self.check_sparsity_level(X[block]) else: # Check dimensions - X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy) + X = check_array(X, dtype=np.float64, ensure_all_finite=not self.sparse_data, copy=copy) if self.sparse_data: sparse_X_info_[0] = self.check_sparsity_level(X) X = [X] @@ -1250,7 +1250,7 @@ def transform(self, X, Y=None, return_block_scores=False, copy=True): Ts_ = X_comp.dot(self.R_) if Y is not None: - Y = check_array(Y, dtype=np.float64, ensure_2d=False, force_all_finite=not self.sparse_data, copy=copy) + Y = check_array(Y, dtype=np.float64, ensure_2d=False, ensure_all_finite=not self.sparse_data, copy=copy) if self.sparse_data: 
sparse_Y_info_['Y'] = self.check_sparsity_level(Y) if Y.ndim == 1: @@ -1365,10 +1365,10 @@ def predict(self, X, copy=True): X = deepcopy(X) for block in range(len(X)): # Check dimensions - X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy) + X[block] = check_array(X[block], dtype=np.float64, ensure_all_finite=not self.sparse_data, copy=copy) X[block] = self.x_scalers_[block].transform(X[block]) else: - X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy) + X = check_array(X, dtype=np.float64, ensure_all_finite=not self.sparse_data, copy=copy) X = [self.x_scalers_[0].transform(X)] @@ -1390,9 +1390,9 @@ def predict(self, X, copy=True): X = deepcopy(X) for block in range(len(X)): # Check dimensions - X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy) + X[block] = check_array(X[block], dtype=np.float64, ensure_all_finite=not self.sparse_data, copy=copy) else: - X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy) + X = check_array(X, dtype=np.float64, ensure_all_finite=not self.sparse_data, copy=copy) X = [X] X = np.hstack(X) if self.sparse_data: @@ -1493,7 +1493,7 @@ def plot(self, num_components=2): for block in range(self.num_blocks_): # Inverse transforming weights/loadings if self.standardize: - P_inv_trans.append(self.x_scalers_[block].inverse_transform(self.P_[block][:, comp])) + P_inv_trans.append(self.x_scalers_[block].inverse_transform(self.P_[block][:, comp].reshape(1, -1)).flatten()) else: P_inv_trans.append(self.P_[block][:, comp]) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..737e278 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=64", "wheel"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4df1cd2..6b47760 100644 --- a/requirements.txt 
+++ b/requirements.txt @@ -1,4 +1,5 @@ -numpy>=1.13.3 -scipy>=1.0.0 -scikit-learn>=0.20.0 -pandas>=0.20.0 \ No newline at end of file +numpy>=1.19.5 +scipy>=1.6.0 +scikit-learn>=1.6.0 +pandas>=2.0.0 +matplotlib>=3.0.0 diff --git a/setup.py b/setup.py index 605396f..606c0cc 100644 --- a/setup.py +++ b/setup.py @@ -9,21 +9,23 @@ from setuptools import setup, find_packages -import mbpls +# import mbpls -NAME = "mbpls" -VERSION = mbpls.__version__ +NAME = "multiblock-pls" +# VERSION = mbpls.__version__ +VERSION = '1.1.0' DESCRIPTION = "An implementation of the most common partial least squares algorithms as multi-block methods" -URL = 'https://github.com/DTUComputeStatisticsAndDataAnalysis/MBPLS' -AUTHORS = "Andreas Baum, Laurent Vermue" -AUTHOR_MAILS = ", " +URL = 'https://github.com/kopeckylukas/MB-PLS' +AUTHORS = "Andreas Baum, Laurent Vermue, Lukas Kopecky" +AUTHOR_MAILS = ", , " LICENSE = 'new BSD' # This is the lowest tested version. Below might work as well -NUMPY_MIN_VERSION = '1.13.3' -SCIPY_MIN_VERSION = '1.0.0' -SCIKIT_LEARN_MIN_VERSION = '0.22.1' -PANDAS_MIN_VERSION = '0.20.0' +NUMPY_MIN_VERSION = '1.19.5' +SCIPY_MIN_VERSION = '1.6.0' +SCIKIT_LEARN_MIN_VERSION = '1.6.0' +PANDAS_MIN_VERSION = '2.0.0' +MATPLOTLIB_MIN_VERSION = '3.0.0' def setup_package(): with open('README.rst') as f: @@ -50,9 +52,12 @@ def setup_package(): 'Operating System :: POSIX', 'Operating System :: Unix', 'Operating System :: MacOS', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: 3.14', # 'Development Status :: 4 - Beta', 'Development Status :: 5 - Production/Stable' ], @@ -60,7 +65,8 @@ def setup_package(): 'numpy>={0}'.format(NUMPY_MIN_VERSION), 
'scipy>={0}'.format(SCIPY_MIN_VERSION), 'scikit-learn>={0}'.format(SCIKIT_LEARN_MIN_VERSION), - 'pandas>={0}'.format(PANDAS_MIN_VERSION) + 'pandas>={0}'.format(PANDAS_MIN_VERSION), + 'matplotlib>={0}'.format(MATPLOTLIB_MIN_VERSION) ], extras_require={ 'tests': [ @@ -70,12 +76,9 @@ def setup_package(): 'sphinx_rtd_theme', 'nbsphinx', 'nbsphinx_link' - ], - 'extras': [ - 'matplotlib', ], }, - python_requires='>=3.5', + python_requires='>=3.9', ) if __name__ == '__main__':