Description
Hi, I used your example and want to apply it to high-dimensional data, but it reports an error:
import numpy as np
from coroica import CoroICA, UwedgeICA
from matplotlib import pyplot as plt
from sklearn.decomposition import FastICA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict
from sklearn.pipeline import Pipeline
X = np.random.randn(348, 22792)  # with 25 features, as in your example, this runs fine
y = np.random.randn(348,)
group_index = np.zeros(348,)
group_index[150:] = 1
# define coroICA-based pipeline
model_coroICA = Pipeline(steps=[
    ('coroICA', CoroICA(n_components=10,
                        max_matrices='no_partitions',
                        pairing='allpairs')),
    ('regression', LinearRegression())])

# get cross-validated predictions with coroICA-based pipeline
y_hat_coroICA = cross_val_predict(
    model_coroICA,
    X,
    y,
    fit_params={'coroICA__group_index': group_index})
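The traceback below bottoms out in CoroICA.fit / uwedge rather than in the cross-validation machinery, so the failure can probably be isolated with a direct fit. A minimal sketch, reusing X and group_index from above; the group_index= keyword matches the fit signature visible in the traceback, and it is an assumption that the full-data fit fails the same way as the per-fold fits:

import numpy as np
from coroica import CoroICA

X = np.random.randn(348, 22792)
group_index = np.zeros(348,)
group_index[150:] = 1

# fit CoroICA directly, outside Pipeline/cross_val_predict
ica = CoroICA(n_components=10,
              max_matrices='no_partitions',
              pairing='allpairs')
# assumption: this raises the same "need at least one array to stack"
ica.fit(X, group_index=group_index)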
ValueError                                Traceback (most recent call last)
Cell In[20], line 24
17 model_coroICA = Pipeline(steps=[
18 ('coroICA', CoroICA(n_components=10,
19 max_matrices='no_partitions',
20 pairing='allpairs')),
21 ('regression', LinearRegression())])
23 # get cross-validated predictions with coroICA-based pipeline
---> 24 y_hat_coroICA = cross_val_predict(
25 model_coroICA,
26 X,
27 y,
28 fit_params={'coroICA__group_index': group_index})
File ~/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py:962, in cross_val_predict(estimator, X, y, groups, cv, n_jobs, verbose, fit_params, pre_dispatch, method)
959 # We clone the estimator to make sure that all the folds are
960 # independent, and that it is pickle-able.
961 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
--> 962 predictions = parallel(
963 delayed(_fit_and_predict)(
964 clone(estimator), X, y, train, test, verbose, fit_params, method
965 )
966 for train, test in splits
967 )
969 inv_test_indices = np.empty(len(test_indices), dtype=int)
970 inv_test_indices[test_indices] = np.arange(len(test_indices))
File ~/anaconda3/lib/python3.8/site-packages/joblib/parallel.py:1085, in Parallel.__call__(self, iterable)
1076 try:
1077 # Only set self._iterating to True if at least a batch
1078 # was dispatched. In particular this covers the edge
(...)
1082 # was very quick and its callback already dispatched all the
1083 # remaining jobs.
1084 self._iterating = False
-> 1085 if self.dispatch_one_batch(iterator):
1086 self._iterating = self._original_iterator is not None
1088 while self.dispatch_one_batch(iterator):
File ~/anaconda3/lib/python3.8/site-packages/joblib/parallel.py:901, in Parallel.dispatch_one_batch(self, iterator)
899 return False
900 else:
--> 901 self._dispatch(tasks)
902 return True
File ~/anaconda3/lib/python3.8/site-packages/joblib/parallel.py:819, in Parallel._dispatch(self, batch)
817 with self._lock:
818 job_idx = len(self._jobs)
--> 819 job = self._backend.apply_async(batch, callback=cb)
820 # A job can complete so quickly than its callback is
821 # called before we get here, causing self._jobs to
822 # grow. To ensure correct results ordering, .insert is
823 # used (rather than .append) in the following line
824 self._jobs.insert(job_idx, job)
File ~/anaconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py:208, in SequentialBackend.apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
File ~/anaconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py:597, in ImmediateResult.__init__(self, batch)
594 def __init__(self, batch):
595 # Don't delay the application, to avoid keeping the input
596 # arguments in memory
--> 597 self.results = batch()
File ~/anaconda3/lib/python3.8/site-packages/joblib/parallel.py:288, in BatchedCalls.__call__(self)
284 def __call__(self):
285 # Set the default nested backend to self._backend but do not set the
286 # change the default number of processes to -1
287 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 288 return [func(*args, **kwargs)
289 for func, args, kwargs in self.items]
File ~/anaconda3/lib/python3.8/site-packages/joblib/parallel.py:288, in <listcomp>(.0)
284 def __call__(self):
285 # Set the default nested backend to self._backend but do not set the
286 # change the default number of processes to -1
287 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 288 return [func(*args, **kwargs)
289 for func, args, kwargs in self.items]
File ~/anaconda3/lib/python3.8/site-packages/sklearn/utils/fixes.py:216, in _FuncWrapper.__call__(self, *args, **kwargs)
214 def __call__(self, *args, **kwargs):
215 with config_context(**self.config):
--> 216 return self.function(*args, **kwargs)
File ~/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py:1044, in _fit_and_predict(estimator, X, y, train, test, verbose, fit_params, method)
1042 estimator.fit(X_train, **fit_params)
1043 else:
-> 1044 estimator.fit(X_train, y_train, **fit_params)
1045 func = getattr(estimator, method)
1046 predictions = func(X_test)
File ~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py:390, in Pipeline.fit(self, X, y, **fit_params)
364 """Fit the model.
365
366 Fit all the transformers one after the other and transform the
(...)
387 Pipeline with fitted steps.
388 """
389 fit_params_steps = self._check_fit_params(**fit_params)
--> 390 Xt = self._fit(X, y, **fit_params_steps)
391 with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
392 if self._final_estimator != "passthrough":
File ~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py:348, in Pipeline._fit(self, X, y, **fit_params_steps)
346 cloned_transformer = clone(transformer)
347 # Fit or load from cache the current transformer
--> 348 X, fitted_transformer = fit_transform_one_cached(
349 cloned_transformer,
350 X,
351 y,
352 None,
353 message_clsname="Pipeline",
354 message=self._log_message(step_idx),
355 **fit_params_steps[name],
356 )
357 # Replace the transformer of the step with the fitted
358 # transformer. This is necessary when loading the transformer
359 # from the cache.
360 self.steps[step_idx] = (name, fitted_transformer)
File ~/anaconda3/lib/python3.8/site-packages/joblib/memory.py:349, in NotMemorizedFunc.__call__(self, *args, **kwargs)
348 def __call__(self, *args, **kwargs):
--> 349 return self.func(*args, **kwargs)
File ~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py:893, in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
891 with _print_elapsed_time(message_clsname, message):
892 if hasattr(transformer, "fit_transform"):
--> 893 res = transformer.fit_transform(X, y, **fit_params)
894 else:
895 res = transformer.fit(X, y, **fit_params).transform(X)
File ~/anaconda3/lib/python3.8/site-packages/sklearn/base.py:855, in TransformerMixin.fit_transform(self, X, y, **fit_params)
852 return self.fit(X, **fit_params).transform(X)
853 else:
854 # fit method of arity 2 (supervised transformation)
--> 855 return self.fit(X, y, **fit_params).transform(X)
File ~/anaconda3/lib/python3.8/site-packages/coroica/coroica.py:312, in CoroICA.fit(self, X, y, group_index, partition_index)
309 Rx0 = np.cov(X)
311 # joint diagonalisation
--> 312 self.V_, self.converged_, self.n_iter_, self.meanoffdiag_ = uwedge(
313 covmats,
314 Rx0=Rx0,
315 eps=self.tol,
316 minimize_loss=self.minimize_loss,
317 n_iter_max=self.max_iter,
318 n_components=self.n_components_uwedge,
319 condition_threshold=self.condition_threshold)
321 # normalise V
322 normaliser = np.diag(self.V_.dot(Rx0.dot(self.V_.T.conj())))
File ~/anaconda3/lib/python3.8/site-packages/coroica/uwedge.py:59, in uwedge(Rx, init, Rx0, return_diagonals, eps, n_iter_max, minimize_loss, verbose, n_components, condition_threshold)
55 TINY = np.finfo(V.dtype).eps
57 for iteration in itertools.count():
58 # 1) Generate Rs
---> 59 Rs = np.stack([V.dot(Rxx.dot(V.T.conj())) for Rxx in Rx])
61 # 2) Use Rs to construct A, equation (24) in paper with W=Id
62 # 3) Set A1=Id and substitute off-diagonals
63 Rsdiag = Rs.diagonal(axis1=1, axis2=2)
File <__array_function__ internals>:180, in stack(*args, **kwargs)
File ~/anaconda3/lib/python3.8/site-packages/numpy/core/shape_base.py:422, in stack(arrays, axis, out)
420 arrays = [asanyarray(arr) for arr in arrays]
421 if not arrays:
--> 422 raise ValueError('need at least one array to stack')
424 shapes = {arr.shape for arr in arrays}
425 if len(shapes) != 1:
ValueError: need at least one array to stack
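The last frame is the informative one: np.stack raises "need at least one array to stack" only when given an empty sequence, so the list of covariance matrices (covmats) that CoroICA.fit hands to uwedge is empty for this input. I cannot tell from the traceback alone why no matrices get built for d=22792, but since 348 samples support a covariance estimate of rank at most 347, one hedged workaround is to project onto a few hundred principal components before CoroICA. The 'pca' step and n_components=100 below are illustrative choices, not part of the original example, and whether this avoids the empty-list condition is an assumption:

from coroica import CoroICA
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict
from sklearn.pipeline import Pipeline

model_coroICA = Pipeline(steps=[
    ('pca', PCA(n_components=100)),  # illustrative: reduce 22792 features to 100
    ('coroICA', CoroICA(n_components=10,
                        max_matrices='no_partitions',
                        pairing='allpairs')),
    ('regression', LinearRegression())])

# fit_params routing is unchanged: 'coroICA__group_index' still reaches the coroICA step
y_hat_coroICA = cross_val_predict(
    model_coroICA,
    X,
    y,
    fit_params={'coroICA__group_index': group_index})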