Python sklearn.externals.joblib.delayed() Examples

The following are 27 code examples of sklearn.externals.joblib.delayed(), drawn from open-source projects; the originating project, source file, and license are noted above each example. Note that sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23, so new code should import Parallel and delayed from the standalone joblib package, which exposes the same API.
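Before the project examples, here is a minimal sketch of the pattern they all build on, written against the standalone joblib import so it runs on both old and new setups:

from math import sqrt
try:
    from joblib import Parallel, delayed                     # standalone package
except ImportError:
    from sklearn.externals.joblib import Parallel, delayed   # scikit-learn < 0.23

# delayed(sqrt) wraps the function so that calling it records a
# (function, args, kwargs) task instead of executing it; Parallel then
# runs the resulting stream of tasks across the workers.
results = Parallel(n_jobs=2)(delayed(sqrt)(i ** 2) for i in range(10))
print(results)  # [0.0, 1.0, 2.0, ..., 9.0]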
Example #1
Source File: pronoun_resolution.py    From gap with MIT License
def batch_predict(fn):
        def _predict(self, df, preprocessor=None, **kwargs):
            # print('Is given instance a df? ', isinstance(df, pd.DataFrame))
            if isinstance(df, pd.DataFrame):
                if preprocessor:
                    preprocessor(df)
                
                rows = []
                if self.n_jobs != 1:
                    with Parallel(n_jobs=self.n_jobs, verbose=self.verbose, backend=self.backend) as parallel:
                        rows = parallel([delayed(fn)(self, row, **kwargs) for idx, row in df.iterrows()])
                else:
                    with tqdm(total=df.shape[0]) as pbar:
                        for idx, row in df.iterrows():
                            rows.append(fn(self, row, **{**row, **kwargs}))
                            pbar.update()
                return rows
            else:
                return fn(self, df, **kwargs)
        return _predict 
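A stripped-down sketch of the row-wise pattern used above, with a hypothetical score_row function standing in for the decorated method:

import pandas as pd
from joblib import Parallel, delayed

def score_row(row):
    # Hypothetical per-row computation standing in for fn above.
    return row['a'] + row['b']

df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
# One delayed task per row; DataFrame.iterrows yields (index, Series) pairs.
rows = Parallel(n_jobs=2)(delayed(score_row)(row) for _, row in df.iterrows())
print(rows)  # [5, 7, 9]

Dispatching one task per row is only worthwhile when the per-row work dominates the pickling overhead of shipping each Series to a worker.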
Example #2
Source File: Policy.py    From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License
def setupGamma(self, ranking_size):
        if self.gammaRankingSize is not None and self.gammaRankingSize==ranking_size:
            print("UniformPolicy:setupGamma [INFO] Gamma has been pre-computed for this ranking_size. Size of Gamma cache:", len(self.gammas), flush=True)
            return
        
        gammaFile=Settings.DATA_DIR+self.dataset.name+'_'+self.name+'_'+str(ranking_size)+'.z'
        if os.path.exists(gammaFile):
            self.gammas=joblib.load(gammaFile)
            self.gammaRankingSize=ranking_size
            print("UniformPolicy:setupGamma [INFO] Using precomputed gamma", gammaFile, flush=True)
            
        else:
            self.gammas={}
            self.gammaRankingSize=ranking_size
            
            candidateSet=set(self.dataset.docsPerQuery)
            
            responses=joblib.Parallel(n_jobs=-2, verbose=50)(joblib.delayed(UniformGamma)(i, ranking_size, self.allowRepetitions) for i in candidateSet)
            
            for tup in responses:
                self.gammas[tup[0]]=tup[1]
            
            joblib.dump(self.gammas, gammaFile, compress=9, protocol=-1)
            print("", flush=True)
            print("UniformPolicy:setupGamma [INFO] Finished creating Gamma_pinv cache. Size", len(self.gammas), flush=True) 
Example #3
Source File: optimize.py    From SimulatedAnnealing with Apache License 2.0
def fit_score(self, X, Y):
        if isinstance(self.cv, int):
            n_folds = self.cv
            self.cv = KFold(n_splits=n_folds).split(X)

        # Formatting is kinda ugly but provides best debugging view
        out = Parallel(n_jobs=self.n_jobs,
                       verbose=self.verbose,
                       pre_dispatch=self.pre_dispatch)\
            (delayed(_fit_and_score)(clone(self.clf), X, Y, self.metric,
                                     train, test, self.verbose, {},
                                     {}, return_parameters=False,
                                     error_score='raise')
             for train, test in self.cv)

        # Out is a list of triplets: score, estimator, n_test_samples
        scores = list(zip(*out))[0]
        return np.mean(scores), np.std(scores) 
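Since _fit_and_score is a private scikit-learn helper, the same per-fold dispatch can be sketched with public API only; fold_score below is a hypothetical stand-in, not part of the project above:

import numpy as np
from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

def fold_score(estimator, X, y, train, test):
    # Fit on the train indices, score on the held-out test indices.
    estimator.fit(X[train], y[train])
    return estimator.score(X[test], y[test])

X, y = make_classification(n_samples=100, random_state=0)
clf = LogisticRegression(max_iter=1000)
scores = Parallel(n_jobs=2)(
    delayed(fold_score)(clone(clf), X, y, train, test)
    for train, test in KFold(n_splits=5).split(X))
print(np.mean(scores), np.std(scores))

Cloning the estimator per fold, as the example above also does, keeps each task independent of the others.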
Example #4
Source File: relieff.py    From scikit-rebate with MIT License
def _run_algorithm(self):
        """ Runs nearest neighbor (NN) identification and feature scoring to yield ReliefF scores. """

        # Find nearest neighbors
        NNlist = map(self._find_neighbors, range(self._datalen))

        # Feature scoring - using identified nearest neighbors
        nan_entries = np.isnan(self._X)  # boolean mask for missing data values

        # Call the scoring method for the ReliefF algorithm
        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            ReliefF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
                                    NN, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type)
            for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)

        return np.array(scores) 
Example #5
Source File: relieff.py    From scikit-rebate with MIT License
def _distarray_missing(self, xc, xd, cdiffs):
        """Distance array calculation for data with missing values"""
        cindices = []
        dindices = []
        # Get Boolean mask locating missing values for continuous and discrete features separately. These correspond to xc and xd respectively.
        for i in range(self._datalen):
            cindices.append(np.where(np.isnan(xc[i]))[0])
            dindices.append(np.where(np.isnan(xd[i]))[0])

        if self.n_jobs != 1:
            dist_array = Parallel(n_jobs=self.n_jobs)(delayed(get_row_missing)(
                xc, xd, cdiffs, index, cindices, dindices) for index in range(self._datalen))
        else:
            # For each instance calculate distance from all other instances (in non-redundant manner) (i.e. computes triangle, and puts zeros in for rest to form square).
            dist_array = [get_row_missing(xc, xd, cdiffs, index, cindices, dindices)
                          for index in range(self._datalen)]

        return np.array(dist_array)
Example #6
Source File: surf.py    From scikit-rebate with MIT License
def _run_algorithm(self):
        """ Runs nearest neighbor (NN) identification and feature scoring to yield SURF scores. """
        sm = cnt = 0
        for i in range(self._datalen):
            sm += sum(self._distance_array[i])
            cnt += len(self._distance_array[i])
        avg_dist = sm / float(cnt)

        nan_entries = np.isnan(self._X)

        NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)]
        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            SURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
                                 NN, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type)
            for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)

        return np.array(scores) 
Example #7
Source File: stacking.py    From civisml-extensions with BSD 3-Clause "New" or "Revised" License
def _base_est_fit(self, X, y, **fit_params):
        """Fit the base estimators on X and y.
        """
        fit_params_ests = self._extract_fit_params(**fit_params)

        _jobs = []
        for name, est in self.estimator_list[:-1]:
            _jobs.append(delayed(_fit_est)(
                clone(est), X, y, **fit_params_ests[name]))

        _out = Parallel(
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            pre_dispatch=self.pre_dispatch)(_jobs)

        for name, _ in self.estimator_list[:-1]:
            self._replace_est('estimator_list', name, _out.pop(0)) 
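Note the pattern: the delayed(...) calls are collected into a plain list before being handed to Parallel, which accepts any iterable of tasks. A minimal sketch with a hypothetical fit_est stand-in:

from joblib import Parallel, delayed

def fit_est(name, data):
    # Hypothetical stand-in for fitting one estimator.
    return name, sum(data)

jobs = []
for name in ['a', 'b', 'c']:
    jobs.append(delayed(fit_est)(name, [1, 2, 3]))

# Building the list first separates task construction from dispatch.
out = Parallel(n_jobs=2)(jobs)
print(out)  # [('a', 6), ('b', 6), ('c', 6)]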
Example #8
Source File: _base.py    From ibex with BSD 3-Clause "New" or "Revised" License
def fit_transform(self, X, y=None, **fit_params):
        """
        Fits the transformer using ``X`` (and possibly ``y``). Transforms
        ``X`` using the transformers, uses :func:`pandas.concat`
        to horizontally concatenate the results.

        Returns:

            The horizontal concatenation of the transformed results.
        """
        verify_x_type(X)
        verify_y_type(y)

        Xts = joblib.Parallel(n_jobs=self.n_jobs)(
            joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params) for _, trans, weight in self._iter())
        return self.__concat(Xts) 
Example #9
Source File: multi_decompose_fmri.py    From modl with BSD 2-Clause "Simplified" License
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max,
                                                  size=n_seeds)
    exps = []
    exps += [{'method': 'sgd',
              'step_size': step_size}
             for step_size in np.logspace(-7, -7, 1)]
    exps += [{'method': 'gram',
              'reduction': reduction}
             for reduction in [12]]

    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)

    Parallel(n_jobs=n_jobs,
             verbose=10)(delayed(single_run)(config_updates, rundir, i)
                         for i, config_updates in enumerate(exps)) 
Example #10
Source File: multi_decompose_images.py    From modl with BSD 2-Clause "Simplified" License
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max,
                                                  size=n_seeds)
    exps = []
    exps += [{'method': 'sgd',
              'step_size': step_size}
             for step_size in np.logspace(-3, 3, 7)]
    exps += [{'method': 'gram',
              'reduction': reduction}
             for reduction in [1, 4, 6, 8, 12, 24]]

    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)

    Parallel(n_jobs=n_jobs,
             verbose=10)(delayed(single_run)(config_updates, rundir, i)
                         for i, config_updates in enumerate(exps)) 
Example #11
Source File: chemTopicModel.py    From CheTo with BSD 3-Clause "New" or "Revised" License
def _generateFragments(self):
        voc=set(self.vocabulary)
        fpsdict = dict([(idx,{}) for idx in self.moldata.index])
        nrows = self.moldata.shape[0]
        counter = 0
        with Parallel(n_jobs=self.n_jobs,verbose=self.verbose) as parallel:
            while counter < nrows:
                nextChunk = min(counter+(self.n_jobs*self.chunksize),nrows)
                result = parallel(delayed(_generateMolFrags)(mollist, voc,
                                                    self.fragmentMethod, 
                                                    self.fragIdx)
                                   for mollist in self._produceDataChunks(counter,nextChunk,self.chunksize))
                for r in result:
                    counter+=len(r)
                    fpsdict.update(r)            
        self.moldata['fps'] = np.array(sorted(fpsdict.items()))[:,1]                
    
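Example #11 enters Parallel as a context manager so that the same worker pool is reused across chunks; a minimal sketch of that reuse pattern, with a hypothetical process function:

from joblib import Parallel, delayed

def process(item):
    return item * item

batches = [range(0, 5), range(5, 10)]
results = []
# Entering the context once keeps the workers alive between calls,
# avoiding pool start-up costs for every batch.
with Parallel(n_jobs=2) as parallel:
    for batch in batches:
        results.extend(parallel(delayed(process)(i) for i in batch))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]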
Example #12
Source File: evaluation.py    From few with GNU General Public License v3.0
def calc_fitness(self,X,labels,fit_choice,sel):
        """computes fitness of individual output yhat.
        yhat: output of a program.
        labels: correct outputs
        fit_choice: choice of fitness function
        """

        if 'lexicase' in sel:
            # return list(map(lambda yhat: self.f_vec[fit_choice](labels,yhat),X))
            return np.asarray(
                              [self.proper(self.f_vec[fit_choice](labels,
                                                        yhat)) for yhat in X],
                                                        order='F')
            # return list(Parallel(n_jobs=-1)(delayed(self.f_vec[fit_choice])(labels,yhat) for yhat in X))
        else:
            # return list(map(lambda yhat: self.f[fit_choice](labels,yhat),X))
            return np.asarray([self.f[fit_choice](labels,yhat) for yhat in X],
                            order='F').reshape(-1)

            # return list(Parallel(n_jobs=-1)(delayed(self.f[fit_choice])(labels,yhat) for yhat in X)) 
Example #13
Source File: few.py    From few with GNU General Public License v3.0
def transform(self,x,inds=None,labels = None):
        """return a transformation of x using population outputs"""
        if inds:
            # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype) 
            #                           for I in inds)).transpose()
            return np.asarray(
                [self.out(I,x,labels,self.otype) for I in inds]).transpose()
        elif self._best_inds:
            # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype) 
            #                                   for I in self._best_inds)).transpose()
            return np.asarray(
                [self.out(I,x,labels,self.otype) for I in self._best_inds]).transpose()
        else:
            return x 
Example #14
Source File: Policy.py    From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License
def setupGamma(self, ranking_size):
        if self.gammaRankingSize is not None and self.gammaRankingSize==ranking_size:
            print("NonUniformPolicy:setupGamma [INFO] Gamma has been pre-computed for this ranking_size. Size of Gamma cache:", len(self.gammas), flush=True)
            return
        
        gammaFile=Settings.DATA_DIR+self.dataset.name+'_'+self.name+'_'+str(ranking_size)+'.z'
        if os.path.exists(gammaFile):
            self.gammas, self.multinomials=joblib.load(gammaFile)
            self.gammaRankingSize=ranking_size
            print("NonUniformPolicy:setupGamma [INFO] Using precomputed gamma", gammaFile, flush=True)
            
        else:
            self.gammas={}
            self.multinomials={}
            self.gammaRankingSize=ranking_size
            
            candidateSet=set(self.dataset.docsPerQuery)
            responses=joblib.Parallel(n_jobs=-2, verbose=50)(joblib.delayed(NonUniformGamma)(i, self.decay, ranking_size, self.allowRepetitions) for i in candidateSet)
            
            for tup in responses:
                self.gammas[tup[0]]=tup[2]
                self.multinomials[tup[0]]=tup[1]
            
            joblib.dump((self.gammas, self.multinomials), gammaFile, compress=9, protocol=-1)
            print("", flush=True)
            print("NonUniformPolicy:setupGamma [INFO] Finished creating Gamma_pinv cache. Size", len(self.gammas), flush=True)

        self.policy.predictAll(-1) 
Example #15
Source File: split_train_test.py    From EvalNE with MIT License
def compute_splits_parallel(G, output_path, owa=True, train_frac=0.51, num_fe_train=None, num_fe_test=None,
                            num_splits=10):
    r"""
    Computes in parallel the required number of train/test splits of edges and non-edges from an input graph
    and writes the data to files. The train sets are always connected / weakly connected and span all nodes
    of the input graph. Input graphs (digraphs) cannot contain more than one (weakly) connected component.
    
    Parameters
    ----------
    G : graph
       A NetworkX graph
    output_path : string
       Indicates the path where data will be stored. Can include a name for all splits to share.  
    owa : bool, optional
       Encodes whether the network is assumed to respect the open world assumption. Default is True.
       If owa=True, false train edges may be true test edges; false edges are sampled from the train graph.
       If owa=False, a closed world is assumed, so false train edges are known to be false (i.e. not in G).
    train_frac : float, optional
       The relative size (in range (0.0, 1.0]) of the train set with respect to the total number of edges in the graph.
       Default is 0.51.
    num_fe_train : int, optional
       The number of train false edges to generate. Default is same number as true train edges.
    num_fe_test : int, optional
       The number of test false edges to generate. Default is same number as true test edges.
    num_splits : int, optional
       The number of train/test splits to generate. Default is 10.
    """
    # Compute the splits in parallel, one job per split
    backend = 'multiprocessing'
    path_func = delayed(_compute_one_split)
    Parallel(n_jobs=num_splits, verbose=True, backend=backend)(
        path_func(G, output_path, owa, train_frac, num_fe_train, num_fe_test, split) for split in range(num_splits)) 
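Example #15 pins backend='multiprocessing' explicitly; joblib also offers 'threading' and, in recent releases as the default, 'loky'. A small sketch of selecting a backend, with a hypothetical work function:

from joblib import Parallel, delayed

def work(i):
    return i * 2

# 'threading' suits I/O-bound or GIL-releasing work; process-based backends
# such as 'multiprocessing' and 'loky' suit CPU-bound pure-Python functions.
out = Parallel(n_jobs=2, backend='threading')(delayed(work)(i) for i in range(4))
print(out)  # [0, 2, 4, 6]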
Example #16
Source File: classification.py    From decoding-brain-challenge-2016 with BSD 3-Clause "New" or "Revised" License
def _predict_distances(self, covtest):
        """Helper to predict the distance. equivalent to transform."""
        Nc = len(self.covmeans_)

        if self.n_jobs == 1:
            dist = [distance(covtest, self.covmeans_[m], self.metric_dist)
                    for m in range(Nc)]
        else:
            dist = Parallel(n_jobs=self.n_jobs)(delayed(distance)(
                covtest, self.covmeans_[m], self.metric_dist)
                for m in range(Nc))

        dist = numpy.concatenate(dist, axis=1)
        return dist 
Example #17
Source File: classification.py    From decoding-brain-challenge-2016 with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y, sample_weight=None):
        """Fit (estimates) the centroids.

        Parameters
        ----------
        X : ndarray, shape (n_trials, n_channels, n_channels)
            ndarray of SPD matrices.
        y : ndarray shape (n_trials, 1)
            labels corresponding to each trial.
        sample_weight : None | ndarray shape (n_trials, 1)
            The weights of each sample. If None, each sample is treated with
            equal weight.

        Returns
        -------
        self : MDM instance
            The MDM instance.
        """
        self.classes_ = numpy.unique(y)

        self.covmeans_ = []

        if sample_weight is None:
            sample_weight = numpy.ones(X.shape[0])

        if self.n_jobs == 1:
            for l in self.classes_:
                self.covmeans_.append(
                    mean_covariance(X[y == l], metric=self.metric_mean,
                                    sample_weight=sample_weight[y == l]))
        else:
            self.covmeans_ = Parallel(n_jobs=self.n_jobs)(
                delayed(mean_covariance)(X[y == l], metric=self.metric_mean,
                                         sample_weight=sample_weight[y == l])
                for l in self.classes_)

        return self 
Example #18
Source File: mi.py    From mifs with BSD 3-Clause "New" or "Revised" License
def get_first_mi_vector(MI_FS, k):
    """
    Calculates the Mututal Information between each feature in X and y.

    This function is for when |S| = 0. We select the first feautre in S.
    """
    n, p = MI_FS.X.shape
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_first_mi)(i, k, MI_FS)
                                        for i in range(p))
    return MIs 
Example #19
Source File: multisurfstar.py    From scikit-rebate with MIT License
def _run_algorithm(self):
        """ Runs nearest neighbor (NN) identification and feature scoring to yield MultiSURF* scores. """
        nan_entries = np.isnan(self._X)

        NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]
        NN_near_list = [i[0] for i in NNlist]
        NN_far_list = [i[1] for i in NNlist]

        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            MultiSURFstar_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
                                          NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type)
            for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)

        return np.array(scores) 
Example #20
Source File: multisurf.py    From scikit-rebate with MIT License
def _run_algorithm(self):
        """ Runs nearest neighbor (NN) identification and feature scoring to yield MultiSURF scores. """
        nan_entries = np.isnan(self._X)

        NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]

        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            MultiSURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
                                      NN_near, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type)
            for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)

        return np.array(scores) 
Example #21
Source File: _base.py    From ibex with BSD 3-Clause "New" or "Revised" License
def transform(self, X, *args, **kwargs):
        """
        Transforms ``X`` using the transformers, uses :func:`pandas.concat`
        to horizontally concatenate the results.
        """
        verify_x_type(X)

        Xts = joblib.Parallel(n_jobs=self.n_jobs)(
            joblib.delayed(_transform)(trans, weight, X, *args, **kwargs) for _, trans, weight in self._iter())
        return self.__concat(Xts) 
Example #22
Source File: pipline.py    From MachineLearning with Apache License 2.0
def transform(self, X, y=None):
        parallel = Parallel(
            n_jobs=self.n_jobs,
            pre_dispatch=self.pre_dispatch,
            verbose=self.verbose
        )
        stats_list = parallel(delayed(self._get_stats)(X[i_smpl, :]) for i_smpl in range(len(X)))
        return np.array(stats_list) 
Example #23
Source File: mi.py    From mifs with BSD 3-Clause "New" or "Revised" License
def get_mi_vector(MI_FS, F, s):
    """
    Calculates the Mututal Information between each feature in F and s.

    This function is for when |S| > 1. s is the previously selected feature.
    We exploite the fact that this step is embarrassingly parallel.
    """

    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_mi)(f, s, MI_FS)
                                        for f in F)
    return MIs 
Example #24
Source File: parallel.py    From vnpy_crypto with MIT License
def parallel_func(func, n_jobs, verbose=5):
    """Return parallel instance with delayed function

    Util function to use joblib only if available

    Parameters
    ----------
    func: callable
        A function
    n_jobs: int
        Number of jobs to run in parallel
    verbose: int
        Verbosity level

    Returns
    -------
    parallel: instance of joblib.Parallel or list
        The parallel object
    my_func: callable
        delayed(func) if joblib is available, otherwise func unchanged
    n_jobs: int
        Number of jobs >= 0

    Examples
    --------
    >>> from math import sqrt
    >>> from statsmodels.tools.parallel import parallel_func
    >>> parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
    >>> print(n_jobs)
    >>> parallel(p_func(i**2) for i in range(10))
    """
    try:
        try:
            from joblib import Parallel, delayed
        except ImportError:
            from sklearn.externals.joblib import Parallel, delayed

        parallel = Parallel(n_jobs, verbose=verbose)
        my_func = delayed(func)

        if n_jobs == -1:
            try:
                import multiprocessing
                n_jobs = multiprocessing.cpu_count()
            except (ImportError, NotImplementedError):
                import warnings
                warnings.warn(module_unavailable_doc.format('multiprocessing'),
                              ModuleUnavailableWarning)
                n_jobs = 1

    except ImportError:
        import warnings
        warnings.warn(module_unavailable_doc.format('joblib'),
                      ModuleUnavailableWarning)
        n_jobs = 1
        my_func = func
        parallel = list
    return parallel, my_func, n_jobs 
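What makes this helper convenient is that the calling convention survives the fallback: without joblib, parallel is the built-in list and my_func is the bare function, so the same generator expression simply runs serially. A sketch of that degraded path:

from math import sqrt

# What parallel_func hands back when joblib is unavailable:
parallel, p_func = list, sqrt
print(parallel(p_func(i ** 2) for i in range(4)))  # [0.0, 1.0, 2.0, 3.0]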
Example #25
Source File: parallel.py    From Splunking-Crime with GNU Affero General Public License v3.0
def parallel_func(func, n_jobs, verbose=5):
    """Return parallel instance with delayed function

    Util function to use joblib only if available

    Parameters
    ----------
    func: callable
        A function
    n_jobs: int
        Number of jobs to run in parallel
    verbose: int
        Verbosity level

    Returns
    -------
    parallel: instance of joblib.Parallel or list
        The parallel object
    my_func: callable
        delayed(func) if joblib is available, otherwise func unchanged
    n_jobs: int
        Number of jobs >= 0

    Examples
    --------
    >>> from math import sqrt
    >>> from statsmodels.tools.parallel import parallel_func
    >>> parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
    >>> print(n_jobs)
    >>> parallel(p_func(i**2) for i in range(10))
    """
    try:
        try:
            from joblib import Parallel, delayed
        except ImportError:
            from sklearn.externals.joblib import Parallel, delayed

        parallel = Parallel(n_jobs, verbose=verbose)
        my_func = delayed(func)

        if n_jobs == -1:
            try:
                import multiprocessing
                n_jobs = multiprocessing.cpu_count()
            except (ImportError, NotImplementedError):
                import warnings
                warnings.warn(module_unavailable_doc.format('multiprocessing'),
                              ModuleUnavailableWarning)
                n_jobs = 1

    except ImportError:
        import warnings
        warnings.warn(module_unavailable_doc.format('joblib'),
                      ModuleUnavailableWarning)
        n_jobs = 1
        my_func = func
        parallel = list
    return parallel, my_func, n_jobs 
Example #26
Source File: stability_selection.py    From stability-selection with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
        """Fit the stability selection model on the given data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values.
        """

        self._validate_input()

        X, y = check_X_y(X, y, accept_sparse='csr')

        n_samples, n_variables = X.shape
        n_subsamples = np.floor(self.sample_fraction * n_samples).astype(int)
        n_lambdas = self.lambda_grid.shape[0]

        base_estimator = clone(self.base_estimator)
        random_state = check_random_state(self.random_state)
        stability_scores = np.zeros((n_variables, n_lambdas))

        for idx, lambda_value in enumerate(self.lambda_grid):
            if self.verbose > 0:
                print("Fitting estimator for lambda = %.5f (%d / %d) on %d bootstrap samples" %
                      (lambda_value, idx + 1, n_lambdas, self.n_bootstrap_iterations))

            bootstrap_samples = _bootstrap_generator(self.n_bootstrap_iterations,
                                                     self.bootstrap_func, y,
                                                     n_subsamples, random_state=random_state)

            selected_variables = Parallel(
                n_jobs=self.n_jobs, verbose=self.verbose,
                pre_dispatch=self.pre_dispatch
            )(delayed(_fit_bootstrap_sample)(clone(base_estimator),
                                             X=X[safe_mask(X, subsample), :],
                                             y=y[subsample],
                                             lambda_name=self.lambda_name,
                                             lambda_value=lambda_value,
                                             threshold=self.bootstrap_threshold)
              for subsample in bootstrap_samples)

            stability_scores[:, idx] = np.vstack(selected_variables).mean(axis=0)

        self.stability_scores_ = stability_scores
        return self 
Example #27
Source File: unmask.py    From modl with BSD 2-Clause "Simplified" License
def transform_imgs(self, imgs_list, confounds=None, copy=True, n_jobs=1,
                       mmap_mode=None):
        """Prepare multi subject data in parallel

        Parameters
        ----------

        imgs_list: list of Niimg-like objects
            See http://nilearn.github.io/manipulating_images/input_output.html.
            List of image files to prepare, one item per subject.

        confounds: list of confounds, optional
            List of confounds (2D arrays or filenames pointing to CSV
            files). Must be of the same length as imgs_list.

        copy: boolean, optional
            If True, guarantees that output array has no memory in common with
            input array.

        n_jobs: integer, optional
            The number of cpus to use to do the computation. -1 means
            'all cpus'.

        mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
            Memory-mapping mode forwarded to numpy.load when the inputs are
            .npy files.

        Returns
        -------
        region_signals: list of 2D numpy.ndarray
            List of signal for each element per subject.
            shape: list of (number of scans, number of elements)
        """
        self._check_fitted()
        raw = True
        # Check whether all imgs from imgs_list are numpy instance, or fallback
        # to MultiNiftiMasker (could handle hybrid imgs_list but we do not
        #  need it for the moment)
        for imgs in imgs_list:
            if isinstance(imgs, str):
                name, ext = os.path.splitext(imgs)
                if ext != '.npy':
                    raw = False
                    break
            elif not isinstance(imgs, np.ndarray):
                raw = False
                break
        if raw:
            data = Parallel(n_jobs=n_jobs)(delayed(np.load)(imgs,
                                                            mmap_mode=mmap_mode)
                                           for imgs in imgs_list)
            return data
        else:
            return MultiNiftiMasker.transform_imgs(self, imgs_list,
                                                   confounds=confounds,
                                                   copy=copy,
                                                   n_jobs=n_jobs)
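The raw-data branch above forwards mmap_mode to numpy.load, so each worker can memory-map its .npy file instead of reading it fully into memory. A self-contained sketch of that branch using two toy files:

import numpy as np
from joblib import Parallel, delayed

# Create two toy .npy files so the sketch is runnable end to end.
for i in range(2):
    np.save('subj%d.npy' % i, np.arange(6).reshape(2, 3))

# mmap_mode='r' memory-maps each array, mirroring the raw branch above.
data = Parallel(n_jobs=2)(delayed(np.load)('subj%d.npy' % i, mmap_mode='r')
                          for i in range(2))
print([d.shape for d in data])  # [(2, 3), (2, 3)]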