Python sklearn.externals.joblib.delayed() Examples
The following are 27 code examples of sklearn.externals.joblib.delayed(). You can go to the original project or source file by following the link above each example, or check out all available functions and classes of the sklearn.externals.joblib module.
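Before the project-specific examples below, here is a minimal, self-contained sketch of the basic pattern every example builds on. Note that sklearn.externals.joblib is the copy of joblib that scikit-learn vendored before version 0.23; in current code you would import joblib directly.

from math import sqrt
from sklearn.externals.joblib import Parallel, delayed  # scikit-learn < 0.23
# from joblib import Parallel, delayed                  # modern equivalent

# delayed(sqrt) wraps the function so that calling it records a
# (function, args, kwargs) triple instead of executing; Parallel then
# runs the recorded calls across worker processes.
results = Parallel(n_jobs=2)(delayed(sqrt)(i ** 2) for i in range(10))
print(results)  # [0.0, 1.0, 2.0, ..., 9.0]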
Example #1
Source File: pronoun_resolution.py From gap with MIT License
def batch_predict(fn):
    def _predict(self, df, preprocessor=None, **kwargs):
        # print('Is given instance a df? ', isinstance(df, pd.DataFrame))
        if isinstance(df, pd.DataFrame):
            if preprocessor:
                preprocessor(df)
            rows = []
            if self.n_jobs != 1:
                with Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                              backend=self.backend) as parallel:
                    rows = parallel([delayed(fn)(*(self, row), **kwargs)
                                     for idx, row in df.iterrows()])
            else:
                with tqdm(total=df.shape[0]) as pbar:
                    for idx, row in df.iterrows():
                        rows.append(fn(self, row, **{**row, **kwargs}))
                        pbar.update()
            return rows
        else:
            return fn(self, df, **kwargs)
    return _predict
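Two details of Example #1 are worth noting: it falls back to a plain tqdm loop when n_jobs == 1, and in the parallel branch it holds the Parallel instance open in a with block, so one worker pool serves every dispatch. A minimal sketch of that reuse pattern, with a hypothetical square helper standing in for fn:

from joblib import Parallel, delayed

def square(x):
    return x * x

# The context manager keeps the pool alive between calls, avoiding the
# cost of re-spawning workers for each batch.
with Parallel(n_jobs=2) as parallel:
    first = parallel(delayed(square)(i) for i in range(5))
    second = parallel(delayed(square)(i) for i in range(5, 10))
print(first, second)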
Example #2
Source File: Policy.py From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License
def setupGamma(self, ranking_size):
    if self.gammaRankingSize is not None and self.gammaRankingSize==ranking_size:
        print("UniformPolicy:setupGamma [INFO] Gamma has been pre-computed for this ranking_size. Size of Gamma cache:",
              len(self.gammas), flush=True)
        return

    gammaFile=Settings.DATA_DIR+self.dataset.name+'_'+self.name+'_'+str(ranking_size)+'.z'
    if os.path.exists(gammaFile):
        self.gammas=joblib.load(gammaFile)
        self.gammaRankingSize=ranking_size
        print("UniformPolicy:setupGamma [INFO] Using precomputed gamma", gammaFile, flush=True)
    else:
        self.gammas={}
        self.gammaRankingSize=ranking_size
        candidateSet=set(self.dataset.docsPerQuery)
        responses=joblib.Parallel(n_jobs=-2, verbose=50)(
            joblib.delayed(UniformGamma)(i, ranking_size, self.allowRepetitions)
            for i in candidateSet)
        for tup in responses:
            self.gammas[tup[0]]=tup[1]
        joblib.dump(self.gammas, gammaFile, compress=9, protocol=-1)
        print("", flush=True)
        print("UniformPolicy:setupGamma [INFO] Finished creating Gamma_pinv cache. Size",
              len(self.gammas), flush=True)
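Example #2 passes n_jobs=-2. In joblib, negative values count back from the number of CPUs (effectively n_cpus + 1 + n_jobs), so -1 uses every core and -2 leaves one free for the rest of the system. A toy illustration:

from joblib import Parallel, delayed

# n_jobs=-2: all CPUs except one; handy when the machine must stay responsive.
pairs = Parallel(n_jobs=-2)(delayed(divmod)(i, 2) for i in range(6))
print(pairs)  # [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)]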
Example #3
Source File: optimize.py From SimulatedAnnealing with Apache License 2.0
def fit_score(self, X, Y):
    if isinstance(self.cv, int):
        n_folds = self.cv
        self.cv = KFold(n_splits=n_folds).split(X)

    # Formatting is kinda ugly but provides best debugging view
    out = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                   pre_dispatch=self.pre_dispatch)(
        delayed(_fit_and_score)(clone(self.clf), X, Y, self.metric,
                                train, test, self.verbose, {}, {},
                                return_parameters=False,
                                error_score='raise')
        for train, test in self.cv)

    # Out is a list of triplet: score, estimator, n_test_samples
    scores = list(zip(*out))[0]
    return np.mean(scores), np.std(scores)
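Example #3 leans on sklearn's private _fit_and_score helper. The same fan-out over cross-validation folds can be written against the public API only; a self-contained sketch (the fit_and_score helper here is hypothetical, not part of sklearn):

import numpy as np
from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

def fit_and_score(est, X, y, train, test):
    # Fit on the training fold, score on the held-out fold.
    est.fit(X[train], y[train])
    return est.score(X[test], y[test])

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000)
scores = Parallel(n_jobs=2)(
    delayed(fit_and_score)(clone(clf), X, y, train, test)
    for train, test in KFold(n_splits=5).split(X))
print(np.mean(scores), np.std(scores))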
Example #4
Source File: relieff.py From scikit-rebate with MIT License
def _run_algorithm(self):
    """ Runs nearest neighbor (NN) identification and feature scoring to yield ReliefF scores. """

    # Find nearest neighbors
    NNlist = map(self._find_neighbors, range(self._datalen))

    # Feature scoring - using identified nearest neighbors
    nan_entries = np.isnan(self._X)  # boolean mask for missing data values

    # Call the scoring method for the ReliefF algorithm
    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        ReliefF_compute_scores)(instance_num, self.attr, nan_entries,
                                self._num_attributes, self.mcmap, NN,
                                self._headers, self._class_type, self._X,
                                self._y, self._labels_std, self.data_type)
        for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)

    return np.array(scores)
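The reduction in Example #4 is a common joblib idiom: each worker returns one score vector per instance, and np.sum(..., axis=0) folds the list of vectors into a single total. A stripped-down sketch with a hypothetical partial_scores stand-in for ReliefF_compute_scores:

import numpy as np
from joblib import Parallel, delayed

def partial_scores(instance_num):
    # Hypothetical per-instance contribution; the real function scores
    # every feature against this instance's nearest neighbors.
    return np.array([instance_num, instance_num * 2.0, 1.0])

scores = np.sum(Parallel(n_jobs=2)(
    delayed(partial_scores)(i) for i in range(4)), axis=0)
print(scores)  # [ 6. 12.  4.]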
Example #5
Source File: relieff.py From scikit-rebate with MIT License
def _distarray_missing(self, xc, xd, cdiffs):
    """Distance array calculation for data with missing values"""
    cindices = []
    dindices = []
    # Get Boolean mask locating missing values for continuous and discrete
    # features separately. These correspond to xc and xd respectively.
    for i in range(self._datalen):
        cindices.append(np.where(np.isnan(xc[i]))[0])
        dindices.append(np.where(np.isnan(xd[i]))[0])

    if self.n_jobs != 1:
        dist_array = Parallel(n_jobs=self.n_jobs)(delayed(get_row_missing)(
            xc, xd, cdiffs, index, cindices, dindices)
            for index in range(self._datalen))
    else:
        # For each instance calculate distance from all other instances
        # (in non-redundant manner) (i.e. computes triangle, and puts
        # zeros in for rest to form square).
        dist_array = [get_row_missing(xc, xd, cdiffs, index, cindices, dindices)
                      for index in range(self._datalen)]

    return np.array(dist_array)

#==================================================================#
############################# ReliefF ############################################
Example #6
Source File: surf.py From scikit-rebate with MIT License
def _run_algorithm(self):
    """ Runs nearest neighbor (NN) identification and feature scoring to yield SURF scores. """
    sm = cnt = 0
    for i in range(self._datalen):
        sm += sum(self._distance_array[i])
        cnt += len(self._distance_array[i])
    avg_dist = sm / float(cnt)

    nan_entries = np.isnan(self._X)

    NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)]

    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        SURF_compute_scores)(instance_num, self.attr, nan_entries,
                             self._num_attributes, self.mcmap, NN,
                             self._headers, self._class_type, self._X,
                             self._y, self._labels_std, self.data_type)
        for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)

    return np.array(scores)
Example #7
Source File: stacking.py From civisml-extensions with BSD 3-Clause "New" or "Revised" License
def _base_est_fit(self, X, y, **fit_params):
    """Fit the base estimators on X and y.
    """
    fit_params_ests = self._extract_fit_params(**fit_params)

    _jobs = []
    for name, est in self.estimator_list[:-1]:
        _jobs.append(delayed(_fit_est)(
            clone(est), X, y, **fit_params_ests[name]))

    _out = Parallel(
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        pre_dispatch=self.pre_dispatch)(_jobs)

    for name, _ in self.estimator_list[:-1]:
        self._replace_est('estimator_list', name, _out.pop(0))
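Unlike the generator expressions in most examples, Example #7 builds the full list of delayed calls first and hands the list to Parallel afterwards; either form works, because Parallel accepts any iterable of the (function, args, kwargs) triples that delayed produces. A minimal sketch with a hypothetical train_one helper:

from joblib import Parallel, delayed

def train_one(name):
    # Hypothetical stand-in for fitting one base estimator.
    return name.upper()

jobs = [delayed(train_one)(name) for name in ('lr', 'rf', 'svc')]
fitted = Parallel(n_jobs=2)(jobs)
print(fitted)  # ['LR', 'RF', 'SVC']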
Example #8
Source File: _base.py From ibex with BSD 3-Clause "New" or "Revised" License
def fit_transform(self, X, y=None, **fit_params):
    """
    Fits the transformer using ``X`` (and possibly ``y``). Transforms
    ``X`` using the transformers, uses :func:`pandas.concat`
    to horizontally concatenate the results.

    Returns:

        The horizontal concatenation of the transformed results.
    """
    verify_x_type(X)
    verify_y_type(y)

    Xts = joblib.Parallel(n_jobs=self.n_jobs)(
        joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params)
        for _, trans, weight in self._iter())
    return self.__concat(Xts)
Example #9
Source File: multi_decompose_fmri.py From modl with BSD 2-Clause "Simplified" License
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max, size=n_seeds)
    exps = []
    exps += [{'method': 'sgd', 'step_size': step_size}
             for step_size in np.logspace(-7, -7, 1)]
    exps += [{'method': 'gram', 'reduction': reduction}
             for reduction in [12]]
    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)

    Parallel(n_jobs=n_jobs, verbose=10)(
        delayed(single_run)(config_updates, rundir, i)
        for i, config_updates in enumerate(exps))
Example #10
Source File: multi_decompose_images.py From modl with BSD 2-Clause "Simplified" License
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max, size=n_seeds)
    exps = []
    exps += [{'method': 'sgd', 'step_size': step_size}
             for step_size in np.logspace(-3, 3, 7)]
    exps += [{'method': 'gram', 'reduction': reduction}
             for reduction in [1, 4, 6, 8, 12, 24]]
    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)

    Parallel(n_jobs=n_jobs, verbose=10)(
        delayed(single_run)(config_updates, rundir, i)
        for i, config_updates in enumerate(exps))
Example #11
Source File: chemTopicModel.py From CheTo with BSD 3-Clause "New" or "Revised" License
def _generateFragments(self):
    voc=set(self.vocabulary)
    fpsdict = dict([(idx,{}) for idx in self.moldata.index])
    nrows = self.moldata.shape[0]
    counter = 0
    with Parallel(n_jobs=self.n_jobs,verbose=self.verbose) as parallel:
        while counter < nrows:
            nextChunk = min(counter+(self.n_jobs*self.chunksize),nrows)
            result = parallel(delayed(_generateMolFrags)(mollist, voc,
                                                         self.fragmentMethod,
                                                         self.fragIdx)
                              for mollist in self._produceDataChunks(counter,nextChunk,self.chunksize))
            for r in result:
                counter+=len(r)
                fpsdict.update(r)
    self.moldata['fps'] = np.array(sorted(fpsdict.items()))[:,1]

# construct the molecule-fragment matrix as input for the LDA algorithm
Example #12
Source File: evaluation.py From few with GNU General Public License v3.0
def calc_fitness(self,X,labels,fit_choice,sel):
    """computes fitness of individual output yhat.
    yhat: output of a program.
    labels: correct outputs
    fit_choice: choice of fitness function
    """

    if 'lexicase' in sel:
        # return list(map(lambda yhat: self.f_vec[fit_choice](labels,yhat),X))
        return np.asarray(
            [self.proper(self.f_vec[fit_choice](labels, yhat)) for yhat in X],
            order='F')
        # return list(Parallel(n_jobs=-1)(delayed(self.f_vec[fit_choice])(labels,yhat) for yhat in X))
    else:
        # return list(map(lambda yhat: self.f[fit_choice](labels,yhat),X))
        return np.asarray([self.f[fit_choice](labels,yhat) for yhat in X],
                          order='F').reshape(-1)
        # return list(Parallel(n_jobs=-1)(delayed(self.f[fit_choice])(labels,yhat) for yhat in X))
Example #13
Source File: few.py From few with GNU General Public License v3.0
def transform(self,x,inds=None,labels = None):
    """return a transformation of x using population outputs"""
    if inds:
        # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype)
        #                                       for I in inds)).transpose()
        return np.asarray(
            [self.out(I,x,labels,self.otype) for I in inds]).transpose()
    elif self._best_inds:
        # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype)
        #                                       for I in self._best_inds)).transpose()
        return np.asarray(
            [self.out(I,x,labels,self.otype) for I in self._best_inds]).transpose()
    else:
        return x
Example #14
Source File: Policy.py From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License
def setupGamma(self, ranking_size):
    if self.gammaRankingSize is not None and self.gammaRankingSize==ranking_size:
        print("NonUniformPolicy:setupGamma [INFO] Gamma has been pre-computed for this ranking_size. Size of Gamma cache:",
              len(self.gammas), flush=True)
        return

    gammaFile=Settings.DATA_DIR+self.dataset.name+'_'+self.name+'_'+str(ranking_size)+'.z'
    if os.path.exists(gammaFile):
        self.gammas, self.multinomials=joblib.load(gammaFile)
        self.gammaRankingSize=ranking_size
        print("NonUniformPolicy:setupGamma [INFO] Using precomputed gamma", gammaFile, flush=True)
    else:
        self.gammas={}
        self.multinomials={}
        self.gammaRankingSize=ranking_size
        candidateSet=set(self.dataset.docsPerQuery)
        responses=joblib.Parallel(n_jobs=-2, verbose=50)(
            joblib.delayed(NonUniformGamma)(i, self.decay, ranking_size, self.allowRepetitions)
            for i in candidateSet)
        for tup in responses:
            self.gammas[tup[0]]=tup[2]
            self.multinomials[tup[0]]=tup[1]
        joblib.dump((self.gammas, self.multinomials), gammaFile, compress=9, protocol=-1)
        print("", flush=True)
        print("NonUniformPolicy:setupGamma [INFO] Finished creating Gamma_pinv cache. Size",
              len(self.gammas), flush=True)

    self.policy.predictAll(-1)
Example #15
Source File: split_train_test.py From EvalNE with MIT License
def compute_splits_parallel(G, output_path, owa=True, train_frac=0.51,
                            num_fe_train=None, num_fe_test=None, num_splits=10):
    r"""
    Computes in parallel the required number of train/test splits of edges and non-edges
    from an input graph and writes the data to files. The train sets are always connected /
    weakly connected and span all nodes of the input graph. Input graphs (digraphs) cannot
    contain more than one (weakly) connected component.

    Parameters
    ----------
    G : graph
        A NetworkX graph
    output_path : string
        Indicates the path where data will be stored. Can include a name for all splits to share.
    owa : bool, optional
        Encodes the belief that the network respects or not the open world assumption. Default is True.
        If OWA=True, false train edges can be true test edges. False edges sampled from train graph.
        If OWA=False, closed world is assumed so false train edges are known to be false (not in G)
    train_frac : float, optional
        The relative size (in range (0.0, 1.0]) of the train set with respect to the total number
        of edges in the graph. Default is 0.51.
    num_fe_train : int, optional
        The number of train false edges to generate. Default is same number as true train edges.
    num_fe_test : int, optional
        The number of test false edges to generate. Default is same number as true test edges.
    num_splits : int, optional
        The number of train/test splits to generate. Default is 10.
    """
    # Compute the splits sequentially or in parallel
    backend = 'multiprocessing'
    path_func = delayed(_compute_one_split)
    Parallel(n_jobs=num_splits, verbose=True, backend=backend)(
        path_func(G, output_path, owa, train_frac, num_fe_train, num_fe_test, split)
        for split in range(num_splits))
Example #16
Source File: classification.py From decoding-brain-challenge-2016 with BSD 3-Clause "New" or "Revised" License
def _predict_distances(self, covtest):
    """Helper to predict the distance. Equivalent to transform."""
    Nc = len(self.covmeans_)

    if self.n_jobs == 1:
        dist = [distance(covtest, self.covmeans_[m], self.metric_dist)
                for m in range(Nc)]
    else:
        dist = Parallel(n_jobs=self.n_jobs)(delayed(distance)(
            covtest, self.covmeans_[m], self.metric_dist)
            for m in range(Nc))

    dist = numpy.concatenate(dist, axis=1)
    return dist
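Examples #5, #16 and #17 all guard on n_jobs == 1 and run a plain loop in that case, since spinning up a worker pool has real overhead that a single-process job would pay for nothing. The guard distilled, with a hypothetical work function:

from joblib import Parallel, delayed

def work(item):
    return item * item

def run(items, n_jobs=1):
    # Skip the pool entirely for the sequential case.
    if n_jobs == 1:
        return [work(item) for item in items]
    return Parallel(n_jobs=n_jobs)(delayed(work)(item) for item in items)

print(run(range(5)), run(range(5), n_jobs=2))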
Example #17
Source File: classification.py From decoding-brain-challenge-2016 with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y, sample_weight=None):
    """Fit (estimates) the centroids.

    Parameters
    ----------
    X : ndarray, shape (n_trials, n_channels, n_channels)
        ndarray of SPD matrices.
    y : ndarray shape (n_trials, 1)
        labels corresponding to each trial.
    sample_weight : None | ndarray shape (n_trials, 1)
        the weights of each sample. if None, each sample is treated with
        equal weights.

    Returns
    -------
    self : MDM instance
        The MDM instance.
    """
    self.classes_ = numpy.unique(y)

    self.covmeans_ = []

    if sample_weight is None:
        sample_weight = numpy.ones(X.shape[0])

    if self.n_jobs == 1:
        for l in self.classes_:
            self.covmeans_.append(
                mean_covariance(X[y == l], metric=self.metric_mean,
                                sample_weight=sample_weight[y == l]))
    else:
        self.covmeans_ = Parallel(n_jobs=self.n_jobs)(
            delayed(mean_covariance)(X[y == l], metric=self.metric_mean,
                                     sample_weight=sample_weight[y == l])
            for l in self.classes_)

    return self
Example #18
Source File: mi.py From mifs with BSD 3-Clause "New" or "Revised" License
def get_first_mi_vector(MI_FS, k):
    """
    Calculates the Mutual Information between each feature in X and y.

    This function is for when |S| = 0. We select the first feature in S.
    """
    n, p = MI_FS.X.shape
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_first_mi)(i, k, MI_FS)
                                        for i in range(p))
    return MIs
Example #19
Source File: multisurfstar.py From scikit-rebate with MIT License
def _run_algorithm(self):
    """ Runs nearest neighbor (NN) identification and feature scoring to yield MultiSURF* scores. """
    nan_entries = np.isnan(self._X)

    NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]
    NN_near_list = [i[0] for i in NNlist]
    NN_far_list = [i[1] for i in NNlist]

    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        MultiSURFstar_compute_scores)(instance_num, self.attr, nan_entries,
                                      self._num_attributes, self.mcmap,
                                      NN_near, NN_far, self._headers,
                                      self._class_type, self._X, self._y,
                                      self._labels_std, self.data_type)
        for instance_num, NN_near, NN_far in
        zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)

    return np.array(scores)
Example #20
Source File: multisurf.py From scikit-rebate with MIT License
def _run_algorithm(self):
    """ Runs nearest neighbor (NN) identification and feature scoring to yield MultiSURF scores. """
    nan_entries = np.isnan(self._X)

    NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]

    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        MultiSURF_compute_scores)(instance_num, self.attr, nan_entries,
                                  self._num_attributes, self.mcmap, NN_near,
                                  self._headers, self._class_type, self._X,
                                  self._y, self._labels_std, self.data_type)
        for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)

    return np.array(scores)
Example #21
Source File: _base.py From ibex with BSD 3-Clause "New" or "Revised" License
def transform(self, X, *args, **kwargs):
    """
    Transforms ``X`` using the transformers, uses :func:`pandas.concat`
    to horizontally concatenate the results.
    """
    verify_x_type(X)

    Xts = joblib.Parallel(n_jobs=self.n_jobs)(
        joblib.delayed(_transform)(trans, weight, X, *args, **kwargs)
        for _, trans, weight in self._iter())
    return self.__concat(Xts)
Example #22
Source File: pipline.py From MachineLearning with Apache License 2.0
def transform(self, X, y=None):
    parallel = Parallel(
        n_jobs=self.n_jobs,
        pre_dispatch=self.pre_dispatch,
        verbose=self.verbose
    )
    stats_list = parallel(delayed(self._get_stats)(X[i_smpl, :])
                          for i_smpl in range(len(X)))
    return np.array(stats_list)
Example #23
Source File: mi.py From mifs with BSD 3-Clause "New" or "Revised" License
def get_mi_vector(MI_FS, F, s):
    """
    Calculates the Mutual Information between each feature in F and s.

    This function is for when |S| > 1. s is the previously selected feature.
    We exploit the fact that this step is embarrassingly parallel.
    """
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_mi)(f, s, MI_FS)
                                        for f in F)
    return MIs
Example #24
Source File: parallel.py From vnpy_crypto with MIT License
def parallel_func(func, n_jobs, verbose=5):
    """Return parallel instance with delayed function

    Util function to use joblib only if available

    Parameters
    ----------
    func: callable
        A function
    n_jobs: int
        Number of jobs to run in parallel
    verbose: int
        Verbosity level

    Returns
    -------
    parallel: instance of joblib.Parallel or list
        The parallel object
    my_func: callable
        func if not parallel or delayed(func)
    n_jobs: int
        Number of jobs >= 0

    Examples
    --------
    >>> from math import sqrt
    >>> from statsmodels.tools.parallel import parallel_func
    >>> parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
    >>> print(n_jobs)
    >>> parallel(p_func(i**2) for i in range(10))
    """
    try:
        try:
            from joblib import Parallel, delayed
        except ImportError:
            from sklearn.externals.joblib import Parallel, delayed
        parallel = Parallel(n_jobs, verbose=verbose)
        my_func = delayed(func)

        if n_jobs == -1:
            try:
                import multiprocessing
                n_jobs = multiprocessing.cpu_count()
            except (ImportError, NotImplementedError):
                import warnings
                warnings.warn(module_unavailable_doc.format('multiprocessing'),
                              ModuleUnavailableWarning)
                n_jobs = 1

    except ImportError:
        import warnings
        warnings.warn(module_unavailable_doc.format('joblib'),
                      ModuleUnavailableWarning)
        n_jobs = 1
        my_func = func
        parallel = list
    return parallel, my_func, n_jobs
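parallel_func degrades gracefully: if joblib is missing it returns the plain function and the built-in list, so the same calling code still evaluates, just serially. A usage sketch, assuming the helper above is importable:

from math import sqrt

# With joblib installed this runs in two processes; without it, the same
# line runs serially because parallel is list and p_func is sqrt itself.
parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=2, verbose=0)
print(parallel(p_func(i ** 2) for i in range(5)))  # [0.0, 1.0, 2.0, 3.0, 4.0]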
Example #25
Source File: parallel.py From Splunking-Crime with GNU Affero General Public License v3.0
def parallel_func(func, n_jobs, verbose=5):
    """Return parallel instance with delayed function

    Util function to use joblib only if available

    Parameters
    ----------
    func: callable
        A function
    n_jobs: int
        Number of jobs to run in parallel
    verbose: int
        Verbosity level

    Returns
    -------
    parallel: instance of joblib.Parallel or list
        The parallel object
    my_func: callable
        func if not parallel or delayed(func)
    n_jobs: int
        Number of jobs >= 0

    Examples
    --------
    >>> from math import sqrt
    >>> from statsmodels.tools.parallel import parallel_func
    >>> parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
    >>> print(n_jobs)
    >>> parallel(p_func(i**2) for i in range(10))
    """
    try:
        try:
            from joblib import Parallel, delayed
        except ImportError:
            from sklearn.externals.joblib import Parallel, delayed
        parallel = Parallel(n_jobs, verbose=verbose)
        my_func = delayed(func)

        if n_jobs == -1:
            try:
                import multiprocessing
                n_jobs = multiprocessing.cpu_count()
            except (ImportError, NotImplementedError):
                import warnings
                warnings.warn(module_unavailable_doc.format('multiprocessing'),
                              ModuleUnavailableWarning)
                n_jobs = 1

    except ImportError:
        import warnings
        warnings.warn(module_unavailable_doc.format('joblib'),
                      ModuleUnavailableWarning)
        n_jobs = 1
        my_func = func
        parallel = list
    return parallel, my_func, n_jobs
Example #26
Source File: stability_selection.py From stability-selection with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
    """Fit the stability selection model on the given data.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        The training input samples.
    y : array-like, shape = [n_samples]
        The target values.
    """
    self._validate_input()

    X, y = check_X_y(X, y, accept_sparse='csr')

    n_samples, n_variables = X.shape
    n_subsamples = np.floor(self.sample_fraction * n_samples).astype(int)
    n_lambdas = self.lambda_grid.shape[0]

    base_estimator = clone(self.base_estimator)
    random_state = check_random_state(self.random_state)
    stability_scores = np.zeros((n_variables, n_lambdas))

    for idx, lambda_value in enumerate(self.lambda_grid):
        if self.verbose > 0:
            print("Fitting estimator for lambda = %.5f (%d / %d) on %d bootstrap samples" %
                  (lambda_value, idx + 1, n_lambdas, self.n_bootstrap_iterations))

        bootstrap_samples = _bootstrap_generator(self.n_bootstrap_iterations,
                                                 self.bootstrap_func, y,
                                                 n_subsamples,
                                                 random_state=random_state)

        selected_variables = Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=self.pre_dispatch
        )(delayed(_fit_bootstrap_sample)(clone(base_estimator),
                                         X=X[safe_mask(X, subsample), :],
                                         y=y[subsample],
                                         lambda_name=self.lambda_name,
                                         lambda_value=lambda_value,
                                         threshold=self.bootstrap_threshold)
          for subsample in bootstrap_samples)

        stability_scores[:, idx] = np.vstack(selected_variables).mean(axis=0)

    self.stability_scores_ = stability_scores

    return self
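The stability-selection loop in Example #26 is a textbook bootstrap fan-out: each worker fits the estimator on one subsample and returns a support mask, and the masks are stacked and averaged into per-variable selection frequencies. A toy sketch with a hypothetical support_on_subsample in place of _fit_bootstrap_sample:

import numpy as np
from joblib import Parallel, delayed

rng = np.random.RandomState(0)
X = rng.randn(100, 5)

def support_on_subsample(rows):
    # Hypothetical selection rule returning a boolean support mask.
    return np.abs(X[rows].mean(axis=0)) > 0.1

subsamples = [rng.choice(100, size=50, replace=False) for _ in range(8)]
masks = Parallel(n_jobs=2)(delayed(support_on_subsample)(rows)
                           for rows in subsamples)
print(np.vstack(masks).mean(axis=0))  # selection frequency per variable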
Example #27
Source File: unmask.py From modl with BSD 2-Clause "Simplified" License
def transform_imgs(self, imgs_list, confounds=None, copy=True, n_jobs=1,
                   mmap_mode=None):
    """Prepare multi subject data in parallel

    Parameters
    ----------
    imgs_list: list of Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html.
        List of imgs file to prepare. One item per subject.

    confounds: list of confounds, optional
        List of confounds (2D arrays or filenames pointing to CSV files).
        Must be of same length as imgs_list.

    copy: boolean, optional
        If True, guarantees that output array has no memory in common with
        input array.

    n_jobs: integer, optional
        The number of cpus to use to do the computation. -1 means
        'all cpus'.

    Returns
    -------
    region_signals: list of 2D numpy.ndarray
        List of signal for each element per subject.
        shape: list of (number of scans, number of elements)
    """
    self._check_fitted()
    raw = True
    # Check whether all imgs from imgs_list are numpy instance, or fallback
    # to MultiNiftiMasker (could handle hybrid imgs_list but we do not
    # need it for the moment)
    for imgs in imgs_list:
        if isinstance(imgs, str):
            name, ext = os.path.splitext(imgs)
            if ext != '.npy':
                raw = False
                break
        elif not isinstance(imgs, np.ndarray):
            raw = False
            break
    if raw:
        data = Parallel(n_jobs=n_jobs)(delayed(np.load)(imgs,
                                                        mmap_mode=mmap_mode)
                                       for imgs in imgs_list)
        return data
    else:
        return MultiNiftiMasker.transform_imgs(self, imgs_list,
                                               confounds=confounds,
                                               copy=copy,
                                               n_jobs=n_jobs,
                                               )
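Example #27 parallelizes np.load itself, with an optional mmap_mode so each worker memory-maps its .npy file rather than reading the whole array into RAM. A self-contained sketch of that pattern:

import os
import tempfile
import numpy as np
from joblib import Parallel, delayed

# Write a few .npy files to stand in for per-subject arrays.
tmpdir = tempfile.mkdtemp()
paths = []
for i in range(3):
    path = os.path.join(tmpdir, 'subj%d.npy' % i)
    np.save(path, np.random.randn(10, 4))
    paths.append(path)

# mmap_mode='r' maps each file read-only instead of loading it eagerly.
data = Parallel(n_jobs=2)(delayed(np.load)(p, mmap_mode='r')
                          for p in paths)
print([d.shape for d in data])  # [(10, 4), (10, 4), (10, 4)]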