Python joblib.delayed() Examples

The following are 30 code examples of joblib.delayed(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module joblib, or try the search function.
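Before the project examples, here is a minimal, self-contained sketch of the pattern they all share (function and values are illustrative only): delayed() wraps a function call without executing it, and Parallel consumes a generator of such wrapped calls and runs them across workers, returning the results in input order.

from math import sqrt
from joblib import Parallel, delayed

# Each delayed(sqrt)(i ** 2) records the call instead of running it;
# Parallel dispatches the whole batch to 2 workers and collects the results.
results = Parallel(n_jobs=2)(delayed(sqrt)(i ** 2) for i in range(10))
print(results)  # [0.0, 1.0, 2.0, ..., 9.0]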
Example #1
Source File: utils.py    From nmp_qc with MIT License    8 votes
def get_graph_stats(graph_obj_handle, prop='degrees'):
    # if prop == 'degrees':
    num_cores = multiprocessing.cpu_count()
    inputs = [int(i*len(graph_obj_handle)/num_cores) for i in range(num_cores)] + [len(graph_obj_handle)]
    res = Parallel(n_jobs=num_cores)(delayed(get_values)(graph_obj_handle, inputs[i], inputs[i+1], prop) for i in range(num_cores))

    stat_dict = {}

    if 'degrees' in prop:
        stat_dict['degrees'] = list(set([d for core_res in res for file_res in core_res for d in file_res['degrees']]))
    if 'edge_labels' in prop:
        stat_dict['edge_labels'] = list(set([d for core_res in res for file_res in core_res for d in file_res['edge_labels']]))
    if 'target_mean' in prop or 'target_std' in prop:
        param = np.array([file_res['params'] for core_res in res for file_res in core_res])
    if 'target_mean' in prop:
        stat_dict['target_mean'] = np.mean(param, axis=0)
    if 'target_std' in prop:
        stat_dict['target_std'] = np.std(param, axis=0)

    return stat_dict 
Example #2
Source File: ShotgunClassifier.py    From SFA_Python with GNU General Public License v3.0    6 votes
def fitEnsemble(self, normMean, samples, factor):
        minWindowLength = 5
        maxWindowLength = getMax(samples, self.MAX_WINDOW_LENGTH)
        windows = self.getWindowsBetween(minWindowLength, maxWindowLength)
        self.logger.Log("Windows: %s" % windows)

        correctTraining = 0
        self.results = []

        self.logger.Log("%s  Fitting for a norm of %s" % (self.NAME, str(normMean)))
        Parallel(n_jobs=1, backend="threading")(delayed(self.fitIndividual, check_pickle=False)(normMean, samples, windows, i) for i in range(len(windows)))

        # Find best correctTraining
        for i in range(len(self.results)):
            if self.results[i].correct > correctTraining:
                correctTraining = self.results[i].correct

        # Remove Results that are no longer satisfactory
        new_results = []
        for i in range(len(self.results)):
            if self.results[i].correct >= (correctTraining * factor):
                new_results.append(self.results[i])

        return new_results, correctTraining 
Example #3
Source File: batched_inv_joblib.py    From content_wmf with MIT License    6 votes
def recompute_factors_batched(Y, S, lambda_reg, W=None, X=None,
                              dtype='float32', batch_size=10000, n_jobs=4):
    m = S.shape[0]  # m = number of users
    f = Y.shape[1]  # f = number of factors

    YTY = np.dot(Y.T, Y)  # precompute this
    YTYpR = YTY + lambda_reg * np.eye(f)
    if W is not None:
        WX = lambda_reg * (X.dot(W)).T
    else:
        WX = None
    X_new = np.zeros((m, f), dtype=dtype)

    num_batches = int(np.ceil(m / float(batch_size)))

    res = Parallel(n_jobs=n_jobs)(delayed(solve_batch)(b, S, Y, WX, YTYpR,
                                                       batch_size, m, f, dtype)
                                  for b in xrange(num_batches))
    X_new = np.concatenate(res, axis=0)

    return X_new 
Example #4
Source File: extract_frame.py    From DPC with MIT License    6 votes
def main_kinetics400(v_root, f_root, dim=150):
    print('extracting Kinetics400 ... ')
    for basename in ['train_split', 'val_split']:
        v_root_real = v_root + '/' + basename
        if not os.path.exists(v_root_real):
            print('Wrong v_root'); sys.exit()
        f_root_real = '/scratch/local/ssd/htd/kinetics400/frame_full' + '/' + basename 
        print('Extract to: \nframe: %s' % f_root_real)
        if not os.path.exists(f_root_real): os.makedirs(f_root_real)
        v_act_root = glob.glob(os.path.join(v_root_real, '*/'))
        v_act_root = sorted(v_act_root)

        # if resume, remember to delete the last video folder
        for i, j in tqdm(enumerate(v_act_root), total=len(v_act_root)):
            v_paths = glob.glob(os.path.join(j, '*.mp4'))
            v_paths = sorted(v_paths)
            # for resume:
            v_class = j.split('/')[-2]
            out_dir = os.path.join(f_root_real, v_class)
            if os.path.exists(out_dir): print(out_dir, 'exists!'); continue
            print('extracting: %s' % v_class)
            # dim = 150 (crop to 128 later) or 256 (crop to 224 later)
            Parallel(n_jobs=32)(delayed(extract_video_opencv)(p, f_root_real, dim=dim) for p in tqdm(v_paths, total=len(v_paths))) 
Example #5
Source File: __init__.py    From s3tk with MIT License    6 votes
def parallelize(bucket, only, _except, fn, args=(), versions=False):
    bucket = s3().Bucket(bucket)

    # use prefix for performance
    prefix = None
    if only:
        # get the first prefix before wildcard
        prefix = '/'.join(only.split('*')[0].split('/')[:-1])
        if prefix:
            prefix = prefix + '/'

    if versions:
        object_versions = bucket.object_versions.filter(Prefix=prefix) if prefix else bucket.object_versions.all()
        # delete markers have no size
        return Parallel(n_jobs=24)(delayed(fn)(bucket.name, ov.object_key, ov.id, *args) for ov in object_versions if object_matches(ov.object_key, only, _except) and not ov.is_latest and ov.size is not None)
    else:
        objects = bucket.objects.filter(Prefix=prefix) if prefix else bucket.objects.all()

        if only and not '*' in only:
            objects = [s3().Object(bucket, only)]

        return Parallel(n_jobs=24)(delayed(fn)(bucket.name, os.key, *args) for os in objects if object_matches(os.key, only, _except)) 
Example #6
Source File: convert_videos.py    From dmc-net with MIT License    6 votes
def convert_video_wapper(src_videos, 
                         dst_videos, 
                         cmd_format,
                         in_parallel=True):
    commands = []
    for src, dst in zip(src_videos, dst_videos):
        cmd = cmd_format.format(src, dst)
        commands.append(cmd)

    logging.info("- {} commonds to excute".format(len(commands)))

    if not in_parallel:
        for i, cmd in enumerate(commands):
            # if i % 100 == 0:
            #     logging.info("{} / {}: '{}'".format(i, len(commands), cmd))
            exe_cmd(cmd=cmd)
    else:
        num_jobs = 24
        logging.info("processing videos in parallel, num_jobs={}".format(num_jobs))
        Parallel(n_jobs=num_jobs)(delayed(exe_cmd)(cmd) for cmd in commands) 
Example #7
Source File: _glm_reporter_visual_inspection_suite_.py    From nistats with BSD 3-Clause "New" or "Revised" License    6 votes
def prefer_parallel_execution(functions_to_be_called):  # pragma: no cover
    try:
        import joblib
        import multiprocessing
    except ImportError:
        print('Joblib not installed, switching to serial execution')
        [run_function(fn) for fn in functions_to_be_called]
    else:
        try:
            import tqdm
        except ImportError:
            inputs = functions_to_be_called
        else:
            inputs = tqdm.tqdm(functions_to_be_called)
        n_jobs = multiprocessing.cpu_count()
        print('Parallelizing execution using Joblib')
        joblib.Parallel(n_jobs=n_jobs)(
                joblib.delayed(run_function)(fn) for fn in inputs) 
Example #8
Source File: BOSSVSClassifier.py    From SFA_Python with GNU General Public License v3.0    6 votes
def fitEnsemble(self, windows, normMean, samples):
        correctTraining = 0
        self.results = []

        self.logger.Log("%s  Fitting for a norm of %s" % (self.NAME, str(normMean)))
        Parallel(n_jobs=1, backend="threading")(delayed(self.fitIndividual, check_pickle=False)(normMean, samples, windows, i) for i in range(len(windows)))

        # Find best correctTraining
        for i in range(len(self.results)):
            if self.results[i].score > correctTraining:
                correctTraining = self.results[i].score

        # Remove Results that are no longer satisfactory
        new_results = []
        self.logger.Log("Stored Models for Norm=%s" % normMean)
        for i in range(len(self.results)):
            if self.results[i].score >= (correctTraining * self.factor):
                self.logger.Log("WindowLength:%s  Features:%s  TrainScore:%s" % (self.results[i].windowLength, self.results[i].features, self.results[i].score))
                new_results.append(self.results[i])

        return new_results 
Example #9
Source File: ShotgunEnsembleClassifier.py    From SFA_Python with GNU General Public License v3.0    6 votes
def fitEnsemble(self, normMean, samples, factor):
        minWindowLength = 5
        maxWindowLength = getMax(samples, self.MAX_WINDOW_LENGTH)
        windows = self.getWindowsBetween(minWindowLength, maxWindowLength)
        self.logger.Log("Windows: %s" % windows)

        correctTraining = 0
        self.results = []

        self.logger.Log("%s  Fitting for a norm of %s" % (self.NAME, str(normMean)))
        Parallel(n_jobs=-1, backend="threading")(delayed(self.fitIndividual, check_pickle=False)(normMean, samples, windows, i) for i in range(len(windows)))

        # Find best correctTraining
        for i in range(len(self.results)):
            if self.results[i].correct > correctTraining:
                correctTraining = self.results[i].correct

        # Remove Results that are no longer satisfactory
        new_results = []
        for i in range(len(self.results)):
            if self.results[i].correct >= (correctTraining * factor):
                new_results.append(self.results[i])

        return new_results, correctTraining 
Example #10
Source File: utils.py    From DeepLab_v3 with MIT License    6 votes
def next_minibatch(self):

        image_filenames_minibatch = self.image_filenames[self.current_index: self.current_index + self.minibatch_size]
        label_filenames_minibatch = self.label_filenames[self.current_index: self.current_index + self.minibatch_size]
        self.current_index += self.minibatch_size
        if self.current_index >= self.dataset_size:
            self.current_index = 0

        # Multithread image processing
        # Reference: https://www.kaggle.com/inoryy/fast-image-pre-process-in-parallel

        results = Parallel(n_jobs=self.num_jobs)(delayed(self.process_func)(image_filename, label_filename) for image_filename, label_filename in zip(image_filenames_minibatch, label_filenames_minibatch))
        images, labels = zip(*results)

        images = np.asarray(images)
        labels = np.asarray(labels)

        return images, labels 
Example #11
Source File: BOSSEnsembleClassifier.py    From SFA_Python with GNU General Public License v3.0    6 votes
def fitEnsemble(self, NormMean, samples):
        correctTraining = 0
        self.results = []
        self.logger.Log("%s  Fitting for a norm of %s" % (self.NAME, str(NormMean)))

        Parallel(n_jobs=1, backend="threading")(delayed(self.fitIndividual, check_pickle=False)(NormMean, samples, i) for i in range(len(self.windows)))

        #Find best correctTraining
        for i in range(len(self.results)):
            if self.results[i].score > correctTraining:
                correctTraining = self.results[i].score

        self.logger.Log("CorrectTrain for a norm of %s" % (correctTraining))
        # Remove Results that are no longer satisfactory
        new_results = []
        self.logger.Log("Stored Models for Norm=%s" % NormMean)
        for i in range(len(self.results)):
            if self.results[i].score >= (correctTraining * self.factor):
                self.logger.Log("WindowLength:%s  Features:%s  TrainScore:%s" % (self.results[i].windowLength, self.results[i].features, self.results[i].score))
                new_results.append(self.results[i])

        return new_results, correctTraining 
Example #12
Source File: preprocessor.py    From AutoSmart with GNU General Public License v3.0    6 votes
def fit(self,X):
        def func(ss):
            length = len(ss.unique())
            if length >= len(ss)-10:
                return True
            else:  
                return False
        
        df = X.data
        todo_cols = X.cat_cols
        res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(df[col]) for col in todo_cols)
        
        drop_cols = []
        for col,all_diff in zip(todo_cols,res):
            if all_diff:
                drop_cols.append(col)
        
        self.drop_cols = drop_cols 
Example #13
Source File: librispeech.py    From End-to-end-ASR-Pytorch with MIT License    6 votes
def __init__(self, path, split, tokenizer, bucket_size, ascending=False):
        # Setup
        self.path = path
        self.bucket_size = bucket_size

        # List all wave files
        file_list = []
        for s in split:
            split_list = list(Path(join(path, s)).rglob("*.flac"))
            assert len(split_list) > 0, "No data found @ {}".format(join(path,s))
            file_list += split_list
        # Read text
        text = Parallel(n_jobs=READ_FILE_THREADS)(
            delayed(read_text)(str(f)) for f in file_list)
        #text = Parallel(n_jobs=-1)(delayed(tokenizer.encode)(txt) for txt in text)
        text = [tokenizer.encode(txt) for txt in text]

        # Sort dataset by text length
        #file_len = Parallel(n_jobs=READ_FILE_THREADS)(delayed(getsize)(f) for f in file_list)
        self.file_list, self.text = zip(*[(f_name, txt)
                                          for f_name, txt in sorted(zip(file_list, text), reverse=not ascending, key=lambda x:len(x[1]))]) 
Example #14
Source File: decomposition.py    From tridesclous with MIT License    6 votes
def transform(self, waveforms):
        #~ print('ici', waveforms.shape, self.ind_peak)
        features = waveforms[:, self.ind_peak, : ].copy()
        return features



#~ Parallel(n_jobs=n_jobs)(delayed(count_match_spikes)(sorting1.get_unit_spike_train(u1),
                                                                                  #~ s2_spiketrains, delta_frames) for
                                                      #~ i1, u1 in enumerate(unit1_ids))

#~ def get_pca_one_channel(wf_chan, chan, thresh, n_left, n_components_by_channel, params):
    #~ print(chan)
    #~ pca = sklearn.decomposition.IncrementalPCA(n_components=n_components_by_channel, **params)
    #~ wf_chan = waveforms[:,:,chan]
    #~ print(wf_chan.shape)
    #~ print(wf_chan[:, -n_left].shape)
    #~ keep = np.any((wf_chan>thresh) | (wf_chan<-thresh))
    #~ keep = (wf_chan[:, -n_left]>thresh) | (wf_chan[:, -n_left]<-thresh)

    #~ if keep.sum() >=n_components_by_channel:
        #~ pca.fit(wf_chan[keep, :])
        #~ return pca
    #~ else:
        #~ return None 
Example #15
Source File: utils.py    From contextualbandits with BSD 2-Clause "Simplified" License    6 votes
def partial_fit(self, X, y, classes=None):
        if self.partial_method == "gamma":
            w_all = -np.log(self
                            .random_state
                            .random(size=(X.shape[0], self.nsamples))
                            .clip(min=1e-12, max=None))
            appear_times = None
            rng = None
        elif self.partial_method == "poisson":
            w_all = None
            appear_times = self.random_state.poisson(1, size = (X.shape[0], self.nsamples))
            rng = np.arange(X.shape[0])
        else:
            raise ValueError(_unexpected_err_msg)
        Parallel(n_jobs=self.njobs, verbose=0, require="sharedmem")\
                (delayed(self._partial_fit_single)\
                    (sample, w_all, appear_times, rng, X, y) \
                        for sample in range(self.nsamples)) 
Example #16
Source File: preprocessor.py    From AutoSmart with GNU General Public License v3.0    6 votes
def fit(self,X):
        def func(ss):
            length = len(ss.unique())
            if length <= 1:
                return True
            else:
                return False
            
        df = X.data
        todo_cols = X.cat_cols + X.multi_cat_cols + X.num_cols + X.time_cols + X.binary_cols
        res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(df[col]) for col in todo_cols)
        
        drop_cols = []
        for col,unique in zip(todo_cols,res):
            if unique:
                drop_cols.append(col)
        
        self.drop_cols = drop_cols 
Example #17
Source File: atlas2.py    From ssbio with MIT License    6 votes
def build_strain_specific_models(self, joblib=False, cores=1, force_rerun=False):
        """Wrapper function for _build_strain_specific_model"""
        if len(self.df_orthology_matrix) == 0:
            raise RuntimeError('Empty orthology matrix, please calculate first!')
        ref_functional_genes = [g.id for g in self.reference_gempro.functional_genes]
        log.info('Building strain specific models...')
        if joblib:
            result = DictList(Parallel(n_jobs=cores)(delayed(self._build_strain_specific_model)(s, ref_functional_genes, self.df_orthology_matrix, force_rerun=force_rerun) for s in self.strain_ids))
        # if sc:
        #     strains_rdd = sc.parallelize(self.strain_ids)
        #     result = strains_rdd.map(self._build_strain_specific_model).collect()
        else:
            result = []
            for s in tqdm(self.strain_ids):
                result.append(self._build_strain_specific_model(s, ref_functional_genes, self.df_orthology_matrix, force_rerun=force_rerun))

        for strain_id, gp_noseqs_path in result:
            self.strain_infodict[strain_id]['gp_noseqs_path'] = gp_noseqs_path 
Example #18
Source File: graph.py    From AutoSmart with GNU General Public License v3.0    6 votes
def recognize_binary_col(self,data,cat_cols):
        def func(ss):
            ss = ss.unique()
            if len(ss) == 3:
                if pd.isna(ss).sum() == 1:
                    return True
            if len(ss) == 2:
                return True
            return False
        
        binary_cols = []
        
        res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(data[col]) for col in cat_cols)
        
        for col,is_binary in zip(cat_cols,res):
            if is_binary:
                binary_cols.append(col)
        
        return binary_cols 
Example #19
Source File: optim_par_L2M2019Ctrl_2D.py    From osim-rl with MIT License    5 votes
def f(self, v_params):
        self.n_gen += 1
        timeout_error = True
        error_count = 0
        while timeout_error:
            try:
                v_total_reward = Parallel(n_jobs=N_PROC, timeout=TIMEOUT)\
                (delayed(f_ind)(self.n_gen, i, p) for i, p in enumerate(v_params))
                timeout_error = False
            except Exception as e_msg:
                error_count += 1
                print('\ntimeout error (x{})!!!'.format(error_count))
                #print(e_msg)

        for total_reward in v_total_reward:
            if self.best_total_reward  < total_reward:
                filename = "./optim_data/cma/" + trial_name + "best_w.txt"
                print("\n")
                print("----")
                print("update the best score!!!!")
                print("\tprev = %.8f" % self.best_total_reward )
                print("\tcurr = %.8f" % total_reward)
                print("\tsave to [%s]" % filename)
                print("----")
                print("")
                self.best_total_reward  = total_reward
                np.savetxt(filename, params)

        return [-r for r in v_total_reward] 
Example #20
Source File: aligning-docs-by-interlinks-demo2.py    From comparable-text-miner with Apache License 2.0    5 votes
def main(argv):
	source_corpus_file = sys.argv[1]
	target_corpus_file = sys.argv[2]
	source_language = sys.argv[3]
	target_language = sys.argv[4]
	output_path = sys.argv[5]
	
	if not output_path.endswith('/'): output_path = output_path + '/'
	tp.check_dir(output_path) # if directory does not exist, then create
	
	logging.info( 'aligning %s and %s Wikipedia documents using interlanguage links',  source_language, target_language)
	source_docs = tp.split_wikipedia_docs_into_array(source_corpus_file)
	logging.info( 'source corpus is loaded')
	target_docs = tp.split_wikipedia_docs_into_array(target_corpus_file)
	logging.info( 'target corpus is loaded ... start aligning ...')
	
	aligned_corpus = Parallel(n_jobs=3,verbose=100)(delayed(tp.aligning_doc_by_interlanguage_links)(d, target_docs, source_language, target_language, output_path) for d in source_docs)
	
	
	source_out = open(output_path +  source_language + '.wiki.txt', 'w') 
	target_out = open(output_path +  target_language + '.wiki.txt', 'w')
	
	for doc_pair in aligned_corpus:
		if doc_pair[0]: # if not None 
			text_out = doc_pair[0]
			print>>source_out, text_out.encode('utf-8')
			text_out = doc_pair[1]
			print>>target_out, text_out.encode('utf-8')
	
	

################################################################## 
Example #21
Source File: dataset.py    From stable-baselines with MIT License    5 votes
def _run(self):
        start = True
        with Parallel(n_jobs=self.n_workers, batch_size="auto", backend=self.backend) as parallel:
            while start or self.infinite_loop:
                start = False

                if self.shuffle:
                    np.random.shuffle(self.indices)

                for minibatch_idx in range(self.n_minibatches):

                    self.start_idx = minibatch_idx * self.batch_size

                    obs = self.observations[self._minibatch_indices]
                    if self.load_images:
                        if self.n_workers <= 1:
                            obs = [self._make_batch_element(image_path)
                                   for image_path in obs]

                        else:
                            obs = parallel(delayed(self._make_batch_element)(image_path)
                                           for image_path in obs)

                        obs = np.concatenate(obs, axis=0)

                    actions = self.actions[self._minibatch_indices]

                    self.queue.put((obs, actions))

                    # Free memory
                    del obs

                self.queue.put(None) 
Example #22
Source File: main.py    From lighter with MIT License    5 votes
def parse_services(filenames, canaryGroup=None, profiles=[]):
    # return [parse_service(filename) for filename in filenames]
    return Parallel(n_jobs=8, backend="threading")(delayed(parse_service)(filename, canaryGroup, profiles) for filename in filenames) if filenames else [] 
Example #23
Source File: preprocessor.py    From AutoSmart with GNU General Public License v3.0    5 votes
def transform(self,X):
        
        def func(ss,cats):
            codes = pd.Categorical(ss,categories=cats).codes
            codes = codes.astype('float16')
            codes[codes==-1] = np.nan
            
            return codes
        
        df = X.data
        todo_cols = X.binary_cols
        res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(df[col],self.col2cats[col]) for col in todo_cols)
        for col,codes in zip(todo_cols,res):
            df[col] = codes 
Example #24
Source File: preprocessor.py    From AutoSmart with GNU General Public License v3.0    5 votes
def fit(self,X):
        def func(ss):
            cats = pd.Categorical(ss).categories 
            return cats
        
        df = X.data
        todo_cols = X.binary_cols
        
        res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(df[col]) for col in todo_cols)
        for col,cats in zip(todo_cols,res):
            self.col2cats[col] = cats 
Example #25
Source File: mtag.py    From mtag with GNU General Public License v3.0    5 votes
def ss_estimation(args, betas, se, max_iter=1000, tol=1.0e-10,
                  starting_params =(0.5, 1.0e-3),
                  callback=False):
    '''
    Numerically fit the distribution of betas and standard errors to a spike slab distribution.

    Arguments:
    ----------
    betas: The Mx1 vector of betas
    se:    The Mx1 vector of standard errors. Must allign with the reported betas.
    max_iter: int,
            Maximum number of iterations
    tol:    float,
            Tolerance used in numerical optimization (for both fatol, xatol)

    starting_params: 2-tuple: (pi_0, tau_0)
            Starting parameters for optimization. Default is 0.5, 1.0e-3
    callback:       boolean ,default False
            If True, the parameters values will be printed at each step of optimization.
    '''
    M,T = betas.shape


    solver_opts = dict()
    solver_opts['maxiter'] = max_iter
    solver_opts['fatol'] = tol
    solver_opts['xatol'] = tol
    solver_opts['disp'] = True
    callback = cback_print if callback else None
    arg_list_ss = [(betas[:,t], se[:,t], starting_params, solver_opts) for t in range(T)]
    ss_results =  joblib.Parallel(n_jobs = args.cores,
                                          backend='multiprocessing',
                                          verbose=0,
                                          batch_size=1)(joblib.delayed(_optim_ss)(f_args) for f_args in arg_list_ss)
    return ss_results 
Example #26
Source File: WEASEL.py    From SFA_Python with GNU General Public License v3.0    5 votes
def createWORDS(self, samples, data = 'Train'):
        self.words = [None for _ in range(len(self.windowLengths))]
        Parallel(n_jobs=1, backend="threading")(delayed(self.createWords, check_pickle=False)(samples, w, data) for w in range(len(self.windowLengths)))
        return self.words 
Example #27
Source File: atlas2.py    From ssbio with MIT License    5 votes
def load_sequences_to_strains(self, joblib=False, cores=1, force_rerun=False):
        """Wrapper function for _load_sequences_to_strain"""
        log.info('Loading sequences to strain GEM-PROs...')
        if joblib:
            result = DictList(Parallel(n_jobs=cores)(delayed(self._load_sequences_to_strain)(s, force_rerun) for s in self.strain_ids))
        else:
            result = []
            for s in tqdm(self.strain_ids):
                result.append(self._load_sequences_to_strain(s, force_rerun))

        for strain_id, gp_seqs_path in result:
            self.strain_infodict[strain_id]['gp_seqs_path'] = gp_seqs_path 
Example #28
Source File: BOSSEnsembleClassifier.py    From SFA_Python with GNU General Public License v3.0    5 votes
def predict(self, models, samples, testing=False):
        uniqueLabels = np.unique(samples["Labels"])
        self.Label_Matrix = [[None for _ in range(len(models))] for _ in range(samples['Samples'])]
        predictedLabels = [None for _ in range(samples['Samples'])]

        Parallel(n_jobs=1, backend="threading")(delayed(self.predictIndividual, check_pickle=False)(models, samples, testing, i) for i in range(len(models)))

        maxCounts = [None for _ in range(samples['Samples'])]
        for i in range(len(self.Label_Matrix)):
            counts = {l:0 for l in uniqueLabels}
            for k in self.Label_Matrix[i]:
                if (k != None) and (list(k.keys())[0] != None):
                    label = list(k.keys())[0]
                    count = counts[label] if label in counts.keys() else 0
                    increment = list(k.values())[0] if self.ENSEMBLE_WEIGHTS else 1
                    count = increment if (count == None) else count + increment
                    counts[label] = count

            maxCount = -1
            for e in uniqueLabels: # counts.keys():
                if (predictedLabels[i] == None) or (maxCount < counts[e]) or (maxCount == counts[e]) and (predictedLabels[i] <= e):
                    maxCount = counts[e]
                    predictedLabels[i] = e

        correctTesting = 0
        for i in range(samples["Samples"]):
            correctTesting += 1 if samples[i].label == predictedLabels[i] else 0

        return Predictions(correctTesting, predictedLabels) 
Example #29
Source File: Preprocessing.py    From training_results_v0.6 with Apache License 2.0    5 votes
def write_data_csv(fname, frames, preproc):
   """Write data to csv file"""
   fdata = open(fname, "w")
   dr = Parallel()(delayed(get_data)(lst,preproc) for lst in frames)
   data,result = zip(*dr)
   for entry in data:
      fdata.write(','.join(entry)+'\r\n')
   print("All finished, %d slices in total" % len(data))
   fdata.close()
   result = np.ravel(result)
   return result 
Example #30
Source File: data_loader.py    From srl-zoo with MIT License    5 votes
def _run(self):
        start = True
        with Parallel(n_jobs=self.n_workers, batch_size="auto", backend="threading") as parallel:
            while start or self.infinite_loop:
                start = False
                if self.shuffle:
                    indices = np.random.permutation(self.n_minibatches).astype(np.int64)
                else:
                    indices = np.arange(len(self.minibatchlist), dtype=np.int64)

                for minibatch_idx in indices:
                    images = self.images_path[self.minibatchlist[minibatch_idx]]

                    if self.n_workers <= 1:
                        batch = [self._makeBatchElement(image_path) for image_path in images]
                    else:
                        batch = parallel(delayed(self._makeBatchElement)(image_path) for image_path in images)

                    batch = th.cat(batch, dim=0)

                    if self.no_targets:
                        self.queue.put(batch)
                    else:
                        # th.tensor creates a copy
                        self.queue.put((batch, th.tensor(self.targets[minibatch_idx])))

                    # Free memory
                    del batch

                self.queue.put(None)