Python multiprocessing.pool.Pool() Examples
The following are 30 code examples of multiprocessing.pool.Pool().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module multiprocessing.pool, or try the search function.
Example #1
Source File: generate.py From post--memorization-in-rnns with MIT License | 7 votes |
def save_tfrecord(filename, dataset, verbose=False):
    """Serialize a dataset in parallel and write it to a ZLIB-compressed TFRecord file.

    Args:
        filename: Path handed to ``tf.python_io.TFRecordWriter``.
        dataset: Mapping with 'length', 'source' and 'target' sequences; they
            are zipped position by position and each tuple is passed to
            ``tfrecord_serializer``.
        verbose: When True, tqdm progress bars are shown.
    """
    observations = len(dataset['length'])

    # imap yields results in input order, so records keep the dataset order.
    with Pool(processes=4) as pool:
        serialized = list(tqdm(
            pool.imap(
                tfrecord_serializer,
                zip(dataset['length'], dataset['source'], dataset['target']),
                chunksize=10
            ),
            total=observations, disable=not verbose
        ))

    # Save serialized dataset
    writer = tf.python_io.TFRecordWriter(
        filename,
        options=tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB
        )
    )
    try:
        for serialized_string in tqdm(serialized, disable=not verbose):
            writer.write(serialized_string)
    finally:
        # Close the writer even when a write fails so the file handle is released.
        writer.close()
Example #2
Source File: toutiao_spider.py From capturer with MIT License | 6 votes |
def main():
    """Interactively download pictures for a search keyword from www.toutiao.com.

    Prompts for a keyword and a picture count (re-asking until the count
    parses as an integer), then calls ``get_images_of`` in a process pool
    once per offset 0, 20, 40, ...  A count of zero or less cancels the run.
    """
    print('\nWelcome here to get pictures from www.toutiao.com!')
    keyword = input('Please input your search keywords > ')

    count = None
    while count is None:
        number_str = input(
            'Please input count of picture collection that you want(Divisible by 20 ) > ')
        try:
            count = int(number_str)
        except ValueError:
            print('Please input a valid number!')

    if count <= 0:
        print('Get Cancel!')
        return

    print('Getting %s pictures...' % keyword)
    # One offset per block of 20 pictures, rounding the number of blocks up.
    page_num = count // 20 + (0 if count % 20 == 0 else 1)
    offset_list = [x * 20 for x in range(page_num)]
    partial_getter = partial(get_images_of, keyword=keyword)

    pool = Pool()
    try:
        pool.map(partial_getter, offset_list)
    finally:
        # Release the worker processes even if one of the downloads raised.
        pool.close()
        pool.join()
Example #3
Source File: cpdb_statistical_analysis_helper.py From cellphonedb with MIT License | 6 votes |
def shuffled_analysis(iterations: int, meta: pd.DataFrame, counts: pd.DataFrame, interactions: pd.DataFrame,
                      cluster_interactions: list, base_result: pd.DataFrame, threads: int, separator: str,
                      suffixes: tuple = ('_1', '_2'), counts_data: str = 'ensembl') -> list:
    """
    Runs ``_statistical_analysis`` once per iteration on a pool of ``threads``
    worker processes and returns the per-iteration results as a list.
    """
    core_logger.info('Running Statistical Analysis')

    # Bind every argument except the iteration index, which pool.map supplies.
    run_single_iteration = partial(_statistical_analysis,
                                   base_result,
                                   cluster_interactions,
                                   counts,
                                   interactions,
                                   meta,
                                   separator,
                                   suffixes,
                                   counts_data=counts_data)

    with Pool(processes=threads) as workers:
        return workers.map(run_single_iteration, range(iterations))
Example #4
Source File: autocomplete.py From post--memorization-in-rnns with MIT License | 6 votes |
def save_tfrecord(filename, dataset, verbose=False):
    """Serialize a dataset in parallel and write it to a ZLIB-compressed TFRecord file.

    Args:
        filename: Path handed to ``tf.python_io.TFRecordWriter``.
        dataset: Mapping with 'length', 'source' and 'target' sequences; they
            are zipped position by position and each tuple is passed to
            ``tfrecord_serializer``.
        verbose: When True, tqdm progress bars are shown.
    """
    observations = len(dataset['length'])

    # imap yields results in input order, so records keep the dataset order.
    with Pool(processes=4) as pool:
        serialized = list(tqdm(
            pool.imap(
                tfrecord_serializer,
                zip(dataset['length'], dataset['source'], dataset['target']),
                chunksize=10
            ),
            total=observations, disable=not verbose
        ))

    # Save serialized dataset
    writer = tf.python_io.TFRecordWriter(
        filename,
        options=tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB
        )
    )
    try:
        for serialized_string in tqdm(serialized, disable=not verbose):
            writer.write(serialized_string)
    finally:
        # Close the writer even when a write fails so the file handle is released.
        writer.close()
Example #5
Source File: level_iterator.py From safelife with Apache License 2.0 | 6 votes |
def fill_queue(self):
    """Top up ``self.results`` until it holds ``self.max_queue`` entries.

    Each entry is a ``(data, result)`` pair.  With ``num_workers > 0`` the
    result is the handle returned by ``pool.apply_async``; otherwise it is
    the direct return value of ``_game_from_data``.  Filling stops early when
    the distinct-level limit is reached, when the file data is exhausted and
    levels are not repeated, or when no further parameters are available.
    """
    if self.results is None:
        self.results = queue.deque(maxlen=self.max_queue)
    # The pool is created lazily, and only when workers are requested.
    if self.num_workers > 0 and self.pool is None:
        self.pool = Pool(processes=self.num_workers)

    while len(self.results) < self.max_queue:
        if self.distinct_levels is not None and self.idx >= self.distinct_levels:
            return
        if not self.repeat_levels and self.idx >= len(self.file_data):
            return

        params = self.get_next_parameters()
        if params is None:
            return

        self.idx += 1
        seed_kwargs = {'seed': self._seed.spawn(1)[0]}
        if self.num_workers > 0:
            outcome = self.pool.apply_async(_game_from_data, params, seed_kwargs)
        else:
            outcome = _game_from_data(*params, **seed_kwargs)
        self.results.append((params, outcome))
Example #6
Source File: F1_running_score.py From openseg.pytorch with MIT License | 6 votes |
def __init__(self, configer=None, num_classes=None, boundary_threshold=0.00088, num_proc=15):
    """Set up the scorer state and its worker pool.

    At least one of ``configer`` and ``num_classes`` must be given.  When a
    configer is supplied the class count is read from it via
    ``configer.get('data', 'num_classes')``; otherwise ``num_classes`` is used.
    """
    assert configer is not None or num_classes is not None

    self.configer = configer
    self.n_classes = (
        num_classes if configer is None
        else self.configer.get('data', 'num_classes')
    )

    self.ignore_index = -1
    self.boundary_threshold = boundary_threshold

    # Worker processes are started here, at construction time.
    self.pool = Pool(processes=num_proc)
    self.num_proc = num_proc

    # Accumulators and caches all start out empty.
    self._Fpc = 0
    self._Fc = 0
    self.seg_map_cache = []
    self.gt_map_cache = []
Example #7
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 6 votes |
def create_features_from_path(self, train_path: str, test_path: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Compute latent vectors for every column pair and build train/test features.

    ``compute_latent_vectors`` is mapped over ``get_column_pairs()`` on four
    worker processes; each returned vector is cast to float32 before the
    collected results are handed to ``get_feature`` for the train path and
    then for the test path.
    """
    column_pairs = self.get_column_pairs()
    first_columns, second_columns, vectors = [], [], []
    gc.collect()

    with Pool(4) as workers:
        compute = partial(self.compute_latent_vectors, train_path=train_path, test_path=test_path)
        for first, second, vector in workers.map(compute, column_pairs):
            first_columns.append(first)
            second_columns.append(second)
            vectors.append(vector.astype(np.float32))
    gc.collect()

    train_features = self.get_feature(train_path, first_columns, second_columns, vectors)
    test_features = self.get_feature(test_path, first_columns, second_columns, vectors)
    return train_features, test_features
Example #8
Source File: engine.py From NoXss with MIT License | 6 votes |
def verify_async(case_list,coroutine):
    """
    Verify cases concurrently using the gevent lib
    :param case_list: cases, each one passed to Verify.request_and_verify
    :param coroutine: size given to the gevent pool
    :return: list with the value of every greenlet whose value is not None
    """
    from gevent import monkey
    monkey.patch_all()

    greenlet_pool = pool.Pool(coroutine)
    greenlets = []
    for case in case_list:
        greenlets.append(greenlet_pool.spawn(Verify.request_and_verify, case))
    gevent.joinall(greenlets)

    result = [done.value for done in greenlets if done.value is not None]
    print_info('Total Verify-Case is: %s, %s error happened.' % (len(case_list), Verify.ERROR_COUNT))
    return result
Example #9
Source File: engine.py From NoXss with MIT License | 6 votes |
def deduplicate(self, url_list):
    """
    Filter url_list through url_filter in parallel and cache the result on disk.

    If ``self.file + '.filtered'`` already exists, its lines are returned and
    nothing is recomputed. Otherwise every url is passed to ``url_filter`` in
    a process pool, the ``str`` results are kept, written to that file (one
    per line) and returned.
    :param url_list: urls to filter
    :return: list of filtered urls
    """
    print('Start to deduplicate for all urls.')
    filtered_path = self.file + '.filtered'
    if os.path.exists(filtered_path):
        print('%s has been filtered as %s.' % (self.file, filtered_path))
        with open(filtered_path) as f:
            return f.read().split('\n')

    from multiprocessing import cpu_count
    from multiprocessing.pool import Pool
    p = Pool(cpu_count())
    try:
        result = p.map(url_filter, url_list)
    finally:
        # Always release the worker processes, even if map() raises.
        p.close()
        p.join()

    # Only str results are kept; anything else returned by url_filter is dropped.
    filtered = [i for i in result if isinstance(i, str)]
    with open(filtered_path, 'w') as f:
        f.write('\n'.join(filtered))
    print('Saved filtered urls to %s.' % filtered_path)
    return filtered
Example #10
Source File: generate_questions.py From rc-data with Apache License 2.0 | 6 votes |
def GenerateMode(corpus, context_token_limit):
    """Generate and write question contexts for every dataset in ``datasets``.

    For each dataset the url list is read from
    ``<corpus>/wayback_<dataset>_urls.txt`` and ``GenerateMapper`` is run over
    the urls in a process pool. Results arrive in completion order; every
    question context of a non-empty result is written with
    ``WriteQuestionContext`` and the progress bar advances once per url.

    Args:
        corpus: Corpus name, used as directory prefix and passed to the mapper.
        context_token_limit: Passed unchanged to ``GenerateMapper``.
    """
    for dataset in datasets:
        print('Generating questions for the %s set:' % dataset)

        urls_filename = '%s/wayback_%s_urls.txt' % (corpus, dataset)
        urls = ReadUrls(urls_filename)

        p = Pool()
        try:
            question_context_lists = p.imap_unordered(
                GenerateMapper,
                izip(urls, repeat(corpus), repeat(context_token_limit)))

            progress_bar = ProgressBar(len(urls))
            for question_context_list in question_context_lists:
                if question_context_list:
                    for question_context in question_context_list:
                        WriteQuestionContext(question_context, corpus, dataset)
                progress_bar.Increment()
        finally:
            # A fresh pool is created per dataset, so release its workers
            # before moving on. All results were consumed on the normal path;
            # on error the outstanding work is abandoned.
            p.terminate()
            p.join()
Example #11
Source File: download_wheels.py From piqueserver with GNU General Public License v3.0 | 6 votes |
def main():
    """Parse the command line and download the build's artifacts in parallel.

    Artifact urls are collected from every job returned by ``get_jobs`` for
    the given build id, then ``download_file`` is scheduled for each url on a
    process pool of the requested size.
    """
    parser = argparse.ArgumentParser(
        description="Downloads wheels from AppVeyor")
    parser.add_argument(
        "--buildid", "-id", type=int, default=None,
        help='AppVeyor build id')
    parser.add_argument(
        "--pool", "-p", type=int, default=6,
        help="Multiprocess pool size")
    parser.add_argument(
        "--dir", "-d", type=check_dir, default=getcwd(),
        help='Directory to download the files into.')
    options = parser.parse_args()

    download_urls = []
    for job in get_jobs(options.buildid):
        download_urls.extend(get_artifacts(job))

    # Download them in parallel
    workers = Pool(options.pool)
    for url in download_urls:
        workers.apply_async(download_file, args=(url, options.dir))
    workers.close()
    workers.join()

    print("Done")
Example #12
Source File: download.py From kaggle-hpa with BSD 2-Clause "Simplified" License | 6 votes |
def main():
    """Split the image ids from the CSV into chunks and download them in parallel.

    The 'Id' column of the CSV is split into ``process_num`` chunks and one
    ``download`` task per chunk is scheduled on a pool of the same size.
    """
    # Parameters
    process_num = 24
    image_size = (512, 512)
    url = 'http://v18.proteinatlas.org/images/'
    csv_path = "data/HPAv18RBGY_wodpl.csv"
    save_dir = "data/raw/external"

    os.makedirs(save_dir, exist_ok=True)
    print('Parent process %s.' % os.getpid())

    img_list = list(pd.read_csv(csv_path)['Id'])
    img_splits = np.array_split(img_list, process_num)
    # Sanity check: splitting must not lose or duplicate ids.
    assert sum(len(chunk) for chunk in img_splits) == len(img_list)

    workers = Pool(process_num)
    for index, chunk in enumerate(img_splits):
        task_args = (str(index), list(chunk), url, save_dir, image_size)
        workers.apply_async(download, args=task_args)

    print('Waiting for all subprocesses done...')
    workers.close()
    workers.join()
    print('All subprocesses done.')
Example #13
Source File: gen_classes.py From zenpy with GNU General Public License v3.0 | 6 votes |
def process_specification_directory(glob_pattern, outfile_name, namespace, write_baseclass=True,):
    """Generate code for every specification file matching a pattern.

    Each matching path under ``options.spec_path`` is run through
    ``process_file`` (with ``namespace`` bound) in a process pool. The results
    are sorted, joined, formatted with ``FormatCode`` and written to
    ``outfile_name`` under ``options.out_path``, preceded either by
    ``BASE_CLASS`` or by a plain import header.
    """
    target = os.path.join(options.out_path, outfile_name)
    # The output file is opened (and truncated) before any processing starts.
    with open(target, 'w+') as out_file:
        paths = glob.glob(os.path.join(options.spec_path, glob_pattern))
        worker = functools.partial(process_file, namespace)

        classes = []
        with Pool() as pool:
            classes += pool.map(worker, paths)

        print("Formatting...")
        formatted_code = FormatCode("\n".join(sorted(classes)))[0]

        header = (
            BASE_CLASS if write_baseclass
            else "from zenpy.lib.api_objects import BaseObject\nimport dateutil.parser"
        )
        out_file.write("\n\n\n".join((header, formatted_code)))
Example #14
Source File: feature_union.py From mercari-solution with MIT License | 6 votes |
def transform(self, X):
    """Apply every transformer to its own columns of X and stack the outputs.

    Parameters
    ----------
    X : iterable or array-like, depending on transformers
        Input data to be transformed.

    Returns
    -------
    X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
        Horizontally stacked transformer outputs; CSR sparse when at least
        one output is sparse, dense otherwise.
    """
    # Each job carries only the columns named by the transformer's first step.
    jobs = (
        (trans, weight, X[trans.steps[0][1].columns])
        for name, trans, weight in self._iter()
    )
    with Pool(self.n_jobs) as workers:
        Xs = workers.starmap(_transform_one, jobs)

    if not Xs:
        # All transformers are None
        return np.zeros((X.shape[0], 0))

    if any(sparse.issparse(block) for block in Xs):
        return sparse.hstack(Xs).tocsr()
    return np.hstack(Xs)
Example #15
Source File: base_worker.py From mycroft with MIT License | 6 votes |
def _schedule_runs_lk(self, et_pool, job):
    """ Submit runs of a job to the pool until its parallelism limit is hit
    or no runs are left to schedule.

    The '_lk' suffix means the caller must already hold the lock.

    :param et_pool: A multiprocessor pool handle
    :type: Pool
    :param job: current job
    :type: WorkerJob
    """
    while True:
        if not self._has_more_runs_to_schedule(job):
            break
        if job.runs_in_flight >= job.max_runs_in_flight:
            break

        next_run = job.schedule_next_run()
        if next_run.id is None:
            raise ValueError("Unexpected end of runs")

        self.etl_helper.etl_step_started(job.msg_dict, next_run.id, next_run.step)
        log('scheduled: {0}'.format(next_run.id))

        et_pool.apply_async(
            next_run.func,
            args=next_run.args,
            callback=self._create_run_complete_callback(job, next_run.id, next_run.step),
        )
        job.runs_in_flight += 1
Example #16
Source File: feature_union.py From mercari-solution with MIT License | 6 votes |
def fit(self, X, y=None):
    """Fit every transformer on its own columns of X, in parallel.

    Parameters
    ----------
    X : iterable or array-like, depending on transformers
        Input data, used to fit transformers.

    y : array-like, shape (n_samples, ...), optional
        Targets for supervised learning.

    Returns
    -------
    self : FeatureUnion
        This estimator
    """
    self.transformer_list = list(self.transformer_list)
    self._validate_transformers()

    # Each job carries only the columns named by the transformer's first step.
    jobs = (
        (trans, X[trans.steps[0][1].columns], y)
        for _, trans, _ in self._iter()
    )
    with Pool(self.n_jobs) as workers:
        fitted = workers.starmap(_fit_one_transformer, jobs)

    self._update_transformer_list(fitted)
    return self
Example #17
Source File: metric.py From dsb2018_topcoders with MIT License | 6 votes |
def calculate_cell_score_selim(y_true, y_pred, num_threads=32, ids=None):
    """Return the mean ``calculate_jaccard`` score over (truth, prediction) pairs.

    Args:
        y_true: Indexable collection of ground-truth items supporting ``.copy()``.
        y_pred: Predictions, indexed in parallel with ``y_true``.
        num_threads: Number of worker processes in the pool.
        ids: Optional identifiers, indexed in parallel with ``y_true``. When
            truthy, the scores are also written to ``gt_vs_oof.csv`` sorted in
            ascending score order.

    Returns:
        The mean of the per-pair scores.
    """
    # Copies are submitted so the workers never receive the caller's objects.
    yps = [(truth.copy(), y_pred[m].copy()) for m, truth in enumerate(y_true)]

    pool = Pool(num_threads)
    try:
        results = pool.map(calculate_jaccard, yps)
    finally:
        # Release the worker processes; a pool is created on every call.
        pool.close()
        pool.join()

    if ids:
        import pandas as pd
        rows = [[ids[i], results[i]] for i in np.argsort(results)]
        pd.DataFrame(rows, columns=["ID", "METRIC_SCORE"]).to_csv("gt_vs_oof.csv", index=False)

    return np.array(results).mean()
Example #18
Source File: __init__.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def Pool(processes=None, initializer=None, initargs=(), maxtasksperchild=None):
    '''
    Create and return a process pool object built from the given arguments
    '''
    # Imported on demand and under another name, since this factory is itself called Pool.
    from multiprocessing.pool import Pool as _pool_class
    new_pool = _pool_class(processes, initializer, initargs, maxtasksperchild)
    return new_pool
Example #19
Source File: metric.py From dsb2018_topcoders with MIT License | 5 votes |
def calculate_cell_score_kaggle(y_true, y_pred, num_threads=32):
    """Return the mean ``score_kaggle`` value over (truth, prediction) pairs.

    Args:
        y_true: Indexable collection of ground-truth items supporting ``.copy()``.
        y_pred: Predictions, indexed in parallel with ``y_true``.
        num_threads: Number of worker processes in the pool.

    Returns:
        ``np.mean`` of the per-pair scores.
    """
    # Copies are submitted so the workers never receive the caller's objects.
    yps = [(truth.copy(), y_pred[m].copy()) for m, truth in enumerate(y_true)]

    pool = Pool(num_threads)
    try:
        results = pool.map(score_kaggle, yps)
    finally:
        # Release the worker processes; a pool is created on every call.
        pool.close()
        pool.join()

    return np.mean(results)
Example #20
Source File: generate_questions.py From rc-data with Apache License 2.0 | 5 votes |
def StoreMode(corpus):
    """Fetch and store the news stories of every dataset in ``datasets``.

    For each dataset the url list is read from
    ``<corpus>/wayback_<dataset>_urls.txt`` and ``StoreMapper`` is run over
    the urls in a process pool. Results arrive in completion order; every
    truthy story is written with ``WriteStory`` and the progress bar advances
    once per url.

    Args:
        corpus: Corpus name, used as directory prefix and passed to the mapper.
    """
    for dataset in datasets:
        print('Storing news stories for the %s set:' % dataset)

        urls_filename = '%s/wayback_%s_urls.txt' % (corpus, dataset)
        urls = ReadUrls(urls_filename)

        p = Pool()
        try:
            stories = p.imap_unordered(StoreMapper, izip(urls, repeat(corpus)))

            progress_bar = ProgressBar(len(urls))
            for story in stories:
                if story:
                    WriteStory(story, corpus)
                progress_bar.Increment()
        finally:
            # A fresh pool is created per dataset, so release its workers
            # before moving on. All results were consumed on the normal path;
            # on error the outstanding work is abandoned.
            p.terminate()
            p.join()
Example #21
Source File: tasks.py From agents-aea with Apache License 2.0 | 5 votes |
def _stop_pool(self) -> None:
    """
    Terminate and join the internal task pool, then forget it.

    Only logs a debug message when no pool is running.

    :return: None
    """
    if self._pool:
        self._pool = cast(Pool, self._pool)
        self._pool.terminate()
        self._pool.join()
        self._pool = None
    else:
        logger.debug("Pool is not started!.")
Example #22
Source File: tasks.py From agents-aea with Apache License 2.0 | 5 votes |
def _start_pool(self) -> None:
    """
    Create the internal task pool unless one already exists.

    At most one pool is created; a repeated call only logs a debug message.

    :return: None
    """
    if not self._pool:
        self._pool = Pool(self._nb_workers, initializer=init_worker)
    else:
        logger.debug("Pool was already started!.")
Example #23
Source File: tasks.py From agents-aea with Apache License 2.0 | 5 votes |
def enqueue_task(
    self, func: Callable, args: Sequence = (), kwds: Optional[Dict[str, Any]] = None
) -> int:
    """
    Submit a callable to the pool and hand back the id of the new task.

    :param func: the callable instance to be enqueued
    :param args: the positional arguments to be passed to the function.
    :param kwds: the keyword arguments to be passed to the function.
    :return: the task id under which the async result is stored.
    :raises ValueError: if the task manager is not running.
    """
    keyword_args = {} if kwds is None else kwds

    with self._lock:
        if self._stopped:
            raise ValueError("Task manager not running.")

        # The pool may not exist yet when lazy start is enabled.
        if not self._pool and self._is_lazy_pool_start:
            self._start_pool()
        self._pool = cast(Pool, self._pool)

        task_id = self._task_enqueued_counter
        self._task_enqueued_counter = task_id + 1

        self._results_by_task_id[task_id] = self._pool.apply_async(
            func, args=args, kwds=keyword_args
        )
    return task_id
Example #24
Source File: tasks.py From agents-aea with Apache License 2.0 | 5 votes |
def __init__(self, nb_workers: int = 1, is_lazy_pool_start: bool = True):
    """
    Set up a stopped task manager without a pool.

    :param nb_workers: the number of worker processes.
    :param is_lazy_pool_start: option to postpone pool creation till the first enqueue_task called.
    """
    # Configuration
    self._nb_workers = nb_workers
    self._is_lazy_pool_start = is_lazy_pool_start

    # Runtime state: no pool yet, and the manager starts out stopped.
    self._pool = None  # type: Optional[Pool]
    self._stopped = True
    self._lock = threading.Lock()

    # Task bookkeeping: next task id and the stored result per task id.
    self._task_enqueued_counter = 0
    self._results_by_task_id = {}  # type: Dict[int, Any]
Example #25
Source File: reposcan.py From vmaas with GNU General Public License v2.0 | 5 votes |
def cancel(cls):
    """Terminate the process pool.

    The shared ``cls.workers`` pool is terminated and joined, replaced by a
    fresh single-process pool, and ``cls.finish()`` is called.
    """
    # NOTE(review): which statements sit inside the disabled_signals() block
    # should be confirmed against the upstream file.
    with disabled_signals():
        cls.workers.terminate()
        cls.workers.join()
        # Replace the terminated pool so later work has a pool to run on.
        cls.workers = Pool(1)
    cls.finish()
Example #26
Source File: tilt_angle_estimation.py From aitom with GNU General Public License v3.0 | 5 votes |
def scan(tilt_angle_scan_range, v_abs, n_proc=0, n_chunk=20):
    """Scan tilt-angle / axis combinations in parallel and return the best one.

    Every combination of ``ang1`` in ``[-range[1], -range[0]]``, ``ang2`` in
    ``[range[0], range[1]]`` and distinct ``tilt_axis`` / ``light_axis`` in
    ``range(3)`` is evaluated by ``wedge_mask_cor`` in chunks of ``n_chunk``.

    Args:
        tilt_angle_scan_range: Pair ``(low, high)`` of integer angle bounds.
        v_abs: Passed unchanged to ``wedge_mask_cor`` as ``v_abs``.
        n_proc: Number of worker processes; 0 (the default) or None lets the
            pool choose its own size.
        n_chunk: Number of combinations handed to one ``wedge_mask_cor`` call.

    Returns:
        The result dict with the highest ``'cor'`` value.
    """
    from multiprocessing.pool import Pool

    tasks = []
    for ang1, ang2, tilt_axis, light_axis in itertools.product(
            range(-tilt_angle_scan_range[1], -tilt_angle_scan_range[0] + 1),
            range(tilt_angle_scan_range[0], tilt_angle_scan_range[1] + 1),
            range(3), range(3)):
        if tilt_axis == light_axis:
            continue
        tasks.append({'ang1': ang1, 'ang2': ang2, 'tilt_axis': tilt_axis, 'light_axis': light_axis})

    # Pool(processes=0) raises ValueError, so the default n_proc=0 is mapped
    # to None, which makes the pool pick its own number of workers.
    pool = Pool(processes=n_proc or None)
    best = None
    try:
        # Slice by offset instead of repeatedly re-copying the remaining tail.
        pool_results = [
            pool.apply_async(func=wedge_mask_cor,
                             kwds={'v_abs': v_abs, 'ops': tasks[start:start + n_chunk]})
            for start in range(0, len(tasks), n_chunk)
        ]

        for pending in pool_results:
            for r in pending.get(9999999):
                # print this info so that we know the scanning is inside
                # correct range, by looking at updated examples
                print('\r', r['ang1'], r['ang2'], r['tilt_axis'], r['light_axis'], r['cor'], ' ')
                sys.stdout.flush()

                if best is None or r['cor'] > best['cor']:
                    best = r
    finally:
        # Release the worker processes; every result has been collected on
        # the normal path, and outstanding work is abandoned on error.
        pool.terminate()
        pool.join()

    assert best is not None
    return best
Example #27
Source File: util.py From aitom with GNU General Public License v3.0 | 5 votes |
def align_to_templates(self, rec=None, segmentation_tg_op=None, tem_keys=None, template_wedge_cutoff=0.1, align_op=None, multiprocessing=False):
    """Align one subtomogram record against every template in ``tem_keys``.

    The subtomogram and its mask are loaded from ``self.cache``, optionally
    segmented, and then aligned to each template with
    ``align_to_templates__pair_align``, either on a process pool or
    sequentially.  A warning is logged for every alignment whose score is NaN.

    :param rec: record dict; reads 'subtomogram' and 'mask', optionally 'template'
    :param segmentation_tg_op: segmentation options; segmentation is skipped when None
    :param tem_keys: mapping from template id ``c`` to template key
    :param template_wedge_cutoff: not used in this function body
    :param align_op: alignment options; 'with_missing_wedge' must be truthy
    :param multiprocessing: run the pair alignments on a process pool when True
    :return: dict with 'vol_key' (the record) and 'align' (results keyed by template id)
    :raises Exception: when ``align_op['with_missing_wedge']`` is falsy, or in
        the sequential branch when the work queue reports this task as done
    """
    # vi is only reassigned in the unreachable code below and is not read afterwards.
    vi = None
    if align_op['with_missing_wedge']:
        v = self.cache.get_mrc(rec['subtomogram'])
        vm = self.cache.get_mrc(rec['mask'])
    else:
        raise Exception('following options are need to be doube checked')
        # NOTE(review): the statements below follow the raise and therefore
        # never execute; confirm this nesting against the upstream file.
        if ('template' not in rec):
            rec['template'] = None
        vi = impute_vol_keys(vk=rec, ang=rec['angle'], loc=rec['loc'], tk=rec['template'], align_to_template=False, normalize=True, cache=self.cache)['vi']
    # Segmentation only applies when options are given and the record's
    # template carries a 'segmentation' entry.
    if ((segmentation_tg_op is not None) and ('template' in rec) and ('segmentation' in rec['template'])):
        v = align_to_templates__segment(rec=rec, v=v, segmentation_tg_op=segmentation_tg_op)['v']
    if multiprocessing:
        # The pool is created on demand and dropped again at the end of this branch.
        if (self.pool is None):
            self.pool = Pool()
        # Submit every pair alignment first, then collect the results.
        pool_results = [self.pool.apply_async(func=align_to_templates__pair_align, kwds={'c': c, 't_key': tem_keys[c], 'v': v, 'vm': vm, 'align_op': align_op, }) for c in tem_keys]
        align_re = {}
        for r in pool_results:
            at_re = r.get(999999)
            # Each result reports its own template id under 'c'.
            c = at_re['c']
            align_re[c] = at_re
            if N.isnan(align_re[c]['score']):
                if (self.logger is not None):
                    self.logger.warning('alignment failed: rec %s, template %s, error %s ', repr(rec), repr(tem_keys[c]), repr(align_re[c]['err']))
        # The pool is closed (not joined) and forgotten; the next call creates a new one.
        self.pool.close()
        self.pool = None
    else:
        align_re = {}
        for c in tem_keys:
            # Abort before each alignment if the work queue already lists this task as done.
            if self.work_queue.done_tasks_contains(self.task.task_id):
                raise Exception('Duplicated task')
            align_re[c] = align_to_templates__pair_align(c=c, t_key=tem_keys[c], v=v, vm=vm, align_op=align_op)
            if N.isnan(align_re[c]['score']):
                if (self.logger is not None):
                    self.logger.warning('alignment failed: rec %s, template %s, error %s ', repr(rec), repr(tem_keys[c]), repr(align_re[c]['err']))
    return {'vol_key': rec, 'align': align_re, }
Example #28
Source File: util.py From aitom with GNU General Public License v3.0 | 5 votes |
def template_segmentation(self, tk, op, multiprocessing=False):
    """Run ``template_segmentation__single`` for every key of ``tk``.

    Each call returns a dict whose 'tk' entry is stored back into ``tk``
    under the key given by its 'c' entry.  With ``multiprocessing`` the calls
    are submitted to ``self.pool`` (created on demand, then closed and
    dropped); otherwise they run in-process on deep copies of the inputs.
    """
    if not multiprocessing:
        for key in tk:
            outcome = template_segmentation__single(c=key, tk=copy.deepcopy(tk[key]), op=copy.deepcopy(op))
            tk[outcome['c']] = outcome['tk']
        return

    if (self.pool is None):
        self.pool = Pool()

    # Submit everything first, then collect the results in submission order.
    pending = []
    for key in tk:
        job_kwds = {'c': key, 'tk': tk[key], 'op': op, }
        pending.append(self.pool.apply_async(func=template_segmentation__single, kwds=job_kwds))

    for handle in pending:
        outcome = handle.get(999999)
        tk[outcome['c']] = outcome['tk']

    self.pool.close()
    self.pool = None
Example #29
Source File: multiprocessing.py From python-pool-performance with MIT License | 5 votes |
def init_pool(self, worker_count):
    """Create and return a ``Pool`` with ``worker_count`` workers."""
    new_pool = Pool(worker_count)
    return new_pool
Example #30
Source File: dataset_utils.py From ACDC2017 with Apache License 2.0 | 5 votes |
def run_preprocessing(folder="/media/fabian/My Book/datasets/ACDC/training/",
                      folder_out="/media/fabian/DeepLearningData/datasets/ACDC_forReal_orig_Z/",
                      keep_z_spacing=True):
    """Pickle the patient info and preprocess patients 0..100 on 8 workers.

    The output of ``generate_patient_info(folder)`` is dumped to
    ``patient_info.pkl`` inside ``folder_out`` (created if missing), then
    ``process_patient`` is mapped over one argument tuple per patient id.
    """
    patient_info = generate_patient_info(folder)

    if not os.path.isdir(folder_out):
        os.mkdir(folder_out)

    info_path = os.path.join(folder_out, "patient_info.pkl")
    with open(info_path, 'w') as f:
        cPickle.dump(patient_info, f)

    # beware of z spacing!!! see process_patient for more info!
    jobs = [
        (patient_id, patient_info, folder, folder_out, keep_z_spacing)
        for patient_id in range(101)
    ]

    p = pool.Pool(8)
    p.map(process_patient, jobs)
    p.close()
    p.join()