Python multiprocessing.Pool() Examples
The following are 30 code examples of multiprocessing.Pool().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module multiprocessing, or try the search function.
Example #1
Source File: base.py From django-anonymizer with MIT License | 6 votes |
def run(self, chunksize=2000, parallel=4):
    """Validate, then apply ``_run`` to the queryset chunk by chunk.

    Nothing is done when ``self.replacers`` is empty.

    :param chunksize: value handed to ``get_queryset_chunk_iterator``.
    :param parallel: number of worker processes; ``0`` processes every
        chunk serially in the current process.
    :raises: whatever a worker raised (re-raised by ``future.get()``).
    """
    self.validate()
    if not self.replacers:
        return

    chunks = self.get_queryset_chunk_iterator(chunksize)
    if parallel == 0:
        for objs in chunks:
            _run(self, objs)
        return

    # NOTE(review): presumably closed so the workers do not share the
    # parent's database connection - confirm.
    connection.close()
    pool = Pool(processes=parallel)
    try:
        futures = [pool.apply_async(_run, (self, objs)) for objs in chunks]
        for future in futures:
            # get() re-raises any exception raised inside the worker.
            future.get()
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when a chunk fails.
        pool.terminate()
        raise
    finally:
        pool.join()
Example #2
Source File: util.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License | 6 votes |
def score_samples(kdes, samples, preds, n_jobs=None):
    """Score every sample with the entry of ``kdes`` selected by its prediction.

    :param kdes: container indexed by the values found in ``preds``
        (presumably one fitted density estimator per class - TODO confirm).
    :param samples: iterable of samples, paired element-wise with ``preds``.
    :param preds: iterable of keys/indices into ``kdes``, one per sample.
    :param n_jobs: number of worker processes; ``None`` lets
        ``multiprocessing`` pick its default.
    :return: ``numpy`` array with the ``score_point`` result of every pair.
    """
    # Pool(None) behaves exactly like Pool(), so no branching is required.
    pool = mp.Pool(n_jobs)
    try:
        results = np.asarray(
            pool.map(
                score_point,
                [(x, kdes[i]) for x, i in zip(samples, preds)]
            )
        )
        pool.close()
    except BaseException:
        # Make sure no worker outlives a failed scoring run.
        pool.terminate()
        raise
    finally:
        pool.join()
    return results
Example #3
Source File: parallel.py From CAMISIM with Apache License 2.0 | 6 votes |
def add_tasks(thread_task_list, identifier=None):
    """
    Submit several tasks to the shared worker pool for asynchronous execution.

    @type thread_task_list: list of TaskThread
    @param identifier: key under which the pending results are stored;
        defaults to the current number of stored keys
    @return: the identifier under which the pending results were stored
    """
    assert isinstance(thread_task_list, list)
    if identifier is None:
        identifier = len(AsyncParallel.task_handler_list)

    # The shared pool is created lazily, on first use.
    if AsyncParallel.pool is None:
        AsyncParallel.pool = mp.Pool(processes=AsyncParallel.max_processes)

    pending = AsyncParallel.task_handler_list.setdefault(identifier, [])
    for job in thread_task_list:
        assert isinstance(job, TaskThread)
        pending.append(AsyncParallel.pool.apply_async(job.fun, job.args))
    return identifier
Example #4
Source File: get_recipes.py From recipe-box with MIT License | 6 votes |
def scrape_recipe_box(scraper, site_str, page_iter, status_interval=50):
    """Scrape every page in ``page_iter`` and save the collected recipes.

    The module-level ``args`` selects the mode: ``args.append`` starts from
    the previously saved recipes, ``args.multi`` scrapes through a process
    pool instead of the sequential loop.

    :param scraper: callable taking a page number and returning a dict of
        recipes (it is merged with ``dict.update``).
    :param site_str: site name handed to ``quick_load`` / ``quick_save``.
    :param page_iter: iterable of page numbers.
    :param status_interval: in sequential mode, print progress and save a
        snapshot every time the page number is a multiple of this value.
    """
    if args.append:
        recipes = quick_load(site_str)
    else:
        recipes = {}

    start = time.time()
    if args.multi:
        pool = Pool(cpu_count() * 2)
        try:
            results = pool.map(scraper, page_iter)
            pool.close()
        except BaseException:
            # Do not leave worker processes behind when scraping fails.
            pool.terminate()
            raise
        finally:
            pool.join()
        for r in results:
            recipes.update(r)
    else:
        for i in page_iter:
            recipes.update(scraper(i))
            if i % status_interval == 0:
                print('Scraping page {} of {}'.format(i, max(page_iter)))
                quick_save(site_str, recipes)
            # Pause between requests.
            time.sleep(args.sleep)

    print('Scraped {} recipes from {} in {:.0f} minutes'.format(
        len(recipes), site_str, (time.time() - start) / 60))
    quick_save(site_str, recipes)
Example #5
Source File: separator.py From spleeter with MIT License | 6 votes |
def __init__(self, params_descriptor, MWF=False, stft_backend="auto", multiprocess=True):
    """
    Default constructor.

    :param params_descriptor: Descriptor for TF params to be used.
    :param MWF: (Optional) True if MWF should be used, False otherwise.
    :param stft_backend: (Optional) Value resolved through ``get_backend``;
        the result is stored under the ``stft_backend`` key of the params.
    :param multiprocess: (Optional) True to create a worker pool, False to
        leave the pool unset.
    """
    configuration = load_configuration(params_descriptor)
    self._params = configuration
    self._sample_rate = configuration['sample_rate']
    self._MWF = MWF
    self._tf_graph = tf.Graph()
    # These members start out unset.
    self._predictor = None
    self._input_provider = None
    self._builder = None
    self._features = None
    self._session = None
    if multiprocess:
        self._pool = Pool()
    else:
        self._pool = None
    self._tasks = []
    self._params["stft_backend"] = get_backend(stft_backend)
Example #6
Source File: translate.py From flores with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def translate_files_local(args, cmds):
    """Run every command in ``cmds`` through ``translate`` on a local pool.

    One worker is started per entry of ``args.cuda_visible_device_ids``; the
    ids themselves are handed to the workers through a managed queue.

    :param args: object exposing ``cuda_visible_device_ids`` (an iterable
        with a length).
    :param cmds: sequence of commands; each one is passed to ``translate``
        as ``(gpu_queue, cmd)``.
    """
    device_ids = args.cuda_visible_device_ids
    # The manager owns a server process; the context manager shuts it down
    # once all work is finished.
    with mp.Manager() as manager:
        gpu_queue = manager.Queue()
        for device_id in device_ids:
            gpu_queue.put(device_id)

        jobs = [(gpu_queue, cmd) for cmd in cmds]
        with mp.Pool(processes=len(device_ids)) as pool:
            # The results are not used; the iterator is drained only to wait
            # for completion and to drive the progress bar.
            for _ in tqdm.tqdm(pool.imap_unordered(translate, jobs),
                               total=len(cmds)):
                pass
Example #7
Source File: get_twitter_user_data_parallel.py From twitterscraper with MIT License | 6 votes |
def main(args):
    """Fetch the profile of every user named in ``args`` and display a table.

    The profiles are appended to the module-level ``twitter_user_info`` list
    and shown as a DataFrame sorted by follower count (descending).

    :param args: iterable of user names.
    """
    users = list(args)

    # At most 8 workers, but at least 1: Pool(0) raises ValueError when no
    # user was given.
    pool = Pool(max(1, min(len(users), 8)))
    try:
        twitter_user_info.extend(pool.map(get_user_info, users))
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when a lookup fails.
        pool.terminate()
        raise
    finally:
        pool.join()

    cols = ['id', 'fullname', 'date_joined', 'location', 'blog', 'num_tweets',
            'following', 'followers', 'likes', 'lists']
    data_frame = pd.DataFrame(twitter_user_info, index=users, columns=cols)
    data_frame.index.name = "Users"
    data_frame.sort_values(by="followers", ascending=False, inplace=True,
                           kind='quicksort', na_position='last')
    display(data_frame)
Example #8
Source File: get_twitter_user_data.py From twitterscraper with MIT License | 6 votes |
def main():
    """Fetch the profiles of a fixed list of users and display a table.

    The profiles are appended to the module-level ``twitter_user_info`` list
    and shown as a DataFrame sorted by follower count (descending); the
    elapsed wall-clock time is printed before the table.
    """
    start = time.time()
    users = ['Carlos_F_Enguix', 'mmtung', 'dremio', 'MongoDB', 'JenWike',
             'timberners_lee', 'ataspinar2', 'realDonaldTrump', 'BarackObama',
             'elonmusk', 'BillGates', 'BillClinton', 'katyperry',
             'KimKardashian']

    # The context manager tears the workers down even if a lookup fails.
    with Pool(8) as pool:
        twitter_user_info.extend(pool.map(get_user_info, users))

    cols = ['id', 'fullname', 'date_joined', 'location', 'blog',
            'num_tweets', 'following', 'followers', 'likes', 'lists']
    data_frame = pd.DataFrame(twitter_user_info, index=users, columns=cols)
    data_frame.index.name = "Users"
    data_frame.sort_values(by="followers", ascending=False, inplace=True,
                           kind='quicksort', na_position='last')
    elapsed = time.time() - start
    print(f"Elapsed time: {elapsed}")
    display(data_frame)
Example #9
Source File: utils.py From PathCon with MIT License | 6 votes |
def get_params_for_mp(n_triples):
    """Create a pool with one worker per core and split the triples evenly.

    The index range ``[0, n_triples)`` is cut into one contiguous
    ``[start, end]`` pair per core; the first ``n_triples % n_cores`` pairs
    are one element longer than the rest.

    :param n_triples: total number of items to distribute.
    :return: ``(n_cores, pool, range_list)``; the caller owns the pool.
    """
    n_cores = mp.cpu_count()
    pool = mp.Pool(n_cores)

    base_size, remainder = divmod(n_triples, n_cores)
    range_list = []
    lower = 0
    for worker in range(n_cores):
        upper = lower + base_size + (1 if worker < remainder else 0)
        range_list.append([lower, upper])
        lower = upper
    return n_cores, pool, range_list


# input: [(h1, {t1, t2 ...}), (h2, {t3 ...}), ...]
# output: {(h1, t1): paths, (h1, t2): paths, (h2, t3): paths, ...}
Example #10
Source File: estimator.py From EDeN with MIT License | 6 votes |
def model_selection(self, graphs, targets, n_iter=30, subsample_size=None):
    """Randomized model selection.

    Runs ``_eval`` ``n_iter`` times in a process pool, each time with the
    same ``(graphs, targets, param_distr)`` argument, and builds a new
    estimator from the parameters of the best result.

    :param graphs: input graphs.
    :param targets: targets matching ``graphs``.
    :param n_iter: number of evaluations to run.
    :param subsample_size: if truthy, ``graphs``/``targets`` are first
        reduced with ``subsample``.
    :return: a new ``EdenEstimator`` built from the best parameters.
    """
    param_distr = {"r": list(range(1, 5)), "d": list(range(0, 10))}
    if subsample_size:
        graphs, targets = subsample(
            graphs, targets, subsample_size=subsample_size)

    pool = mp.Pool()
    try:
        scores = pool.map(_eval, [(graphs, targets, param_distr)] * n_iter)
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when an evaluation fails.
        pool.terminate()
        raise
    finally:
        pool.join()

    # Rank on the first element only (assumed to be the score - the second
    # element is the parameter mapping). Comparing whole tuples would fall
    # back to comparing the mappings on a tie, which raises TypeError.
    best_params = max(scores, key=lambda result: result[0])[1]
    logger.debug("Best parameters:\n%s", best_params)
    return EdenEstimator(**best_params)
Example #11
Source File: generate_toys.py From medicaldetectiontoolkit with Apache License 2.0 | 6 votes |
def generate_experiment(exp_name, n_train_images, n_test_images, mode, class_diameters=(20, 20), n_workers=12):
    """Create the train and test images of one toy experiment in parallel.

    Images are written below ``<cf.root_dir>/<exp_name>/train`` and
    ``.../test``; afterwards the meta information of both directories is
    aggregated.

    :param exp_name: name of the experiment (sub-directory of the root dir).
    :param n_train_images: number of training images to create.
    :param n_test_images: number of test images to create.
    :param mode: passed through to ``multi_processing_create_image``.
    :param class_diameters: object diameters; half of the largest one is
        used as the margin between object center and image edge.
    :param n_workers: number of worker processes.
    """
    train_dir = os.path.join(cf.root_dir, exp_name, 'train')
    test_dir = os.path.join(cf.root_dir, exp_name, 'test')
    for directory in (train_dir, test_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # enforced distance between object center and image edge.
    foreground_margin = np.max(class_diameters) // 2

    info = [[train_dir, six, foreground_margin, class_diameters, mode]
            for six in range(n_train_images)]
    info += [[test_dir, six, foreground_margin, class_diameters, mode]
             for six in range(n_test_images)]

    print('starting creating {} images'.format(len(info)))
    pool = Pool(processes=n_workers)
    try:
        pool.map(multi_processing_create_image, info, chunksize=1)
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when image creation fails.
        pool.terminate()
        raise
    finally:
        pool.join()

    aggregate_meta_info(train_dir)
    aggregate_meta_info(test_dir)
Example #12
Source File: evolve-feedforward-parallel.py From neat-python with BSD 3-Clause "New" or "Revised" License | 6 votes |
def eval_genome(genome, config):
    """
    Compute the fitness of a single genome; run in parallel by
    ParallelEvaluator.

    Takes a single genome and the genome class configuration data and
    returns one float: 4.0 minus the summed squared error of the network's
    first output over all XOR samples.

    This function has to live at module scope so that multiprocessing.Pool
    (which ParallelEvaluator uses) can find it. For the same reason, guard
    any executable code with a __main__ check, otherwise importing the
    module in the workers spawns processes recursively (a fork bomb).
    """
    network = neat.nn.FeedForwardNetwork.create(genome, config)
    fitness = 4.0
    for inputs, expected in zip(xor_inputs, xor_outputs):
        prediction = network.activate(inputs)
        fitness -= (prediction[0] - expected[0]) ** 2
    return fitness
Example #13
Source File: securityheader.py From securityheaders with Apache License 2.0 | 6 votes |
def check_headers_parallel(self, urls, options=None, callback=None):
    """Run ``check_headers`` for every URL in a process pool.

    :param urls: iterable of URLs to check.
    :param options: mapping of options; when falsy, ``self.options.result()``
        is used. Its ``redirects`` entry is passed on separately.
    :param callback: optional callable forwarded to ``apply_async``.
    :return: list with one ``AsyncResult`` per URL; the pool has been joined
        by the time the list is returned.
    :raises Exception: if ``Pool`` is not available.
    """
    if not options:
        options = self.options.result()

    if not Pool:
        raise Exception('no parallelism supported')

    freeze_support()
    workers = Pool(processes=100)
    pending = [
        workers.apply_async(
            self.check_headers,
            args=(url, options.get('redirects'), options),
            callback=callback,
        )
        for url in urls
    ]
    workers.close()
    workers.join()
    return pending
Example #14
Source File: t2t_datagen.py From fine-lm with MIT License | 6 votes |
def generate_data_for_registered_problem(problem_name):
    """Generate data for a registered problem.

    With no explicit task id (``FLAGS.task_id < 0``) and a problem that
    supports it, the generation tasks are spread over a process pool;
    otherwise the problem generates its data in this process.

    :param problem_name: name under which the problem is registered.
    :raises ValueError: if ``--num_shards`` is set.
    """
    tf.logging.info("Generating data for %s.", problem_name)
    if FLAGS.num_shards:
        raise ValueError("--num_shards should not be set for registered Problem.")
    problem = registry.problem(problem_name)
    task_id = None if FLAGS.task_id < 0 else FLAGS.task_id
    data_dir = os.path.expanduser(FLAGS.data_dir)
    tmp_dir = os.path.expanduser(FLAGS.tmp_dir)

    if task_id is None and problem.multiprocess_generate:
        if FLAGS.task_id_start != -1:
            assert FLAGS.task_id_end != -1
            task_id_start = FLAGS.task_id_start
            task_id_end = FLAGS.task_id_end
        else:
            task_id_start = 0
            task_id_end = problem.num_generate_tasks

        # The pool is created before prepare_to_generate(), as before.
        pool = multiprocessing.Pool(processes=FLAGS.num_concurrent_processes)
        try:
            problem.prepare_to_generate(data_dir, tmp_dir)
            args = [(problem_name, data_dir, tmp_dir, generate_task_id)
                    for generate_task_id in range(task_id_start, task_id_end)]
            pool.map(generate_data_in_process, args)
            pool.close()
        except BaseException:
            # Do not leave worker processes behind when generation fails.
            pool.terminate()
            raise
        finally:
            pool.join()
    else:
        problem.generate_data(data_dir, tmp_dir, task_id)
Example #15
Source File: oracle.py From pwnypack with MIT License | 5 votes |
def padding_oracle_encrypt(oracle, plaintext, block_size=128, pool=None):
    """
    Encrypt plaintext with the help of a padding oracle, i.e. a function
    that returns ``True`` if the ciphertext it is given is correctly PKCS#7
    padded after decryption. The cipher needs to operate in CBC mode.

    Args:
        oracle(callable): The oracle function. Will be called repeatedly
            with a chunk of ciphertext.
        plaintext(bytes): The plaintext data to encrypt.
        block_size(int): The cipher's block size in bits.
        pool(multiprocessing.Pool): A multiprocessing pool used to
            parallelize the calls to the oracle function. Fairly heavy due
            to the required inter-process state synchronization. If
            ``None`` (the default), no multiprocessing will be used.

    Returns:
        bytes: The encrypted data.

    Raises:
        RuntimeError: Raised if the oracle behaves unpredictable.
    """
    block_len = block_size // 8

    # Apply PKCS#7 padding: always between 1 and block_len bytes.
    padded = bytearray(plaintext)
    padding_len = block_len - (len(padded) % block_len)
    padded.extend([padding_len] * padding_len)

    # Start from a random final block and build the ciphertext back to
    # front, prepending the block computed for each plaintext block.
    chunk = bytearray(os.urandom(block_len))
    ciphertext = bytearray()
    ciphertext[0:0] = chunk

    for offset in reversed(range(0, len(padded), block_len)):
        plain_block = padded[offset:offset + block_len]
        chunk = encrypt_block(oracle, block_len, chunk, plain_block, pool)
        ciphertext[0:0] = chunk

    return bytes(ciphertext)
Example #16
Source File: test_parallel.py From pymoo with Apache License 2.0 | 5 votes |
def test_evaluation_with_multiprocessing_process_pool_starmap(self):
    """Evaluation through ``Pool.starmap`` must reproduce the reference values."""
    X, expected = self.get_data()
    with multiprocessing.Pool() as pool:
        problem = MyProblemElementwise(parallelization=("starmap", pool.starmap))
        actual = problem.evaluate(X)
    within_tolerance = np.abs(actual - expected) < 0.00001
    self.assertTrue(np.all(within_tolerance))
Example #17
Source File: process_pool.py From misp42splunk with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, size=0, maxtasksperchild=10000):
    """Create the underlying process pool.

    :param size: number of worker processes; zero or a negative value means
        one worker per CPU.
    :param maxtasksperchild: forwarded to ``multiprocessing.Pool``.
    """
    worker_count = multiprocessing.cpu_count() if size <= 0 else size
    self.size = worker_count
    self._pool = multiprocessing.Pool(
        processes=worker_count,
        maxtasksperchild=maxtasksperchild,
    )
    self._stopped = False
Example #18
Source File: base.py From errand-boy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def run_server(self, pool_size=10, max_accepts=5000, max_child_tasks=100):
    """Accept connections and hand each one to a worker of a process pool.

    :param pool_size: number of worker processes.
    :param max_accepts: number of connections to accept before returning;
        a falsy value means the loop never stops on its own.
    :param max_child_tasks: passed to ``multiprocessing.Pool`` as its
        fourth positional argument (``maxtasksperchild``).

    The pool is terminated on ``KeyboardInterrupt`` (which is swallowed) and
    on any other exception (which is logged and re-raised); it is always
    closed and joined before returning.
    """
    setproctitle('errand-boy master process')

    listener = self.server_get_connection()
    logger.info('Accepting connections: {}'.format(self.connection_to_string(listener)))
    settings = (
        ('pool_size', pool_size),
        ('max_accepts', max_accepts),
        ('max_child_tasks', max_child_tasks),
    )
    for label, value in settings:
        logger.info('{}: {}'.format(label, value))

    pool = multiprocessing.Pool(pool_size, worker_init, tuple(), max_child_tasks)

    # ``True`` is the "no limit" marker: it keeps the loop condition truthy
    # and is never decremented.
    remaining_accepts = max_accepts if max_accepts else True

    try:
        while remaining_accepts:
            client = self.server_accept(listener)
            logger.info('Accepted connection from: {}'.format(self.connection_to_string(client)))
            pool.apply_async(worker, [self, self.server_serialize_connection(client)])
            # NOTE(review): presumably drops this process's reference while
            # it waits for the next connection - confirm.
            client = None
            if remaining_accepts is not True:
                remaining_accepts -= 1
    except KeyboardInterrupt:
        logger.info('Received KeyboardInterrupt')
        pool.terminate()
    except Exception as e:
        logger.exception(e)
        pool.terminate()
        raise
    finally:
        pool.close()
        pool.join()
Example #19
Source File: evolve.py From neat-python with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self):
    """Set up the optional worker pool and the bookkeeping attributes.

    A pool with ``NUM_CORES`` workers is created only when at least two
    cores are configured; otherwise ``self.pool`` is ``None``.
    """
    if NUM_CORES < 2:
        self.pool = None
    else:
        self.pool = multiprocessing.Pool(NUM_CORES)

    self.test_episodes = []
    self.generation = 0

    self.min_reward, self.max_reward = -200, 200

    self.episode_score = []
    self.episode_length = []
Example #20
Source File: evolve_interactive.py From neat-python with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, thumb_width, thumb_height, full_width, full_height, window_width, window_height, scheme, num_workers):
    """
    :param thumb_width: Width of preview image
    :param thumb_height: Height of preview image
    :param full_width: Width of full rendered image
    :param full_height: Height of full rendered image
    :param window_width: Width of the view window
    :param window_height: Height of the view window
    :param scheme: Image type to generate: mono, gray, or color
    :param num_workers: Number of processes in the worker pool
    """
    self.generation = 0

    self.thumb_width, self.thumb_height = thumb_width, thumb_height
    self.full_width, self.full_height = full_width, full_height
    self.window_width, self.window_height = window_width, window_height

    assert scheme in ('mono', 'gray', 'color')
    self.scheme = scheme

    # Compute the number of thumbnails we can show in the viewer window, while
    # leaving one row to handle minor variations in the population size.
    usable_width = window_width - 16
    usable_height = window_height - 16
    self.num_cols = int(math.floor(usable_width / (thumb_width + 4)))
    self.num_rows = int(math.floor(usable_height / (thumb_height + 4)))

    self.pool = Pool(num_workers)
Example #21
Source File: evolve_novelty.py From neat-python with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, num_workers, scheme):
    """Store the settings and create the worker pool.

    :param num_workers: number of processes in the pool.
    :param scheme: stored unchanged on the instance.
    """
    # Plain state first, then the pool.
    self.archive = []
    self.out_index = 1
    self.scheme = scheme
    self.num_workers = num_workers
    self.pool = Pool(num_workers)
Example #22
Source File: rouge_tensor.py From TransferRL with MIT License | 5 votes |
def rouge_l_sentence_level(eval_sentences, ref_sentences):
    """Computes ROUGE-L (sentence level) of two collections of sentences.

    Source: https://www.microsoft.com/en-us/research/publication/
    rouge-a-package-for-automatic-evaluation-of-summaries/

    Calculated according to:
    R_lcs = LCS(X,Y)/m
    P_lcs = LCS(X,Y)/n
    F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs)

    where:
    X = reference summary
    Y = Candidate summary
    m = length of reference summary
    n = length of candidate summary

    Sentences are paired position by position; surplus sentences of the
    longer collection are ignored.

    Args:
      eval_sentences: The sentences that have been picked by the summarizer
      ref_sentences: The sentences from the reference set

    Returns:
      A float32 numpy array with one F_lcs value per sentence pair
    """
    f1_scores = [
        _f_lcs(
            _len_lcs(eval_sentence, ref_sentence),
            len(ref_sentence),   # m
            len(eval_sentence),  # n
        )
        for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences)
    ]
    return np.array(f1_scores).astype(np.float32)
Example #23
Source File: parallel.py From neat-python with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, num_workers, eval_function, timeout=None):
    """
    Store the evaluation settings and create the worker pool.

    eval_function should take one argument, a tuple of
    (genome object, config object), and return
    a single float (the genome's fitness).

    :param num_workers: number of processes in the pool.
    :param timeout: stored unchanged on the instance.
    """
    self.timeout = timeout
    self.eval_function = eval_function
    self.num_workers = num_workers
    self.pool = Pool(num_workers)
Example #24
Source File: text.py From TaskBot with GNU General Public License v3.0 | 5 votes |
def cut(x, n_job=None, join=False):
    """
    Word segmentation: accepts a sentence (or a list of sentences) and
    returns the segmentation result.

    :param x: list of sentence or sentence
    :param n_job: number of worker processes used for list input (capped at
        ``CPU``); a falsy value segments the list in the current process
    :param join: if True, the segmentation result of a sentence is joined
        into one string; otherwise every token is a separate string
    :return: the result for a single sentence, or a list with one result
        per sentence
    """
    assert isinstance(x, str) or isinstance(x, list)

    # Pick the segmentation function once instead of in every branch.
    segment = _cut2str if join else _cut2list

    if isinstance(x, str):
        return segment(x)

    if not n_job:
        return [segment(i) for i in x]

    pool = mp.Pool(min(CPU, n_job))
    try:
        rst = pool.map(segment, x)
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when segmentation fails.
        pool.terminate()
        raise
    finally:
        pool.join()
    return rst
Example #25
Source File: extractor.py From firmanal with MIT License | 5 votes |
def __init__(self, indir, outdir=None, rootfs=True, kernel=True, numproc=True, server=None, brand=None):
    """Store the extraction settings and create the optional worker pool.

    :param indir: input firmware update file or directory.
    :param outdir: output firmware directory, or ``None``.
    :param rootfs: whether to attempt to extract the root filesystem.
    :param kernel: whether to attempt to extract the kernel.
    :param numproc: truthy to create a worker pool, falsy to run without one.
    :param server: hostname of the SQL server.
    :param brand: brand of the firmware.
    """
    # Input firmware update file or directory
    self._input = os.path.abspath(indir)

    # Output firmware directory
    if outdir:
        self.output_dir = os.path.abspath(outdir)
    else:
        self.output_dir = None

    # Whether to attempt to extract kernel
    self.do_kernel = kernel

    # Whether to attempt to extract root filesystem
    self.do_rootfs = rootfs

    # Brand of the firmware
    self.brand = brand

    # Hostname of SQL server
    self.database = server

    # Worker pool.
    if numproc:
        self._pool = multiprocessing.Pool()
    else:
        self._pool = None

    # Set containing MD5 checksums of visited items
    self.visited = set()

    # List containing tagged items to extract as 2-tuple: (tag [e.g. MD5],
    # path)
    self._list = []
Example #26
Source File: datasets.py From argus-freesound with MIT License | 5 votes |
def get_folds_data(corrections=None):
    """Load spectrograms, multi-hot targets and fold ids of the training set.

    :param corrections: optional mapping from file name to either
        ``'remove'`` (the file is skipped) or a replacement label string.
    :return: ``(images_lst, targets_lst, folds_lst)``, three lists with one
        entry per kept row of the folds table.
    """
    print("Start generate folds data")
    print("Audio config", get_audio_config())
    train_folds_df = pd.read_csv(config.train_folds_path)

    audio_paths_lst, targets_lst, folds_lst = [], [], []
    for _, row in train_folds_df.iterrows():
        labels = row.labels

        if corrections is not None and row.fname in corrections:
            action = corrections[row.fname]
            if action == 'remove':
                print(f"Skip {row.fname}")
                continue
            print(f"Replace labels {row.fname} from {labels} to {action}")
            labels = action

        folds_lst.append(row.fold)
        audio_paths_lst.append(row.file_path)

        # Multi-hot target: 1 at the index of every label of the clip.
        target = torch.zeros(len(config.classes))
        for label in labels.split(','):
            target[config.class2index[label]] = 1.
        targets_lst.append(target)

    with mp.Pool(N_WORKERS) as pool:
        images_lst = pool.map(read_as_melspectrogram, audio_paths_lst)

    return images_lst, targets_lst, folds_lst
Example #27
Source File: import_train_images.py From L3C-PyTorch with GNU General Public License v3.0 | 5 votes |
def process_all_in(self, input_dir):
    """Process every not-yet-handled image of this job in a process pool.

    Images whose ``get_fn`` name is already in ``self.images_cleaned`` or
    ``self.images_discarded`` are skipped. Progress is printed on one line,
    refreshed every 100 images.

    :param input_dir: directory handed to ``iter_images``.
    """
    images_dl = iter_images(input_dir)  # generator of paths

    # files this job should compress
    candidates = [path for _, path in job_enumerate(images_dl)]

    # files that were compressed already by somebody (i.e. this job earlier)
    processed_already = self.images_cleaned | self.images_discarded

    # resulting files to be compressed
    files_of_job = [path for path in candidates
                    if get_fn(path) not in processed_already]

    N = len(files_of_job)
    if not files_of_job:
        print('Everything processed / nothing to process.')
        return

    num_process = 2 if NUM_TASKS > 1 else _NUM_PROCESSES
    print(f'Processing {N} images using {num_process} processes in {NUM_TASKS} tasks...')

    start = time.time()
    # First estimate of the remaining time; not read again in this snippet.
    predicted_time = None
    with multiprocessing.Pool(processes=num_process) as pool:
        finished = pool.imap_unordered(self.process, files_of_job)
        for i, _ in enumerate(finished):
            # Report only every 100th image, and never for the first one.
            if i == 0 or i % 100 != 0:
                continue
            time_per_img = (time.time() - start) / (i + 1)
            time_remaining = time_per_img * (N - i)
            if not predicted_time:
                predicted_time = time_remaining
            print(f'\r{time_per_img:.2e} s/img | '
                  f'{i / N * 100:.1f}% | '
                  f'{time_remaining / 60:.1f} min remaining',
                  end='', flush=True)
Example #28
Source File: alignproc.py From svviz with MIT License | 5 votes |
def multimap(namesToReferences, seqs):
    """Apply ``remaps`` to every sequence in a process pool.

    The pool is created on the first call and cached as an attribute of
    this function, so later calls reuse the same workers.

    :param namesToReferences: passed unchanged to every ``remaps`` call.
    :param seqs: iterable of sequences; each is sent as
        ``(namesToReferences, seq)``.
    :return: dict built from the pairs returned by ``remaps``.
    """
    try:
        pool = multimap.pool
    except AttributeError:
        pool = multimap.pool = multiprocessing.Pool(processes=misc.cpu_count_physical())

    jobs = [(namesToReferences, seq) for seq in seqs]

    # NOTE(review): presumably map_async + get(timeout) is used instead of
    # map() so that the wait stays interruptible - confirm.
    # Serial equivalent: dict(map(remaps, jobs))
    pending = pool.map_async(remaps, jobs)
    return dict(pending.get(999999))
Example #29
Source File: dataloader_utils.py From medicaldetectiontoolkit with Apache License 2.0 | 5 votes |
def unpack_dataset(folder, threads=8):
    """Run ``convert_to_npy`` on the ``.npz`` file of every case, in parallel.

    :param folder: directory holding one ``<case identifier>.npz`` file per
        case returned by ``get_case_identifiers``.
    :param threads: number of worker processes in the pool.
    """
    case_identifiers = get_case_identifiers(folder)
    npz_files = [os.path.join(folder, i + ".npz") for i in case_identifiers]

    p = Pool(threads)
    try:
        p.map(convert_to_npy, npz_files)
        p.close()
    except BaseException:
        # Do not leave worker processes behind when a conversion fails.
        p.terminate()
        raise
    finally:
        p.join()
Example #30
Source File: pack_dataset.py From medicaldetectiontoolkit with Apache License 2.0 | 5 votes |
def unpack_dataset(folder, threads=8):
    """Run ``convert_to_npy`` on the ``.npz`` file of every case, in parallel.

    :param folder: directory holding one ``<case identifier>.npz`` file per
        case returned by ``get_case_identifiers``.
    :param threads: number of worker processes in the pool.
    """
    case_identifiers = get_case_identifiers(folder)
    npz_files = [os.path.join(folder, i + ".npz") for i in case_identifiers]

    p = Pool(threads)
    try:
        p.map(convert_to_npy, npz_files)
        p.close()
    except BaseException:
        # Do not leave worker processes behind when a conversion fails.
        p.terminate()
        raise
    finally:
        p.join()