Python multiprocessing.Pool() Examples

The following are 30 code examples of multiprocessing.Pool(), drawn from open-source projects; each example notes its source file, project, and license. You may also want to check out all available functions and classes of the multiprocessing module.
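Most of the examples below follow one canonical pattern. Here is a minimal, self-contained sketch of it (the square worker is a placeholder, not taken from any project below):

import multiprocessing

def square(x):
    # The worker must live at module scope so child processes can import it.
    return x * x

if __name__ == '__main__':
    # The __main__ guard prevents child processes from re-running this block
    # on platforms that spawn instead of fork (e.g. Windows, macOS).
    with multiprocessing.Pool(processes=4) as pool:
        results = pool.map(square, range(10))
    print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]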
Example #1
Source File: base.py    From django-anonymizer with MIT License
def run(self, chunksize=2000, parallel=4):
        self.validate()

        if not self.replacers:
            return

        chunks = self.get_queryset_chunk_iterator(chunksize)

        if parallel == 0:
            for objs in chunks:
                _run(self, objs)
        else:
            connection.close()
            pool = Pool(processes=parallel)
            futures = [pool.apply_async(_run, (self, objs))
                       for objs in chunks]
            for future in futures:
                future.get()
            pool.close()
            pool.join() 
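Two details in this example are worth calling out: connection.close() is called before the pool is created so that forked workers do not inherit and share the parent's database connection (each worker opens its own), and every AsyncResult is drained with .get() before close()/join() so that any exception raised inside a worker is re-raised in the parent process.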
Example #2
Source File: util.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def score_samples(kdes, samples, preds, n_jobs=None):
    """
    TODO
    :param kdes:
    :param samples:
    :param preds:
    :param n_jobs:
    :return:
    """
    if n_jobs is not None:
        p = mp.Pool(n_jobs)
    else:
        p = mp.Pool()
    results = np.asarray(
        p.map(
            score_point,
            [(x, kdes[i]) for x, i in zip(samples, preds)]
        )
    )
    p.close()
    p.join()

    return results 
Example #3
Source File: parallel.py    From CAMISIM with Apache License 2.0
def add_tasks(thread_task_list, identifier=None):
		"""
			Execute several functions (threads, processes) in parallel.

			@type thread_task_list: list of TaskThread

			@return: the identifier under which the asynchronous task handlers were registered
		"""
		assert isinstance(thread_task_list, list)

		if identifier is None:
			identifier = len(AsyncParallel.task_handler_list)

		# creates a pool of workers, add all tasks to the pool
		if AsyncParallel.pool is None:
			AsyncParallel.pool = mp.Pool(processes=AsyncParallel.max_processes)

		if identifier not in AsyncParallel.task_handler_list:
			AsyncParallel.task_handler_list[identifier] = []

		for task in thread_task_list:
			assert isinstance(task, TaskThread)
			AsyncParallel.task_handler_list[identifier].append(AsyncParallel.pool.apply_async(task.fun, task.args))
		return identifier 
Example #4
Source File: get_recipes.py    From recipe-box with MIT License
def scrape_recipe_box(scraper, site_str, page_iter, status_interval=50):

    if args.append:
        recipes = quick_load(site_str)
    else:
        recipes = {}
    start = time.time()
    if args.multi:
        pool = Pool(cpu_count() * 2)
        results = pool.map(scraper, page_iter)
        for r in results:
            recipes.update(r)
    else:
        for i in page_iter:
            recipes.update(scraper(i))
            if i % status_interval == 0:
                print('Scraping page {} of {}'.format(i, max(page_iter)))
                quick_save(site_str, recipes)
            time.sleep(args.sleep)

    print('Scraped {} recipes from {} in {:.0f} minutes'.format(
        len(recipes), site_str, (time.time() - start) / 60))
    quick_save(site_str, recipes) 
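Pool(cpu_count() * 2) deliberately oversubscribes the CPU, presumably because scraping is network-bound: workers spend most of their time waiting on I/O, so running more processes than cores keeps throughput up.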
Example #5
Source File: separator.py    From spleeter with MIT License
def __init__(self, params_descriptor, MWF=False, stft_backend="auto", multiprocess=True):
        """ Default constructor.

        :param params_descriptor: Descriptor for TF params to be used.
        :param MWF: (Optional) True if MWF should be used, False otherwise.
        :param stft_backend: (Optional) STFT backend to use ("auto" selects one automatically).
        :param multiprocess: (Optional) True to create a worker pool for asynchronous tasks.
        """

        self._params = load_configuration(params_descriptor)
        self._sample_rate = self._params['sample_rate']
        self._MWF = MWF
        self._tf_graph = tf.Graph()
        self._predictor = None
        self._input_provider = None
        self._builder = None
        self._features = None
        self._session = None
        self._pool = Pool() if multiprocess else None
        self._tasks = []
        self._params["stft_backend"] = get_backend(stft_backend) 
Example #6
Source File: translate.py    From flores with Creative Commons Attribution Share Alike 4.0 International
def translate_files_local(args, cmds):
    m = mp.Manager()
    gpu_queue = m.Queue()
    for i in args.cuda_visible_device_ids:
        gpu_queue.put(i)
    with mp.Pool(processes=len(args.cuda_visible_device_ids)) as pool:
        for _ in tqdm.tqdm(pool.imap_unordered(translate, [(gpu_queue, cmd) for cmd in cmds]), total=len(cmds)):
            pass 
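The translate worker is defined elsewhere in the file; a plausible sketch of the pattern (the names and the subprocess call here are assumptions, not the flores implementation) checks a GPU id out of the shared queue and returns it when done, so at most one task runs per GPU at a time:

import os
import subprocess

def translate(job):
    # Hypothetical worker illustrating the queue-based GPU lease pattern.
    gpu_queue, cmd = job
    gpu_id = gpu_queue.get()  # blocks until some GPU is free
    try:
        env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(gpu_id))
        subprocess.check_call(cmd, shell=True, env=env)
    finally:
        gpu_queue.put(gpu_id)  # hand the GPU back for the next task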
Example #7
Source File: get_twitter_user_data_parallel.py    From twitterscraper with MIT License
def main(args):
    users = list(args)

    # cap the pool at 8 worker processes
    pool = Pool(min(len(users), 8))

    for user in pool.map(get_user_info, users):
        twitter_user_info.append(user)

    cols = ['id', 'fullname', 'date_joined', 'location', 'blog', 'num_tweets', 'following', 'followers', 'likes',
            'lists']
    data_frame = pd.DataFrame(twitter_user_info, index=users, columns=cols)
    data_frame.index.name = "Users"
    data_frame.sort_values(by="followers", ascending=False, inplace=True, kind='quicksort', na_position='last')
    display(data_frame) 
Example #8
Source File: get_twitter_user_data.py    From twitterscraper with MIT License
def main():
    start = time.time()
    users = ['Carlos_F_Enguix', 'mmtung', 'dremio', 'MongoDB', 'JenWike', 'timberners_lee','ataspinar2', 'realDonaldTrump',
            'BarackObama', 'elonmusk', 'BillGates', 'BillClinton','katyperry','KimKardashian']

    pool = Pool(8)
    for user in pool.map(get_user_info, users):
        twitter_user_info.append(user)

    cols = ['id', 'fullname', 'date_joined', 'location', 'blog', 'num_tweets',
            'following', 'followers', 'likes', 'lists']
    data_frame = pd.DataFrame(twitter_user_info, index=users, columns=cols)
    data_frame.index.name = "Users"
    data_frame.sort_values(by="followers", ascending=False, inplace=True, kind='quicksort', na_position='last')
    elapsed = time.time() - start
    print(f"Elapsed time: {elapsed}")
    display(data_frame) 
Example #9
Source File: utils.py    From PathCon with MIT License
def get_params_for_mp(n_triples):
    n_cores = mp.cpu_count()
    pool = mp.Pool(n_cores)
    avg = n_triples // n_cores

    range_list = []
    start = 0
    for i in range(n_cores):
        num = avg + 1 if i < n_triples - avg * n_cores else avg
        range_list.append([start, start + num])
        start += num

    return n_cores, pool, range_list


# input: [(h1, {t1, t2 ...}), (h2, {t3 ...}), ...]
# output: {(h1, t1): paths, (h1, t2): paths, (h2, t3): paths, ...} 
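A sketch of how the values returned by get_params_for_mp might be consumed (triples and work_on_range are hypothetical; the real callers live elsewhere in utils.py):

n_cores, pool, range_list = get_params_for_mp(len(triples))
# one async task per core, each handling a contiguous slice of the triples
results = [pool.apply_async(work_on_range, (triples[start:end],))
           for start, end in range_list]
outputs = [r.get() for r in results]  # .get() re-raises worker exceptions
pool.close()
pool.join()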
Example #10
Source File: estimator.py    From EDeN with MIT License
def model_selection(self, graphs, targets,
                        n_iter=30, subsample_size=None):
        """model_selection_randomized."""
        param_distr = {"r": list(range(1, 5)), "d": list(range(0, 10))}
        if subsample_size:
            graphs, targets = subsample(
                graphs, targets, subsample_size=subsample_size)

        pool = mp.Pool()
        scores = pool.map(_eval, [(graphs, targets, param_distr)] * n_iter)
        pool.close()
        pool.join()

        best_params = max(scores)[1]
        logger.debug("Best parameters:\n%s" % (best_params))
        self = EdenEstimator(**best_params)
        return self 
Example #11
Source File: generate_toys.py    From medicaldetectiontoolkit with Apache License 2.0
def generate_experiment(exp_name, n_train_images, n_test_images, mode, class_diameters=(20, 20)):

    train_dir = os.path.join(cf.root_dir, exp_name, 'train')
    test_dir = os.path.join(cf.root_dir, exp_name, 'test')
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    if not os.path.exists(test_dir):
        os.makedirs(test_dir)

    # enforced distance between object center and image edge.
    foreground_margin = np.max(class_diameters) // 2

    info = []
    info += [[train_dir, six, foreground_margin, class_diameters, mode] for six in range(n_train_images)]
    info += [[test_dir, six, foreground_margin, class_diameters, mode] for six in range(n_test_images)]

    print('Starting to create {} images'.format(len(info)))
    pool = Pool(processes=12)
    pool.map(multi_processing_create_image, info, chunksize=1)
    pool.close()
    pool.join()

    aggregate_meta_info(train_dir)
    aggregate_meta_info(test_dir) 
Example #12
Source File: evolve-feedforward-parallel.py    From neat-python with BSD 3-Clause "New" or "Revised" License
def eval_genome(genome, config):
    """
    This function will be run in parallel by ParallelEvaluator.  It takes two
    arguments (a single genome and the genome class configuration data) and
    should return one float (that genome's fitness).

    Note that this function needs to be in module scope for multiprocessing.Pool
    (which is what ParallelEvaluator uses) to find it.  Because of this, make
    sure you check for __main__ before executing any code (as we do here in the
    last few lines in the file), otherwise you'll have made a fork bomb
    instead of a neuroevolution demo. :)
    """

    net = neat.nn.FeedForwardNetwork.create(genome, config)
    error = 4.0
    for xi, xo in zip(xor_inputs, xor_outputs):
        output = net.activate(xi)
        error -= (output[0] - xo[0]) ** 2
    return error 
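For context, the driver code that pairs with this function looks roughly like the following (a sketch of the neat-python API; building the config and the population is elided):

if __name__ == '__main__':
    # population = neat.Population(config), created from a config file earlier.
    # ParallelEvaluator farms eval_genome out over a multiprocessing.Pool.
    pe = neat.ParallelEvaluator(multiprocessing.cpu_count(), eval_genome)
    winner = population.run(pe.evaluate, 300)  # evolve for up to 300 generations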
Example #13
Source File: securityheader.py    From securityheaders with Apache License 2.0
def check_headers_parallel(self, urls, options=None, callback=None):
        if not options:
            options = self.options.result()

        if Pool:
            results = []
            freeze_support()
            pool = Pool(processes=100)
            for url in urls:
                result = pool.apply_async(self.check_headers, args=(url, options.get('redirects'), options), callback=callback)
                results.append(result)
            pool.close()
            pool.join() 
            return results
        else:
            raise Exception('no parallelism supported') 
Example #14
Source File: t2t_datagen.py    From fine-lm with MIT License
def generate_data_for_registered_problem(problem_name):
  """Generate data for a registered problem."""
  tf.logging.info("Generating data for %s.", problem_name)
  if FLAGS.num_shards:
    raise ValueError("--num_shards should not be set for registered Problem.")
  problem = registry.problem(problem_name)
  task_id = None if FLAGS.task_id < 0 else FLAGS.task_id
  data_dir = os.path.expanduser(FLAGS.data_dir)
  tmp_dir = os.path.expanduser(FLAGS.tmp_dir)
  if task_id is None and problem.multiprocess_generate:
    if FLAGS.task_id_start != -1:
      assert FLAGS.task_id_end != -1
      task_id_start = FLAGS.task_id_start
      task_id_end = FLAGS.task_id_end
    else:
      task_id_start = 0
      task_id_end = problem.num_generate_tasks
    pool = multiprocessing.Pool(processes=FLAGS.num_concurrent_processes)
    problem.prepare_to_generate(data_dir, tmp_dir)
    args = [(problem_name, data_dir, tmp_dir, task_id)
            for task_id in range(task_id_start, task_id_end)]
    pool.map(generate_data_in_process, args)
  else:
    problem.generate_data(data_dir, tmp_dir, task_id) 
Example #15
Source File: oracle.py    From pwnypack with MIT License
def padding_oracle_encrypt(oracle, plaintext, block_size=128, pool=None):
    """
    Encrypt plaintext using an oracle function that returns ``True`` if the
    provided ciphertext is correctly PKCS#7 padded after decryption. The
    cipher needs to operate in CBC mode.

    Args:
        oracle(callable): The oracle function. Will be called repeatedly with
            a chunk of ciphertext.
        plaintext(bytes): The plaintext data to encrypt.
        block_size(int): The cipher's block size in bits.
        pool(multiprocessing.Pool): A multiprocessing pool to use to
            parallelize the encryption. This pool is used to call the oracle
            function. Fairly heavy due to the required inter-process state
            synchronization. If ``None`` (the default), no multiprocessing
            will be used.

    Returns:
        bytes: The encrypted data.

    Raises:
        RuntimeError: Raised if the oracle behaves unpredictably.
    """

    plaintext = bytearray(plaintext)
    block_len = block_size // 8

    padding_len = block_len - (len(plaintext) % block_len)
    plaintext.extend([padding_len] * padding_len)

    ciphertext = bytearray()

    chunk = bytearray(os.urandom(block_len))
    ciphertext[0:0] = chunk

    for plain_start in range(len(plaintext) - block_len, -1, -block_len):
        plain = plaintext[plain_start:plain_start + block_len]
        chunk = ciphertext[0:0] = encrypt_block(oracle, block_len, chunk, plain, pool)

    return bytes(ciphertext) 
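As a concrete check of the arithmetic: block_size=128 bits gives block_len=16 bytes, and PKCS#7 pads with padding_len copies of the byte value padding_len, so a plaintext whose final block holds 13 bytes gains b'\x03\x03\x03'. Note the construction runs backwards: the random block becomes the last ciphertext block, each loop iteration prepends the block that decrypts-and-XORs to the corresponding plaintext block, and the first block of the output plays the role of the IV.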
Example #16
Source File: test_parallel.py    From pymoo with Apache License 2.0
def test_evaluation_with_multiprocessing_process_pool_starmap(self):
        X, F = self.get_data()
        with multiprocessing.Pool() as pool:
            _F = MyProblemElementwise(parallelization=("starmap", pool.starmap)).evaluate(X)
        self.assertTrue(np.all(np.abs(_F - F) < 0.00001)) 
Example #17
Source File: process_pool.py    From misp42splunk with GNU Lesser General Public License v3.0
def __init__(self, size=0, maxtasksperchild=10000):
        if size <= 0:
            size = multiprocessing.cpu_count()
        self.size = size
        self._pool = multiprocessing.Pool(processes=size,
                                          maxtasksperchild=maxtasksperchild)
        self._stopped = False 
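The maxtasksperchild=10000 argument makes the pool retire each worker after it has completed 10,000 tasks and replace it with a fresh process, which bounds memory growth from leaky task code; by default a worker lives as long as the pool itself.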
Example #18
Source File: base.py    From errand-boy with BSD 3-Clause "New" or "Revised" License
def run_server(self, pool_size=10, max_accepts=5000, max_child_tasks=100):
        setproctitle('errand-boy master process')

        serverconnection = self.server_get_connection()

        logger.info('Accepting connections: {}'.format(self.connection_to_string(serverconnection)))
        logger.info('pool_size: {}'.format(pool_size))
        logger.info('max_accepts: {}'.format(max_accepts))
        logger.info('max_child_tasks: {}'.format(max_child_tasks))

        pool = multiprocessing.Pool(pool_size, worker_init, tuple(), max_child_tasks)

        connections = []

        remaining_accepts = max_accepts

        # max_accepts == 0 (falsy) means accept connections forever.
        if not remaining_accepts:
            remaining_accepts = True

        try:
            while remaining_accepts:
                connection = self.server_accept(serverconnection)

                logger.info('Accepted connection from: {}'.format(self.connection_to_string(connection)))

                result = pool.apply_async(worker, [self, self.server_serialize_connection(connection)])

                connection = None

                if remaining_accepts is not True:
                    remaining_accepts -= 1
        except KeyboardInterrupt:
            logger.info('Received KeyboardInterrupt')
            pool.terminate()
        except Exception as e:
            logger.exception(e)
            pool.terminate()
            raise
        finally:
            pool.close()
            pool.join() 
Example #19
Source File: evolve.py    From neat-python with BSD 3-Clause "New" or "Revised" License
def __init__(self):
        self.pool = None if NUM_CORES < 2 else multiprocessing.Pool(NUM_CORES)
        self.test_episodes = []
        self.generation = 0

        self.min_reward = -200
        self.max_reward = 200

        self.episode_score = []
        self.episode_length = [] 
Example #20
Source File: evolve_interactive.py    From neat-python with BSD 3-Clause "New" or "Revised" License
def __init__(self, thumb_width, thumb_height, full_width, full_height,
                 window_width, window_height, scheme, num_workers):
        """
        :param thumb_width: Width of preview image
        :param thumb_height: Height of preview image
        :param full_width: Width of full rendered image
        :param full_height: Height of full rendered image
        :param window_width: Width of the view window
        :param window_height: Height of the view window
        :param scheme: Image type to generate: mono, gray, or color
        :param num_workers: Number of worker processes in the rendering pool
        """
        self.generation = 0
        self.thumb_width = thumb_width
        self.thumb_height = thumb_height
        self.full_width = full_width
        self.full_height = full_height

        self.window_width = window_width
        self.window_height = window_height

        assert scheme in ('mono', 'gray', 'color')
        self.scheme = scheme

        # Compute the number of thumbnails we can show in the viewer window, while
        # leaving one row to handle minor variations in the population size.
        self.num_cols = int(math.floor((window_width - 16) / (thumb_width + 4)))
        self.num_rows = int(math.floor((window_height - 16) / (thumb_height + 4)))

        self.pool = Pool(num_workers) 
Example #21
Source File: evolve_novelty.py    From neat-python with BSD 3-Clause "New" or "Revised" License
def __init__(self, num_workers, scheme):
        self.num_workers = num_workers
        self.scheme = scheme
        self.pool = Pool(num_workers)
        self.archive = []
        self.out_index = 1 
Example #22
Source File: rouge_tensor.py    From TransferRL with MIT License
def rouge_l_sentence_level(eval_sentences, ref_sentences):
  """Computes ROUGE-L (sentence level) of two collections of sentences.

  Source: https://www.microsoft.com/en-us/research/publication/
  rouge-a-package-for-automatic-evaluation-of-summaries/

  Calculated according to:
  R_lcs = LCS(X,Y)/m
  P_lcs = LCS(X,Y)/n
  F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs)

  where:
  X = reference summary
  Y = candidate summary
  m = length of reference summary
  n = length of candidate summary

  Args:
    eval_sentences: The sentences that have been picked by the summarizer
    ref_sentences: The sentences from the reference set

  Returns:
    A float: F_lcs
  """
  '''
  _t = time.time()
  pool = Pool(cpu_count())
  params = [(eval_sentence, ref_sentence)  for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences)]
  f1_scores = list(pool.imap(_run, params))
  pool.close()
  '''
  f1_scores = []
  for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences):
    m = len(ref_sentence)
    n = len(eval_sentence)
    lcs = _len_lcs(eval_sentence, ref_sentence)
    f1_scores.append(_f_lcs(lcs, m, n))
  return np.array(f1_scores).astype(np.float32) 
Example #23
Source File: parallel.py    From neat-python with BSD 3-Clause "New" or "Revised" License
def __init__(self, num_workers, eval_function, timeout=None):
        """
        eval_function should take one argument, a tuple of
        (genome object, config object), and return
        a single float (the genome's fitness).
        """
        self.num_workers = num_workers
        self.eval_function = eval_function
        self.timeout = timeout
        self.pool = Pool(num_workers) 
Example #24
Source File: text.py    From TaskBot with GNU General Public License v3.0
def cut(x, n_job=None, join=False):
    """ 分词功能,接收一个字符串,返回分词结果

    :param x: list of sentence or sentence
    :param n_job:
    :param join: return " ".join() result 如果为True,将分词结果合成一个字符串;否则,每个分词为一个字符串
    :return:
    """
    assert isinstance(x, str) or isinstance(x, list)
    if isinstance(x, str):
        # logger.info("String input, user 1 cpu core")
        if join:
            return _cut2str(x)
        else:
            return _cut2list(x)
    if n_job:
        n_job = min(CPU, n_job)
        # logger.info("%d Sentences input, Use %d cpu core " % (len(x), n_job))
        pool = mp.Pool(n_job)
        if join:
            rst = pool.map(_cut2str, x)
        else:
            rst = pool.map(_cut2list, x)
        pool.close()
        pool.join()
    else:
        if join:
            rst = [_cut2str(i) for i in x]
        else:
            rst = [_cut2list(i) for i in x]
    return rst 
Example #25
Source File: extractor.py    From firmanal with MIT License
def __init__(self, indir, outdir=None, rootfs=True, kernel=True,
                 numproc=True, server=None, brand=None):
        # Input firmware update file or directory
        self._input = os.path.abspath(indir)
        # Output firmware directory
        self.output_dir = os.path.abspath(outdir) if outdir else None

        # Whether to attempt to extract kernel
        self.do_kernel = kernel

        # Whether to attempt to extract root filesystem
        self.do_rootfs = rootfs

        # Brand of the firmware
        self.brand = brand

        # Hostname of SQL server
        self.database = server

        # Worker pool.
        self._pool = multiprocessing.Pool() if numproc else None

        # Set containing MD5 checksums of visited items
        self.visited = set()

        # List containing tagged items to extract as 2-tuple: (tag [e.g. MD5],
        # path)
        self._list = list() 
Example #26
Source File: datasets.py    From argus-freesound with MIT License
def get_folds_data(corrections=None):
    print("Start generate folds data")
    print("Audio config", get_audio_config())
    train_folds_df = pd.read_csv(config.train_folds_path)

    audio_paths_lst = []
    targets_lst = []
    folds_lst = []
    for i, row in train_folds_df.iterrows():
        labels = row.labels

        if corrections is not None:
            if row.fname in corrections:
                action = corrections[row.fname]
                if action == 'remove':
                    print(f"Skip {row.fname}")
                    continue
                else:
                    print(f"Replace labels {row.fname} from {labels} to {action}")
                    labels = action

        folds_lst.append(row.fold)
        audio_paths_lst.append(row.file_path)
        target = torch.zeros(len(config.classes))
        for label in labels.split(','):
            target[config.class2index[label]] = 1.
        targets_lst.append(target)

    with mp.Pool(N_WORKERS) as pool:
        images_lst = pool.map(read_as_melspectrogram, audio_paths_lst)

    return images_lst, targets_lst, folds_lst 
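Note that using the pool as a context manager calls terminate() on exit rather than close() followed by join(); that is safe here because pool.map() blocks until every result has been collected.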
Example #27
Source File: import_train_images.py    From L3C-PyTorch with GNU General Public License v3.0
def process_all_in(self, input_dir):
        images_dl = iter_images(input_dir)  # generator of paths

        # files this job should compress
        files_of_job = [p for _, p in job_enumerate(images_dl)]
        # files that were compressed already by somebody (i.e. this job earlier)
        processed_already = self.images_cleaned | self.images_discarded
        # resulting files to be compressed
        files_of_job = [p for p in files_of_job if get_fn(p) not in processed_already]

        N = len(files_of_job)
        if N == 0:
            print('Everything processed / nothing to process.')
            return

        num_process = 2 if NUM_TASKS > 1 else _NUM_PROCESSES
        print(f'Processing {N} images using {num_process} processes in {NUM_TASKS} tasks...')

        start = time.time()
        predicted_time = None
        with multiprocessing.Pool(processes=num_process) as pool:
            for i, clean in enumerate(pool.imap_unordered(self.process, files_of_job)):
                if i > 0 and i % 100 == 0:
                    time_per_img = (time.time() - start) / (i + 1)
                    time_remaining = time_per_img * (N - i)
                    if not predicted_time:
                        predicted_time = time_remaining
                    print(f'\r{time_per_img:.2e} s/img | '
                          f'{i / N * 100:.1f}% | '
                          f'{time_remaining / 60:.1f} min remaining', end='', flush=True) 
Example #28
Source File: alignproc.py    From svviz with MIT License
def multimap(namesToReferences, seqs):
    if not hasattr(multimap, "pool"):
        multimap.pool = multiprocessing.Pool(processes=misc.cpu_count_physical())

    pool = multimap.pool

    results = {}
    results = dict(pool.map_async(remaps, [(namesToReferences, seq) for seq in seqs]).get(999999))
    # results = dict(map(remaps, [(namesToReferences, seq) for seq in seqs]))

    return results 
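Passing a very large timeout to .get() is a common workaround rather than an oversight: a bare get() with no timeout can block in a way that never delivers KeyboardInterrupt to the main process, while a timed wait lets Ctrl-C interrupt the program.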
Example #29
Source File: dataloader_utils.py    From medicaldetectiontoolkit with Apache License 2.0
def unpack_dataset(folder, threads=8):
    case_identifiers = get_case_identifiers(folder)
    p = Pool(threads)
    npz_files = [os.path.join(folder, i + ".npz") for i in case_identifiers]
    p.map(convert_to_npy, npz_files)
    p.close()
    p.join() 
Example #30
Source File: pack_dataset.py    From medicaldetectiontoolkit with Apache License 2.0
def unpack_dataset(folder, threads=8):
    case_identifiers = get_case_identifiers(folder)
    p = Pool(threads)
    npz_files = [os.path.join(folder, i + ".npz") for i in case_identifiers]
    p.map(convert_to_npy, npz_files)
    p.close()
    p.join()