Python multiprocessing.pool.Pool() Examples

The following are 30 code examples of multiprocessing.pool.Pool(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module multiprocessing.pool, or try the search function.
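Before the project examples, here is a minimal, self-contained sketch of the basic pattern most of the snippets below use: create a pool, map a picklable worker function over an iterable, and let the context manager handle cleanup. The worker function and inputs here are purely illustrative.

from multiprocessing.pool import Pool

def square(x):
    # Any picklable, module-level function can serve as the worker.
    return x * x

if __name__ == '__main__':
    # Pool is a context manager (Python 3.3+); __exit__ calls terminate().
    # map() blocks until all results are ready and preserves input order.
    with Pool(processes=4) as pool:
        results = pool.map(square, range(10), chunksize=2)
    print(results)

The same Pool object also provides imap, imap_unordered, apply_async and starmap, which the examples below combine with partial, tqdm and work queues as needed.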
Example #1
Source File: generate.py    From post--memorization-in-rnns with MIT License (7 votes)
def save_tfrecord(filename, dataset, verbose=False):
    observations = len(dataset['length'])

    serialized = []
    with Pool(processes=4) as pool:
        for serialized_string in tqdm(pool.imap(
            tfrecord_serializer,
            zip(dataset['length'], dataset['source'], dataset['target']),
            chunksize=10
        ), total=observations, disable=not verbose):
            serialized.append(serialized_string)

    # Save serialized dataset
    writer = tf.python_io.TFRecordWriter(
        filename,
        options=tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB
        )
    )

    for serialized_string in tqdm(serialized, disable=not verbose):
        writer.write(serialized_string)

    writer.close() 
Example #2
Source File: toutiao_spider.py    From capturer with MIT License (6 votes)
def main():
    print('\nWelcome here to get pictures from www.toutiao.com!')
    keyword = input('Please input your search keywords > ')
    count = None
    while count is None:
        number_str = input(
            'Please input the number of pictures you want (divisible by 20) > ')
        try:
            count = int(number_str)
        except ValueError:
            print('Please input a valid number!')

    if count > 0:
        print('Getting %s pictures...' % keyword)
        page_num = count // 20 + (0 if count % 20 == 0 else 1)
        offset_list = [x * 20 for x in range(0, page_num)]
        pool = Pool()
        partial_getter = partial(get_images_of, keyword=keyword)
        pool.map(partial_getter, offset_list)
        pool.close()
        pool.join()
    else:
        print('Cancelled!')
Example #3
Source File: cpdb_statistical_analysis_helper.py    From cellphonedb with MIT License (6 votes)
def shuffled_analysis(iterations: int, meta: pd.DataFrame, counts: pd.DataFrame, interactions: pd.DataFrame,
                      cluster_interactions: list, base_result: pd.DataFrame, threads: int, separator: str,
                      suffixes: tuple = ('_1', '_2'), counts_data: str = 'ensembl') -> list:
    """
    Shuffles meta and calculates the means for each shuffle, saving them in a list.

    Runs in multiple threads to make it faster.
    """
    core_logger.info('Running Statistical Analysis')
    with Pool(processes=threads) as pool:
        statistical_analysis_thread = partial(_statistical_analysis,
                                              base_result,
                                              cluster_interactions,
                                              counts,
                                              interactions,
                                              meta,
                                              separator,
                                              suffixes,
                                              counts_data=counts_data
                                              )
        results = pool.map(statistical_analysis_thread, range(iterations))

    return results 
Example #4
Source File: autocomplete.py    From post--memorization-in-rnns with MIT License (6 votes)
def save_tfrecord(filename, dataset, verbose=False):
    observations = len(dataset['length'])

    serialized = []
    with Pool(processes=4) as pool:
        for serialized_string in tqdm(pool.imap(
            tfrecord_serializer,
            zip(dataset['length'], dataset['source'], dataset['target']),
            chunksize=10
        ), total=observations, disable=not verbose):
            serialized.append(serialized_string)

    # Save serialized dataset
    writer = tf.python_io.TFRecordWriter(
        filename,
        options=tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB
        )
    )

    for serialized_string in tqdm(serialized, disable=not verbose):
        writer.write(serialized_string)

    writer.close() 
Example #5
Source File: level_iterator.py    From safelife with Apache License 2.0 (6 votes)
def fill_queue(self):
        if self.results is None:
            self.results = queue.deque(maxlen=self.max_queue)
        if self.num_workers > 0:
            if self.pool is None:
                self.pool = Pool(processes=self.num_workers)

        while len(self.results) < self.max_queue:
            if self.distinct_levels is not None and self.idx >= self.distinct_levels:
                break
            elif not self.repeat_levels and self.idx >= len(self.file_data):
                break
            else:
                data = self.get_next_parameters()
                if data is None:
                    break
            self.idx += 1
            kwargs = {'seed': self._seed.spawn(1)[0]}
            if self.num_workers > 0:
                result = self.pool.apply_async(_game_from_data, data, kwargs)
            else:
                result = _game_from_data(*data, **kwargs)
            self.results.append((data, result)) 
Example #6
Source File: F1_running_score.py    From openseg.pytorch with MIT License (6 votes)
def __init__(self, configer=None, num_classes=None, boundary_threshold=0.00088, num_proc=15):

        assert configer is not None or num_classes is not None
        self.configer = configer

        if configer is not None:
            self.n_classes = self.configer.get('data', 'num_classes')
        else:
            self.n_classes = num_classes

        self.ignore_index = -1
        self.boundary_threshold = boundary_threshold
        self.pool = Pool(processes=num_proc)
        self.num_proc = num_proc

        self._Fpc = 0
        self._Fc = 0
        self.seg_map_cache = []
        self.gt_map_cache = [] 
Example #7
Source File: category_vector.py    From talkingdata-adtracking-fraud-detection with MIT License (6 votes)
def create_features_from_path(self, train_path: str, test_path: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
        column_pairs = self.get_column_pairs()

        col1s = []
        col2s = []
        latent_vectors = []
        gc.collect()
        with Pool(4) as p:
            for col1, col2, latent_vector in p.map(
                    partial(self.compute_latent_vectors, train_path=train_path, test_path=test_path), column_pairs):
                col1s.append(col1)
                col2s.append(col2)
                latent_vectors.append(latent_vector.astype(np.float32))
        gc.collect()
        return self.get_feature(train_path, col1s, col2s, latent_vectors), \
               self.get_feature(test_path, col1s, col2s, latent_vectors) 
Example #8
Source File: engine.py    From NoXss with MIT License (6 votes)
def verify_async(case_list, coroutine):
        """
        Verify using the gevent lib.
        :param case_list:
        :param coroutine:
        :return:
        """
        from gevent import monkey
        monkey.patch_all()
        result = []
        geventPool = pool.Pool(coroutine)
        tasks = [geventPool.spawn(Verify.request_and_verify, case) for case in case_list]
        gevent.joinall(tasks)
        for i in tasks:
            if i.value is not None:
                result.append(i.value)
        print_info('Total Verify-Case is: %s, %s error happened.' % (len(case_list), Verify.ERROR_COUNT))
        return result 
Example #9
Source File: engine.py    From NoXss with MIT License (6 votes)
def deduplicate(self, url_list):
        print 'Start to deduplicate all urls.'
        filtered_path = self.file + '.filtered'
        if os.path.exists(filtered_path):
            print '%s has been filtered as %s.' % (self.file, filtered_path)
            with open(filtered_path) as f:
                filtered = f.read().split('\n')
                return filtered
        filtered = []
        # result = map(filter, url_list)
        from multiprocessing import cpu_count
        from multiprocessing.pool import Pool
        p = Pool(cpu_count())
        result = p.map(url_filter, url_list)
        for i in result:
            if isinstance(i, str):
                filtered.append(i)
        with open(filtered_path, 'w') as f:
            f.write('\n'.join(filtered))
        print 'Saved filtered urls to %s.' % filtered_path
        return filtered 
Example #10
Source File: generate_questions.py    From rc-data with Apache License 2.0 (6 votes)
def GenerateMode(corpus, context_token_limit):
  for dataset in datasets:
    print 'Generating questions for the %s set:' % dataset

    urls_filename = '%s/wayback_%s_urls.txt' % (corpus, dataset)
    urls = ReadUrls(urls_filename)

    p = Pool()
    question_context_lists = p.imap_unordered(
        GenerateMapper, izip(urls, repeat(corpus), repeat(context_token_limit)))

    progress_bar = ProgressBar(len(urls))
    for question_context_list in question_context_lists:
      if question_context_list:
        for question_context in question_context_list:
          WriteQuestionContext(question_context, corpus, dataset)

      progress_bar.Increment() 
Example #11
Source File: download_wheels.py    From piqueserver with GNU General Public License v3.0 (6 votes)
def main():
    parser = argparse.ArgumentParser(
        description="Downloads wheels from AppVeyor")
    parser.add_argument("--buildid", "-id", type=int,
                        default=None, help='AppVeyor build id')
    parser.add_argument("--pool", "-p", type=int, default=6,
                        help="Multiprocess pool size")
    parser.add_argument("--dir", "-d", type=check_dir, default=getcwd(),
                        help='Directory to download the files into.')
    options = parser.parse_args()

    download_urls = []
    for job in get_jobs(options.buildid):
        download_urls += get_artifacts(job)
    # Download them in parallel
    pool = Pool(options.pool)
    for download_url in download_urls:
        pool.apply_async(download_file, args=(download_url, options.dir))
    pool.close()
    pool.join()
    print("Done") 
Example #12
Source File: download.py    From kaggle-hpa with BSD 2-Clause "Simplified" License (6 votes)
def main():
    # Parameters
    process_num = 24
    image_size = (512, 512)
    url = 'http://v18.proteinatlas.org/images/'
    csv_path = "data/HPAv18RBGY_wodpl.csv"
    save_dir = "data/raw/external"

    os.makedirs(save_dir, exist_ok=True)

    print('Parent process %s.' % os.getpid())
    img_list = list(pd.read_csv(csv_path)['Id'])
    img_splits = np.array_split(img_list, process_num)
    assert sum([len(v) for v in img_splits]) == len(img_list)
    p = Pool(process_num)
    for i, split in enumerate(img_splits):
        p.apply_async(
            download, args=(str(i), list(split), url, save_dir, image_size)
        )
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.') 
Example #13
Source File: gen_classes.py    From zenpy with GNU General Public License v3.0 (6 votes)
def process_specification_directory(glob_pattern, outfile_name, namespace, write_baseclass=True):
    with open(os.path.join(options.out_path, outfile_name), 'w+') as out_file:
        paths = [p for p in glob.glob(os.path.join(options.spec_path, glob_pattern))]
        classes = list()

        func = functools.partial(process_file, namespace)
        with Pool() as pool:
            classes.extend(pool.map(func, paths))
        print("Formatting...")
        formatted_code = FormatCode("\n".join(sorted(classes)))[0]
        if write_baseclass:
            header = BASE_CLASS
        else:
            header = "from zenpy.lib.api_objects import BaseObject\nimport dateutil.parser"

        out_file.write("\n\n\n".join((header, formatted_code))) 
Example #14
Source File: feature_union.py    From mercari-solution with MIT License (6 votes)
def transform(self, X):
        """Transform X separately by each transformer, concatenate results.

        Parameters
        ----------
        X : iterable or array-like, depending on transformers
            Input data to be transformed.

        Returns
        -------
        X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
            hstack of results of transformers. sum_n_components is the
            sum of n_components (output dimension) over transformers.
        """
        with Pool(self.n_jobs) as pool:
            Xs = pool.starmap(_transform_one, ((trans, weight, X[trans.steps[0][1].columns])
                                               for name, trans, weight in self._iter()))
        if not Xs:
            # All transformers are None
            return np.zeros((X.shape[0], 0))
        if any(sparse.issparse(f) for f in Xs):
            Xs = sparse.hstack(Xs).tocsr()
        else:
            Xs = np.hstack(Xs)
        return Xs 
Example #15
Source File: base_worker.py    From mycroft with MIT License (6 votes)
def _schedule_runs_lk(self, et_pool, job):
        """ Schedule runs to execute up to max possible parallelism
        suffix '_lk' means caller must already hold lock.

        :param et_pool: A multiprocessor pool handle
        :type: Pool
        :param job: current job
        :type: WorkerJob
        """
        while (self._has_more_runs_to_schedule(job) and
               job.runs_in_flight < job.max_runs_in_flight):
            run = job.schedule_next_run()
            if run.id is None:
                raise ValueError("Unexpected end of runs")

            self.etl_helper.etl_step_started(job.msg_dict, run.id, run.step)

            log('scheduled: {0}'.format(run.id))
            et_pool.apply_async(
                run.func,
                args=run.args,
                callback=self._create_run_complete_callback(job, run.id, run.step),
            )
            job.runs_in_flight += 1 
Example #16
Source File: feature_union.py    From mercari-solution with MIT License (6 votes)
def fit(self, X, y=None):
        """Fit all transformers using X.

        Parameters
        ----------
        X : iterable or array-like, depending on transformers
            Input data, used to fit transformers.

        y : array-like, shape (n_samples, ...), optional
            Targets for supervised learning.

        Returns
        -------
        self : FeatureUnion
            This estimator
        """
        self.transformer_list = list(self.transformer_list)
        self._validate_transformers()
        with Pool(self.n_jobs) as pool:
            transformers = pool.starmap(_fit_one_transformer,
                                        ((trans, X[trans.steps[0][1].columns], y) for _, trans, _ in self._iter()))
        self._update_transformer_list(transformers)
        return self 
Example #17
Source File: metric.py    From dsb2018_topcoders with MIT License (6 votes)
def calculate_cell_score_selim(y_true, y_pred, num_threads=32, ids=None):
    yps = []
    for m in range(len(y_true)):
        yps.append((y_true[m].copy(), y_pred[m].copy()))
    pool = Pool(num_threads)
    results = pool.map(calculate_jaccard, yps)
    if ids:
        import pandas as pd
        s_iou = np.argsort(results)
        d = []
        for i in range(len(s_iou)):
            id = ids[s_iou[i]]
            res = results[s_iou[i]]
            d.append([id, res])
            pd.DataFrame(d, columns=["ID", "METRIC_SCORE"]).to_csv("gt_vs_oof.csv", index=False)

    return np.array(results).mean() 
Example #18
Source File: __init__.py    From Splunking-Crime with GNU Affero General Public License v3.0 (5 votes)
def Pool(processes=None, initializer=None, initargs=(), maxtasksperchild=None):
    '''
    Returns a process pool object
    '''
    from multiprocessing.pool import Pool
    return Pool(processes, initializer, initargs, maxtasksperchild) 
Example #19
Source File: metric.py    From dsb2018_topcoders with MIT License (5 votes)
def calculate_cell_score_kaggle(y_true, y_pred, num_threads=32):
    yps = []
    for m in range(len(y_true)):
        yps.append((y_true[m].copy(), y_pred[m].copy()))
    pool = Pool(num_threads)
    results = pool.map(score_kaggle, yps)
    return np.mean(results) 
Example #20
Source File: generate_questions.py    From rc-data with Apache License 2.0 (5 votes)
def StoreMode(corpus):
  for dataset in datasets:
    print 'Storing news stories for the %s set:' % dataset
    urls_filename = '%s/wayback_%s_urls.txt' % (corpus, dataset)
    urls = ReadUrls(urls_filename)

    p = Pool()
    stories = p.imap_unordered(StoreMapper, izip(urls, repeat(corpus)))

    progress_bar = ProgressBar(len(urls))
    for story in stories:
      if story:
        WriteStory(story, corpus)

      progress_bar.Increment() 
Example #21
Source File: tasks.py    From agents-aea with Apache License 2.0 (5 votes)
def _stop_pool(self) -> None:
        """
        Stop internal task pool.

        :return: None
        """
        if not self._pool:
            logger.debug("Pool is not started!.")
            return

        self._pool = cast(Pool, self._pool)
        self._pool.terminate()
        self._pool.join()
        self._pool = None 
Example #22
Source File: tasks.py    From agents-aea with Apache License 2.0 (5 votes)
def _start_pool(self) -> None:
        """
        Start internal task pool.

        Only one pool will be created.

        :return: None
        """
        if self._pool:
            logger.debug("Pool was already started!.")
            return
        self._pool = Pool(self._nb_workers, initializer=init_worker) 
Example #23
Source File: tasks.py    From agents-aea with Apache License 2.0 (5 votes)
def enqueue_task(
        self, func: Callable, args: Sequence = (), kwds: Optional[Dict[str, Any]] = None
    ) -> int:
        """
        Enqueue a task with the executor.

        :param func: the callable instance to be enqueued
        :param args: the positional arguments to be passed to the function.
        :param kwds: the keyword arguments to be passed to the function.
        :return: the task id used to get the result.
        :raises ValueError: if the task manager is not running.
        """
        with self._lock:
            if self._stopped:
                raise ValueError("Task manager not running.")

            if not self._pool and self._is_lazy_pool_start:
                self._start_pool()

            self._pool = cast(Pool, self._pool)
            task_id = self._task_enqueued_counter
            self._task_enqueued_counter += 1
            async_result = self._pool.apply_async(
                func, args=args, kwds=kwds if kwds is not None else {}
            )
            self._results_by_task_id[task_id] = async_result
            return task_id 
Example #24
Source File: tasks.py    From agents-aea with Apache License 2.0 (5 votes)
def __init__(self, nb_workers: int = 1, is_lazy_pool_start: bool = True):
        """
        Initialize the task manager.

        :param nb_workers: the number of worker processes.
        :param is_lazy_pool_start: option to postpone pool creation till the first enqueue_task called.
        """
        self._nb_workers = nb_workers
        self._is_lazy_pool_start = is_lazy_pool_start
        self._pool = None  # type: Optional[Pool]
        self._stopped = True
        self._lock = threading.Lock()

        self._task_enqueued_counter = 0
        self._results_by_task_id = {}  # type: Dict[int, Any] 
Example #25
Source File: reposcan.py    From vmaas with GNU General Public License v2.0 (5 votes)
def cancel(cls):
        """Terminate the process pool."""
        with disabled_signals():
            cls.workers.terminate()
            cls.workers.join()
            cls.workers = Pool(1)
            cls.finish() 
Example #26
Source File: tilt_angle_estimation.py    From aitom with GNU General Public License v3.0 (5 votes)
def scan(tilt_angle_scan_range, v_abs, n_proc=0, n_chunk=20):
    from multiprocessing.pool import Pool
    pool = Pool(processes=n_proc)
    pool_results = []

    tasks = []
    for ang1, ang2, tilt_axis, light_axis in itertools.product(
            range(-tilt_angle_scan_range[1], -tilt_angle_scan_range[0] + 1),
            range(tilt_angle_scan_range[0], tilt_angle_scan_range[1] + 1), range(3), range(3)):
        if tilt_axis == light_axis: continue
        tasks.append({'ang1': ang1, 'ang2': ang2, 'tilt_axis': tilt_axis, 'light_axis': light_axis})

    while tasks:
        # wedge_mask_cor(v_abs=v, ops=tasks[:n_chunk])
        pool_results.append(pool.apply_async(func=wedge_mask_cor, kwds={'v_abs': v_abs, 'ops': tasks[:n_chunk]}))
        tasks = tasks[n_chunk:]

    best = None
    for re in pool_results:
        for r in re.get(9999999):
            print('\r', r['ang1'], r['ang2'], r['tilt_axis'], r['light_axis'], r['cor'], '        ')  # print progress so we can check that the scan covers the correct range
            sys.stdout.flush()

            if best is None:
                best = r
                continue

            if r['cor'] > best['cor']:
                best = r

    assert best is not None
    return best 
Example #27
Source File: util.py    From aitom with GNU General Public License v3.0 (5 votes)
def align_to_templates(self, rec=None, segmentation_tg_op=None, tem_keys=None, template_wedge_cutoff=0.1, align_op=None, multiprocessing=False):
    vi = None
    if align_op['with_missing_wedge']:
        v = self.cache.get_mrc(rec['subtomogram'])
        vm = self.cache.get_mrc(rec['mask'])
    else:
        raise Exception('the following options need to be double checked')
        if ('template' not in rec):
            rec['template'] = None
        vi = impute_vol_keys(vk=rec, ang=rec['angle'], loc=rec['loc'], tk=rec['template'], align_to_template=False, normalize=True, cache=self.cache)['vi']
    if ((segmentation_tg_op is not None) and ('template' in rec) and ('segmentation' in rec['template'])):
        v = align_to_templates__segment(rec=rec, v=v, segmentation_tg_op=segmentation_tg_op)['v']
    if multiprocessing:
        if (self.pool is None):
            self.pool = Pool()
        pool_results = [self.pool.apply_async(func=align_to_templates__pair_align, kwds={'c': c, 't_key': tem_keys[c], 'v': v, 'vm': vm, 'align_op': align_op, }) for c in tem_keys]
        align_re = {}
        for r in pool_results:
            at_re = r.get(999999)
            c = at_re['c']
            align_re[c] = at_re
            if N.isnan(align_re[c]['score']):
                if (self.logger is not None):
                    self.logger.warning('alignment failed: rec %s, template %s, error %s ', repr(rec), repr(tem_keys[c]), repr(align_re[c]['err']))
        self.pool.close()
        self.pool = None
    else:
        align_re = {}
        for c in tem_keys:
            if self.work_queue.done_tasks_contains(self.task.task_id):
                raise Exception('Duplicated task')
            align_re[c] = align_to_templates__pair_align(c=c, t_key=tem_keys[c], v=v, vm=vm, align_op=align_op)
            if N.isnan(align_re[c]['score']):
                if (self.logger is not None):
                    self.logger.warning('alignment failed: rec %s, template %s, error %s ', repr(rec), repr(tem_keys[c]), repr(align_re[c]['err']))
    return {'vol_key': rec, 'align': align_re, } 
Example #28
Source File: util.py    From aitom with GNU General Public License v3.0 (5 votes)
def template_segmentation(self, tk, op, multiprocessing=False):
    if multiprocessing:
        if (self.pool is None):
            self.pool = Pool()
        pool_results = [self.pool.apply_async(func=template_segmentation__single, kwds={'c': c, 'tk': tk[c], 'op': op, }) for c in tk]
        for r in pool_results:
            r = r.get(999999)
            tk[r['c']] = r['tk']
        self.pool.close()
        self.pool = None
    else:
        for c in tk:
            r = template_segmentation__single(c=c, tk=copy.deepcopy(tk[c]), op=copy.deepcopy(op))
            tk[r['c']] = r['tk'] 
Example #29
Source File: multiprocessing.py    From python-pool-performance with MIT License (5 votes)
def init_pool(self, worker_count):
        return Pool(worker_count) 
Example #30
Source File: dataset_utils.py    From ACDC2017 with Apache License 2.0 (5 votes)
def run_preprocessing(folder="/media/fabian/My Book/datasets/ACDC/training/",
                      folder_out="/media/fabian/DeepLearningData/datasets/ACDC_forReal_orig_Z/", keep_z_spacing=True):
    patient_info = generate_patient_info(folder)

    if not os.path.isdir(folder_out):
        os.mkdir(folder_out)
    with open(os.path.join(folder_out, "patient_info.pkl"), 'w') as f:
        cPickle.dump(patient_info, f)

    # beware of z spacing!!! see process_patient for more info!
    ids = range(101)
    p = pool.Pool(8)
    p.map(process_patient, zip(ids, [patient_info]*101, [folder]*101, [folder_out]*101, [keep_z_spacing]*101))
    p.close()
    p.join()