Python multiprocessing.dummy Examples

The following are 10 code examples of multiprocessing.dummy, which provides the same API as multiprocessing but backs it with threads rather than processes. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the multiprocessing module, or try the search function.
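Before the examples, a minimal standalone sketch may help: multiprocessing.dummy.Pool is a drop-in replacement for multiprocessing.Pool that runs its workers as threads in the current process, which makes it well suited to I/O-bound jobs such as the network and disk work below. The URLs and the fetch_status helper here are illustrative only.

import multiprocessing.dummy
import urllib.request

def fetch_status(url):
    # Return (url, HTTP status); status is None if the request fails.
    try:
        with urllib.request.urlopen(url, timeout=5) as response:
            return url, response.status
    except Exception:
        return url, None

if __name__ == "__main__":
    urls = ["https://example.org", "https://example.com"]
    # Thread-backed pool: same interface as multiprocessing.Pool, no extra processes.
    with multiprocessing.dummy.Pool(processes=4) as pool:
        for url, status in pool.imap_unordered(fetch_status, urls):
            print(url, status)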
Example #1
Source File: api.py    From pscheduler with Apache License 2.0
def api_ping_list(hosts, bind=None, timeout=None, threads=10):
    """
    Ping a list of hosts and return a list of their statuses.
    """

    if len(hosts) == 0:
        return {}

    # Work around a bug in 2.6
    # TODO: Get rid of this when 2.6 is no longer in the picture.
    if not hasattr(threading.current_thread(), "_children"):
        threading.current_thread()._children = weakref.WeakKeyDictionary()

    pool = multiprocessing.dummy.Pool(processes=min(len(hosts), threads))

    pool_args = [(host, timeout) for host in hosts]
    result = {}

    def ping_one(arg):
        host, timeout = arg
        up, _ = api_ping(host, bind=bind, timeout=timeout)
        return (host, up)

    for host, state in pool.imap(
            ping_one,
            pool_args,
            chunksize=1):
        result[host] = state
    pool.close()
    return result 
Example #2
Source File: DataLoader.py    From mxbox with BSD 3-Clause "New" or "Revised" License
def __init__(self, dataset, feedin_shape, collate_fn=default_collate, threads=1, shuffle=False):
        super(DataLoader, self).__init__()

        self.dataset = dataset
        self.threads = threads
        self.collate_fn = collate_fn(feedin_shape)
        # self.collate_fn = self.default_collate_fn

        # shape related variables

        self.data_shapes = feedin_shape['data']
        self.label_shapes = feedin_shape['label']
        self.batch_size = feedin_shape['batch_size']

        # loader related variables
        self.current = 0
        self.total = len(self.dataset)
        self.shuffle = shuffle
        self.map_index = list(range(self.total))

        # prepare for loading
        self.get_batch = self.get_batch_single_thread
        if self.threads > 1:  # multi-threaded read via a thread pool
            from multiprocessing.dummy import Pool as ThreadPool
            # self.pool = multiprocessing.Pool(self.threads)
            self.pool = ThreadPool(self.threads)
            self.get_batch = self.get_batch_multi_thread

        self.reset() 
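For orientation only: a multi-threaded get_batch in this style typically hands one index per sample to the pool and collates the results. The sketch below is a hypothetical illustration (load_sample and the dict layout are invented here), not the mxbox implementation.

from multiprocessing.dummy import Pool as ThreadPool

def load_sample(index):
    # Hypothetical per-sample loader; real code would read and decode the dataset entry.
    return {"data": [index], "label": index % 2}

def get_batch_multi_thread(pool, indices):
    # One worker thread per sample; pool.map preserves the input order.
    samples = pool.map(load_sample, indices)
    data = [s["data"] for s in samples]
    labels = [s["label"] for s in samples]
    return data, labels

pool = ThreadPool(4)
print(get_batch_multi_thread(pool, list(range(8))))
pool.close()
pool.join()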
Example #3
Source File: msearch_daemon.py    From search-MjoLniR with MIT License
def consume(self, records: Iterable[Mapping]) -> None:
        def work_fn() -> None:
            self._handle_records(iter_queue(self.work_queue))

        # Use the dummy pool since these workers will primarily wait on elasticsearch
        worker_pool = multiprocessing.dummy.Pool(self.n_workers, work_fn)
        try:
            for record in records:
                if 'complete' in record:
                    # This is handled directly, rather than queued, because the
                    # consumer guarantees the offset won't be committed until the
                    # next record is consumed. By not consuming any more records
                    # we guarantee at least once processing of these sigils.
                    self._reflect_end_run(record)
                else:
                    self.work_queue.put(record)
        except KeyboardInterrupt:
            # Simply exit the work loop, let everything clean up as expected.
            pass
        finally:
            worker_pool.close()
            for i in range(self.n_workers):
                self.work_queue.put(None)
            worker_pool.join()

        # It is possible, if some workers have errors, for the queue to not be
        # completely emptied. Make sure it gets finished
        if self.work_queue.qsize() > 0:
            log.warning('Work queue not completely drained on shut down. Draining')
            # We call repeatedly because the None values exit the iterator
            while self.work_queue.qsize() > 0:
                work_fn() 
Example #4
Source File: roi_extractor.py    From lightnet with MIT License
def main():
    global inference_lock
    from multiprocessing.dummy import Pool as ThreadPool
    import multiprocessing

    category_folders = glob.glob('%s/*' % (args.images))

    inference_lock = multiprocessing.Lock()
    cpu_n = multiprocessing.cpu_count()
    pool = ThreadPool(cpu_n)
    _ = pool.map(process, category_folders) 
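A side note on the lock used above: because multiprocessing.dummy workers are threads in one process, an ordinary threading.Lock (or the multiprocessing.Lock used here) is enough to serialize access to shared state. A minimal, self-contained sketch of that pattern (the counter and process function are made up for illustration):

import threading
from multiprocessing.dummy import Pool as ThreadPool

counter = 0
counter_lock = threading.Lock()

def process(item):
    global counter
    # The lock serializes updates to shared state across the worker threads.
    with counter_lock:
        counter += 1
    return item * item

pool = ThreadPool(4)
squares = pool.map(process, range(10))
pool.close()
pool.join()
print(squares, counter)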
Example #5
Source File: psdns.py    From pscheduler with Apache License 2.0
def dns_bulk_resolve(candidates, reverse=False, ip_version=None, threads=50):
    """
    Resolve a list of host names to IPs or, if reverse is true, IPs to
    host names.  Return a map of each result keyed to its candidate.

    WARNING: This function will create a pool of up to 'threads'
    threads.
    """

    # This is based loosely on http://stackoverflow.com/a/34377198

    if reverse and ip_version is not None:
        raise ValueError("Unable to force IP version when reverse-resolving")

    if ip_version is None:
        ip_version = 4
    __check_ip_version__(ip_version)

    result = {}

    if len(candidates) == 0:
        return result

    # Work around a bug in 2.6
    # TODO: Get rid of this when 2.6 is no longer in the picture.
    if not hasattr(threading.current_thread(), "_children"):
        threading.current_thread()._children = weakref.WeakKeyDictionary()

    pool = multiprocessing.dummy.Pool(
        processes=min(len(candidates), threads) )

    candidate_args = [ (candidate, ip_version) for candidate in candidates ]

    for ip, name in pool.imap(
        __reverser__ if reverse else __forwarder__,
        candidate_args,
        chunksize=1):
        result[ip] = name
    pool.close()
    return result 
Example #6
Source File: disassembler.py    From grap with MIT License
def timeout_worker(*arg):
    # One thread to process this file, with a timeout
    p = multiprocessing.dummy.Pool(1)
    res = p.apply_async(disas_worker, arg)
    try:
        out = res.get(timeout=arg[0][-1])
        p.close()
    except multiprocessing.TimeoutError:
        print("WARNING: Disassembly timeout for", arg[0][0])
        p.terminate()
        p.close()
        out = None

    return out 
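The timeout pattern above can be reproduced in isolation: submit the work with apply_async and bound the wait with get(timeout=...). The sketch below is a generic illustration (slow_task and the two-second limit are invented), not part of grap; note that terminate() on a thread-backed pool cannot kill a thread that is already running, it only prevents further work from being picked up.

import multiprocessing
import multiprocessing.dummy
import time

def slow_task(seconds):
    # Stand-in for a long-running job such as disassembling one file.
    time.sleep(seconds)
    return "done after %d s" % seconds

def run_with_timeout(seconds, timeout):
    pool = multiprocessing.dummy.Pool(1)
    result = pool.apply_async(slow_task, (seconds,))
    try:
        out = result.get(timeout=timeout)
        pool.close()
    except multiprocessing.TimeoutError:
        # Stop waiting; the worker thread itself keeps running to completion.
        pool.terminate()
        out = None
    return out

print(run_with_timeout(1, 2))   # done after 1 s
print(run_with_timeout(5, 2))   # None (timed out)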
Example #7
Source File: multi.py    From DeepSea with GNU General Public License v3.0
def _all(func, hosts):
    '''
    Internal function that allows a function to run on all hosts
    '''
    all_instances = []
    # threads should likely scale with cores or interfaces
    cpus = multiprocessing.cpu_count()
    threads = 4 * cpus
    log.debug('multi._all cpus count={}, thread count={}'.format(cpus, threads))
    pool = multiprocessing.dummy.Pool(threads)
    for instance in pool.map(func, hosts):
        all_instances.append(instance)

    return all_instances 
Example #8
Source File: test_multiprocessing.py    From ironpython2 with Apache License 2.0
def test_main(run=None):
    if sys.platform.startswith("linux"):
        try:
            lock = multiprocessing.RLock()
        except OSError:
            raise unittest.SkipTest("OSError raises on RLock creation, see issue 3111!")

    check_enough_semaphores()

    if run is None:
        from test.support import run_unittest as run

    util.get_temp_dir()     # creates temp directory for use by all processes

    multiprocessing.get_logger().setLevel(LOG_LEVEL)

    ProcessesMixin.pool = multiprocessing.Pool(4)
    ThreadsMixin.pool = multiprocessing.dummy.Pool(4)
    ManagerMixin.manager.__init__()
    ManagerMixin.manager.start()
    ManagerMixin.pool = ManagerMixin.manager.Pool(4)

    testcases = (
        sorted(testcases_processes.values(), key=lambda tc:tc.__name__) +
        sorted(testcases_threads.values(), key=lambda tc:tc.__name__) +
        sorted(testcases_manager.values(), key=lambda tc:tc.__name__) +
        testcases_other
        )

    loadTestsFromTestCase = unittest.defaultTestLoader.loadTestsFromTestCase
    suite = unittest.TestSuite(loadTestsFromTestCase(tc) for tc in testcases)
    # (ncoghlan): Whether or not sys.exc_clear is executed by the threading
    # module during these tests is at least platform dependent and possibly
    # non-deterministic on any given platform. So we don't mind if the listed
    # warnings aren't actually raised.
    with support.check_py3k_warnings(
            (".+__(get|set)slice__ has been removed", DeprecationWarning),
            (r"sys.exc_clear\(\) not supported", DeprecationWarning),
            quiet=True):
        run(suite)

    ThreadsMixin.pool.terminate()
    ProcessesMixin.pool.terminate()
    ManagerMixin.pool.terminate()
    ManagerMixin.manager.shutdown()

    del ProcessesMixin.pool, ThreadsMixin.pool, ManagerMixin.pool 
Example #9
Source File: file_io.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def write_to_buffer(dataframe, buffer_path, columns, expected_size=None):
  """Write a dataframe to a binary file for a dataset to consume.

  Args:
    dataframe: The pandas dataframe to be serialized.
    buffer_path: The path where the serialized results will be written.
    columns: The dataframe columns to be serialized.
    expected_size: The size in bytes of the serialized results. This is used to
      lazily construct the buffer.

  Returns:
    The path of the buffer.
  """
  if (tf.io.gfile.exists(buffer_path) and
      tf.io.gfile.stat(buffer_path).length > 0):
    actual_size = tf.io.gfile.stat(buffer_path).length
    if expected_size == actual_size:
      return buffer_path
    tf.compat.v1.logging.warning(
        "Existing buffer {} has size {}. Expected size {}. Deleting and "
        "rebuilding buffer.".format(buffer_path, actual_size, expected_size))
    tf.io.gfile.remove(buffer_path)

  if dataframe is None:
    raise ValueError(
        "dataframe was None but a valid existing buffer was not found.")

  tf.io.gfile.makedirs(os.path.split(buffer_path)[0])

  tf.compat.v1.logging.info("Constructing TFRecordDataset buffer: {}"
                            .format(buffer_path))

  count = 0
  pool = multiprocessing.dummy.Pool(multiprocessing.cpu_count())
  try:
    with tf.io.TFRecordWriter(buffer_path) as writer:
      for df_shards in iter_shard_dataframe(df=dataframe,
                                            rows_per_core=_ROWS_PER_CORE):
        _serialize_shards(df_shards, columns, pool, writer)
        count += sum([len(s) for s in df_shards])
        tf.compat.v1.logging.info("{}/{} examples written."
                                  .format(str(count).ljust(8), len(dataframe)))
  finally:
    pool.terminate()

  tf.compat.v1.logging.info("Buffer write complete.")
  return buffer_path 
Example #10
Source File: megascans.py    From clarisse_survival_kit with GNU General Public License v3.0
def get_json_data_from_directory(directory):
    """Get the JSON data contents required for material setup."""
    logging.debug("Searching for JSON...")
    files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    # Search for any JSON file. Custom Mixer scans don't have a suffix like the ones from the library.
    data = {}
    for f in files:
        filename, extension = os.path.splitext(f)
        if extension == ".json":
            logging.debug("...JSON found!!!")
            json_file = os.path.join(directory, filename + ".json")
            with open(json_file) as json_file:
                json_data = json.load(json_file)
                if not json_data:
                    return None
                meta_data = json_data.get('meta')
                logging.debug("Meta JSON Data: " + str(meta_data))
                if not meta_data:
                    return None
                categories = json_data.get('categories')
                logging.debug("Categories JSON Data: " + str(categories))
                if not categories:
                    return None
                maps = json_data.get('maps')
                logging.debug("JSON follows Megascans structure.")
                if categories:
                    if 'surface' in categories:
                        data['type'] = 'surface'
                    if '3d' in categories:
                        data['type'] = '3d'
                    if 'atlas' in categories:
                        data['type'] = 'atlas'
                    if '3dplant' in categories:
                        data['type'] = '3dplant'
                if meta_data:
                    for md in meta_data:
                        if md['key'] == "height":
                            data['surface_height'] = float((md['value']).replace("m", "").replace(" ", ""))
                        elif md['key'] == "scanArea":
                            data['scan_area'] = [float(val) for val in
                                                 (md['value']).replace("m", "").replace(" ", "").split("x")]
                        elif md['key'] == "tileable":
                            data['tileable'] = md['value']
                if maps:
                    for mp in maps:
                        if mp['type'] == 'displacement' and 'maxIntensity' in mp and 'minIntensity' in mp:
                            # getting average intensity, using 260 as max RGB since that's what Megascans is doing
                            data['displacement_offset'] = ((mp['maxIntensity'] + mp['minIntensity']) * 0.5) / 260.0
            break
    return data