Python distributed.Client() Examples

The following are 30 code examples of distributed.Client(), collected from open source projects. The original project and source file are noted above each example. You may also want to check out all available functions/classes of the module distributed.
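Most of the examples below share one basic pattern: create a Client (optionally pointing it at an existing scheduler), submit work, gather the results, and close the client. Here is a minimal, self-contained sketch of that pattern; the inc function and the worker counts are illustrative and not taken from any project below.

from distributed import Client

def inc(x):
    return x + 1

if __name__ == '__main__':
    # With no address argument, Client() starts a LocalCluster on this
    # machine; the kwargs below are forwarded to LocalCluster.
    client = Client(n_workers=2, threads_per_worker=1)
    futures = client.map(inc, range(10))  # submit one task per input
    results = client.gather(futures)      # block until all tasks finish
    print(results)                        # [1, 2, ..., 10]
    client.close()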
Example #1
Source File: distributed.py    From cosima-cookbook with Apache License 2.0
import getpass
import os
import socket

from distributed import Client, LocalCluster

def start_cluster(diagnostics_port=0):
    "Set up a LocalCluster for distributed"

    hostname = socket.gethostname()
    n_workers = os.cpu_count() // 2
    cluster = LocalCluster(ip='localhost',
                           n_workers=n_workers,
                           diagnostics_port=diagnostics_port,
                           memory_limit=6e9)
    client = Client(cluster)

    params = {'bokeh_port': cluster.scheduler.services['bokeh'].port,
              'user': getpass.getuser(),
              'scheduler_ip': cluster.scheduler.ip,
              'hostname': hostname}

    print("If the link to the dashboard below doesn't work, run this command on a local terminal to set up an SSH tunnel:")
    print()
    print("  ssh -N -L {bokeh_port}:{scheduler_ip}:{bokeh_port} {hostname}.nci.org.au -l {user}".format(**params))

    return client
Example #2
Source File: runArboreto.py    From Beeline with GNU General Public License v3.0
import pandas as pd
from arboreto.algo import genie3, grnboost2
from distributed import Client

def main(args):
    # parseArgs is defined elsewhere in runArboreto.py
    opts, args = parseArgs(args)
    inDF = pd.read_csv(opts.inFile, sep='\t', index_col=0, header=0)

    client = Client(processes=False)

    if opts.algo == 'GENIE3':
        network = genie3(inDF, client_or_address=client)
        network.to_csv(opts.outFile, index=False, sep='\t')

    elif opts.algo == 'GRNBoost2':
        network = grnboost2(inDF, client_or_address=client)
        network.to_csv(opts.outFile, index=False, sep='\t')

    else:
        print("Wrong algorithm name. Should be either GENIE3 or GRNBoost2.")
Example #3
Source File: dask_sampler.py    From pyABC with BSD 3-Clause "New" or "Revised" License
def __init__(self, dask_client=None, client_max_jobs=np.inf,
                 default_pickle=False, batch_size=1):
        super().__init__()

        # Assign Client
        if dask_client is None:
            dask_client = Client()
        self.my_client = dask_client

        # Client options
        self.client_max_jobs = client_max_jobs

        # Job state
        self.jobs_queued = 0

        # For dask, we use cloudpickle by default
        self.default_pickle = default_pickle

        # Batch size
        self.batch_size = batch_size
Example #4
Source File: dask_utils.py    From aicsimageio with BSD 3-Clause "New" or "Revised" License
import logging
from typing import Optional, Tuple

from distributed import Client, LocalCluster

log = logging.getLogger(__name__)  # module-level logger used by the functions below

def spawn_cluster_and_client(
    address: Optional[str] = None, **kwargs
) -> Tuple[Optional[LocalCluster], Optional[Client]]:
    """
    If provided an address, create a Dask Client connection.
    If not provided an address, create a LocalCluster and Client connection.
    If not provided an address, other Dask kwargs are accepted and passed down to the
    LocalCluster object.

    Notes
    -----
    When using this function, the processing machine or container must have networking
    capabilities enabled to function properly.
    """
    cluster = None
    if address is not None:
        client = Client(address)
        log.info(f"Connected to Remote Dask Cluster: {client}")
    else:
        cluster = LocalCluster(**kwargs)
        client = Client(cluster)
        log.info(f"Connected to Local Dask Cluster: {client}")

    return cluster, client 
Example #5
Source File: dask_utils.py    From aicsimageio with BSD 3-Clause "New" or "Revised" License
from typing import Optional, Tuple

from distributed import Client, LocalCluster

def shutdown_cluster_and_client(
    cluster: Optional[LocalCluster], client: Optional[Client]
) -> Tuple[Optional[LocalCluster], Optional[Client]]:
    """
    Shutdown a cluster and client.

    Notes
    -----
    When using this function, the processing machine or container must have networking
    capabilities enabled to function properly.
    """
    if cluster is not None:
        cluster.close()
    if client is not None:
        client.shutdown()
        client.close()

    return cluster, client 
Example #6
Source File: dask_utils.py    From aicsimageio with BSD 3-Clause "New" or "Revised" License
from contextlib import contextmanager
from typing import Optional

# spawn_cluster_and_client / shutdown_cluster_and_client are defined above
# (Examples #4 and #5, same source file)

@contextmanager
def cluster_and_client(address: Optional[str] = None, **kwargs):
    """
    If provided an address, create a Dask Client connection.
    If not provided an address, create a LocalCluster and Client connection.
    If not provided an address, other Dask kwargs are accepted and passed down to the
    LocalCluster object.

    These objects will only live for the duration of this context manager.

    Examples
    --------
    >>> with cluster_and_client() as (cluster, client):
    ...     img1 = AICSImage("1.tiff")
    ...     img2 = AICSImage("2.czi")
    ...     other processing

    Notes
    -----
    When using this context manager, the processing machine or container must have
    networking capabilities enabled to function properly.
    """
    try:
        cluster, client = spawn_cluster_and_client(address=address, **kwargs)
        yield cluster, client
    finally:
        shutdown_cluster_and_client(cluster=cluster, client=client) 
Example #7
Source File: client.py    From dask-gateway with BSD 3-Clause "New" or "Revised" License
def get_client(self, set_as_default=True):
        """Get a ``Client`` for this cluster.

        Returns
        -------
        client : dask.distributed.Client
        """
        client = Client(
            self,
            security=self.security,
            set_as_default=set_as_default,
            asynchronous=self.asynchronous,
            loop=self.loop,
        )
        if not self.asynchronous:
            self._clients.add(client)
        return client 
Example #8
Source File: test_proxies.py    From dask-gateway with BSD 3-Clause "New" or "Revised" License
async def test_scheduler_proxy(proxy, cluster_and_security):
    cluster, security = cluster_and_security

    proxied_addr = f"gateway://{proxy.tcp_address}/temp"

    # Add a route
    await proxy.add_route(kind="SNI", sni="temp", target=cluster.scheduler_address)

    # Proxy works
    async def test_works():
        async with Client(proxied_addr, security=security, asynchronous=True) as client:
            res = await client.run_on_scheduler(lambda x: x + 1, 1)
            assert res == 2

    await with_retries(test_works, 5)

    # Remove the route; removing it twice verifies removal is idempotent
    await proxy.remove_route(kind="SNI", sni="temp")
    await proxy.remove_route(kind="SNI", sni="temp")
Example #9
Source File: test_runner.py    From adaptive with BSD 3-Clause "New" or "Revised" License
def test_distributed_executor():
    from distributed import Client

    learner = Learner1D(linear, (-1, 1))
    client = Client(n_workers=1)
    BlockingRunner(learner, trivial_goal, executor=client)
    client.shutdown()
    assert learner.npoints > 0 
Example #10
Source File: ClimatologySpark2.py    From incubator-sdap-nexus with Apache License 2.0
def parallelStatsDaskSimple(urlSplits, ds, nEpochs, variable, mask, coordinates, reader, outHdfsPath, averagingConfig,
                            sparkConfig,
                            accumulators=['count', 'mean', 'M2', 'min', 'max']):
    '''Compute N-day climatology statistics in parallel using Dask distributed.'''
    if not sparkConfig.startswith('dask,'):
        print("dask: configuration must be of form 'dask,n'", file=sys.stderr)
        sys.exit(1)
    numPartitions = int(sparkConfig.split(',')[1])

    with Timer("Configure Dask distributed"):
        from distributed import Client, as_completed
        client = Client(DaskClientEndpoint)  # DaskClientEndpoint: scheduler address defined elsewhere in this module

    print('Starting parallel Stats using Dask . . .', file=sys.stderr)
    start = time.time()
    futures = client.map(
        lambda urls: parallelStatsPipeline(urls, ds, nEpochs, variable, mask, coordinates, reader, averagingConfig,
                                           outHdfsPath, accumulators), urlSplits)

    outputFiles = []
    for future in as_completed(futures):
        outputFile = future.result()
        outputFiles.append(outputFile)
        end = time.time()
        print("parallelStats: Completed %s in %0.3f seconds." % (outputFile, (end - start)), file=sys.stderr)
    return outputFiles 
Example #11
Source File: conftest.py    From kartothek with MIT License
import distributed.utils_test
import pytest
from distributed import Client

_client = None  # populated by the fixture below

@pytest.fixture
def setup_dask_distributed():
    """
    This fixture makes all dask tests effectively use distributed under the hood.
    """
    global _client
    with distributed.utils_test.cluster() as (scheduler, workers):
        _client = Client(scheduler["address"])
        yield 
Example #12
Source File: test_model_selection.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_scheduler_param_distributed(loop):  # noqa
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)
    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop) as client:
            gs = dcv.GridSearchCV(MockClassifier(), {"foo_param": [0, 1, 2]}, cv=3)
            gs.fit(X, y)

            def f(dask_scheduler):
                return len(dask_scheduler.transition_log)

            assert client.run_on_scheduler(f)  # some work happened on cluster 
Example #13
Source File: __init__.py    From modin with Apache License 2.0
def _update_engine(publisher: Publisher):
    global DEFAULT_NPARTITIONS, dask_client

    num_cpus = DEFAULT_NPARTITIONS
    if publisher.get() == "Ray":
        import ray

        if _is_first_update.get("Ray", True):
            initialize_ray()
        num_cpus = ray.cluster_resources()["CPU"]
    elif publisher.get() == "Dask":  # pragma: no cover
        from distributed.client import get_client

        if threading.current_thread().name == "MainThread" and _is_first_update.get(
            "Dask", True
        ):
            import warnings

            warnings.warn("The Dask Engine for Modin is experimental.")

        try:
            dask_client = get_client()
        except ValueError:
            from distributed import Client

            num_cpus = os.environ.get("MODIN_CPUS", None) or multiprocessing.cpu_count()
            dask_client = Client(n_workers=int(num_cpus))

    elif publisher.get() != "Python":
        raise ImportError("Unrecognized execution engine: {}.".format(publisher.get()))

    _is_first_update[publisher.get()] = False
    DEFAULT_NPARTITIONS = max(4, int(num_cpus)) 
Example #14
Source File: ga_chp_bq_advanced_preprocessor.py    From MorphL-Community-Edition with Apache License 2.0
from distributed import Client

def main():
    # process_dataframe and the HDFS_DIR_* / TRAINING_OR_PREDICTION settings
    # are defined elsewhere in this module
    client = Client()
    if TRAINING_OR_PREDICTION == 'training':
        process_dataframe(client, HDFS_DIR_INPUT_TRAINING,
                          HDFS_DIR_OUTPUT_TRAINING)
    else:
        process_dataframe(client, HDFS_DIR_INPUT_PREDICTION,
                          HDFS_DIR_OUTPUT_PREDICTION) 
Example #15
Source File: ga_chp_bq_model_generator.py    From MorphL-Community-Edition with Apache License 2.0
import dask.dataframe as dd
from distributed import Client

def main():
    client = Client()
    # HDFS_DIR_INPUT and ModelGenerator are defined elsewhere in this module
    dask_df = client.persist(dd.read_parquet(HDFS_DIR_INPUT))
    ModelGenerator(dask_df).generate_and_save_model() 
Example #16
Source File: ga_chp_advanced_preprocessor.py    From MorphL-Community-Edition with Apache License 2.0
from distributed import Client

def main():
    # process_dataframe and the HDFS_DIR_* / TRAINING_OR_PREDICTION settings
    # are defined elsewhere in this module
    client = Client()
    if TRAINING_OR_PREDICTION == 'training':
        process_dataframe(client, HDFS_DIR_INPUT_TRAINING, HDFS_DIR_OUTPUT_TRAINING)
    else:
        process_dataframe(client, HDFS_DIR_INPUT_PREDICTION, HDFS_DIR_OUTPUT_PREDICTION) 
Example #17
Source File: ga_chp_model_generator.py    From MorphL-Community-Edition with Apache License 2.0
import dask.dataframe as dd
from distributed import Client

def main():
    client = Client()
    # HDFS_DIR_INPUT and ModelGenerator are defined elsewhere in this module
    dask_df = client.persist(dd.read_parquet(HDFS_DIR_INPUT))
    ModelGenerator(dask_df).generate_and_save_model() 
Example #18
Source File: test_runner.py    From adaptive with BSD 3-Clause "New" or "Revised" License
import os

import pytest

@pytest.fixture
def ipyparallel_executor():
    from ipyparallel import Client

    if os.name == "nt":
        import wexpect as expect
    else:
        import pexpect as expect

    child = expect.spawn("ipcluster start -n 1")
    child.expect("Engines appear to have started successfully", timeout=35)
    yield Client()
    if not child.terminate(force=True):
        raise RuntimeError("Could not stop ipcluster") 
Example #19
Source File: runner.py    From adaptive with BSD 3-Clause "New" or "Revised" License
def _ensure_executor(executor):
    if executor is None:
        executor = _default_executor()

    if isinstance(executor, concurrent.Executor):
        return executor
    elif with_ipyparallel and isinstance(executor, ipyparallel.Client):
        return executor.executor()
    elif with_distributed and isinstance(executor, distributed.Client):
        return executor.get_executor()
    else:
        raise TypeError(
            "Only a concurrent.futures.Executor, distributed.Client,"
            " or ipyparallel.Client can be used."
        ) 
Example #20
Source File: automate.py    From aospy with Apache License 2.0
def _exec_calcs(calcs, parallelize=False, client=None, **compute_kwargs):
    """Execute the given calculations.

    Parameters
    ----------
    calcs : Sequence of ``aospy.Calc`` objects
    parallelize : bool, default False
        Whether to submit the calculations in parallel or not
    client : distributed.Client or None
        The distributed Client used if parallelize is set to True; if None
        a distributed LocalCluster is used.
    compute_kwargs : dict of keyword arguments passed to ``Calc.compute``

    Returns
    -------
    A list of the values returned by each Calc object that was executed.
    """
    if parallelize:
        def func(calc):
            """Wrap _compute_or_skip_on_error to require only the calc
            argument"""
            if 'write_to_tar' in compute_kwargs:
                compute_kwargs['write_to_tar'] = False
            return _compute_or_skip_on_error(calc, compute_kwargs)

        if client is None:
            n_workers = _n_workers_for_local_cluster(calcs)
            with distributed.LocalCluster(n_workers=n_workers) as cluster:
                with distributed.Client(cluster) as client:
                    result = _submit_calcs_on_client(calcs, client, func)
        else:
            result = _submit_calcs_on_client(calcs, client, func)
        if compute_kwargs['write_to_tar']:
            _serial_write_to_tar(calcs)
        return result
    else:
        return [_compute_or_skip_on_error(calc, compute_kwargs)
                for calc in calcs] 
Example #21
Source File: test_cli.py    From dask-mpi with BSD 3-Clause "New" or "Revised" License
def test_non_default_ports(loop, nanny, mpirun):
    with tmpfile(extension="json") as fn:

        cmd = mpirun + [
            "-np",
            "2",
            "dask-mpi",
            "--scheduler-file",
            fn,
            nanny,
            "--scheduler-port",
            "56723",
            "--worker-port",
            "58464",
            "--nanny-port",
            "50164",
        ]

        with popen(cmd):
            with Client(scheduler_file=fn) as c:

                start = time()
                while len(c.scheduler_info()["workers"]) != 1:
                    assert time() < start + 10
                    sleep(0.2)

                sched_info = c.scheduler_info()
                sched_host, sched_port = get_address_host_port(sched_info["address"])
                assert sched_port == 56723
                for worker_addr, worker_info in sched_info["workers"].items():
                    worker_host, worker_port = get_address_host_port(worker_addr)
                    assert worker_port == 58464
                    if nanny == "--nanny":
                        _, nanny_port = get_address_host_port(worker_info["nanny"])
                        assert nanny_port == 50164

                assert c.submit(lambda x: x + 1, 10).result() == 11 
Example #22
Source File: test_cli.py    From dask-mpi with BSD 3-Clause "New" or "Revised" License
def test_no_scheduler(loop, mpirun):
    with tmpfile(extension="json") as fn:

        cmd = mpirun + ["-np", "2", "dask-mpi", "--scheduler-file", fn]

        with popen(cmd, stdin=FNULL):
            with Client(scheduler_file=fn) as c:

                start = time()
                while len(c.scheduler_info()["workers"]) != 1:
                    assert time() < start + 10
                    sleep(0.2)

                assert c.submit(lambda x: x + 1, 10).result() == 11

                cmd = mpirun + [
                    "-np",
                    "1",
                    "dask-mpi",
                    "--scheduler-file",
                    fn,
                    "--no-scheduler",
                ]

                with popen(cmd):
                    start = time()
                    while len(c.scheduler_info()["workers"]) != 2:
                        assert time() < start + 10
                        sleep(0.2) 
Example #23
Source File: test_cli.py    From dask-mpi with BSD 3-Clause "New" or "Revised" License
def test_basic(loop, nanny, mpirun):
    with tmpfile(extension="json") as fn:

        cmd = mpirun + ["-np", "4", "dask-mpi", "--scheduler-file", fn, nanny]

        with popen(cmd):
            with Client(scheduler_file=fn) as c:
                start = time()
                while len(c.scheduler_info()["workers"]) != 3:
                    assert time() < start + 10
                    sleep(0.2)

                assert c.submit(lambda x: x + 1, 10, workers=1).result() == 11 
Example #24
Source File: test_distributed.py    From nbodykit with GNU General Public License v3.0
def setup():
    from distributed import LocalCluster, Client
    cluster = LocalCluster(n_workers=1, threads_per_worker=1, processes=False)
    use_distributed(Client(cluster)) 
Example #25
Source File: common.py    From xcube with MIT License
def cli_option_scheduler(func):
    """Decorator for adding a pre-defined, reusable CLI option `--scheduler`."""

    # noinspection PyUnusedLocal
    def _callback(ctx: click.Context, param: click.Option, value: Optional[str]):
        if not value:
            return

        address_and_kwargs = value.split("?", 1)  # split once: everything after the first '?' is kwargs
        if len(address_and_kwargs) == 2:
            address, kwargs_string = address_and_kwargs
            kwargs = parse_cli_kwargs(kwargs_string, metavar="SCHEDULER")
        else:
            address, = address_and_kwargs
            kwargs = dict()

        try:
            # The Dask client registers itself as the default Dask scheduler, so it
            # also runs the dask.array computations used by xarray
            import distributed
            scheduler_client = distributed.Client(address, **kwargs)
            ctx_obj = ctx.ensure_object(dict)
            if ctx_obj is not None:
                ctx_obj["scheduler"] = scheduler_client
            return scheduler_client
        except ValueError as e:
            raise click.BadParameter(f'Failed to create Dask scheduler client: {e}') from e

    return click.option(
        '--scheduler',
        metavar='SCHEDULER',
        help="Enable distributed computing using the Dask scheduler identified by SCHEDULER. "
             "SCHEDULER can have the form <address>?<keyword>=<value>,... where <address> "
             "is <host> or <host>:<port> and specifies the scheduler's address in your network. "
             "For more information on distributed computing "
             "using Dask, refer to http://distributed.dask.org/. "
             "Pairs of <keyword>=<value> are passed to the Dask client. "
             "Refer to http://distributed.dask.org/en/latest/api.html#distributed.Client",
        callback=_callback)(func) 
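As a usage sketch (the command name and body are illustrative, not from xcube): the decorator attaches the --scheduler option to a click command, and click passes the callback's return value, i.e. the created distributed.Client or None, to the command as the scheduler parameter.

import click

@click.command()
@cli_option_scheduler
def compute(scheduler):
    # scheduler is the distributed.Client created by the option callback,
    # or None when --scheduler is not given
    click.echo(f"scheduler: {scheduler}")

# invoked e.g. as:  compute --scheduler localhost:8786?timeout=10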
Example #26
Source File: dask_executor.py    From airflow with Apache License 2.0
def start(self) -> None:
        if self.tls_ca or self.tls_key or self.tls_cert:
            security = Security(
                tls_client_key=self.tls_key,
                tls_client_cert=self.tls_cert,
                tls_ca_file=self.tls_ca,
                require_encryption=True,
            )
        else:
            security = None

        self.client = Client(self.cluster_address, security=security)
        self.futures = {} 
Example #27
Source File: dask_executor.py    From airflow with Apache License 2.0
def __init__(self, cluster_address=None):
        super().__init__(parallelism=0)
        if cluster_address is None:
            cluster_address = conf.get('dask', 'cluster_address')
        if not cluster_address:
            raise ValueError('Please provide a Dask cluster address in airflow.cfg')
        self.cluster_address = cluster_address
        # ssl / tls parameters
        self.tls_ca = conf.get('dask', 'tls_ca')
        self.tls_key = conf.get('dask', 'tls_key')
        self.tls_cert = conf.get('dask', 'tls_cert')
        self.client: Optional[Client] = None
        self.futures: Optional[Dict[Future, TaskInstanceKeyType]] = None 
Example #28
Source File: test_automate.py    From aospy with Apache License 2.0
import distributed
import pytest

@pytest.fixture
def external_client():
    # Explicitly specify we want only 4 workers so that when running on
    # continuous integration we don't request too many.
    cluster = distributed.LocalCluster(n_workers=4)
    client = distributed.Client(cluster)
    yield client
    client.close()
    cluster.close() 
Example #29
Source File: metsim.py    From MetSim with GNU General Public License v3.0 4 votes vote down vote up
def __init__(self, params: dict, domain_slice=NO_SLICE):
        """
        Constructor
        """
        self._domain = None
        self._met_data = None
        self._state = None
        self._client = None
        self._domain_slice = domain_slice
        self.progress_bar = ProgressBar()
        self.params.update(params)
        logging.captureWarnings(True)
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(self.params['verbose'])

        formatter = logging.Formatter(' - '.join(
            ['%(asctime)s', '%(name)s', '%(levelname)s', '%(message)s']))
        ch = logging.StreamHandler(sys.stdout)
        ch.setFormatter(formatter)
        ch.setLevel(self.params['verbose'])
        # set global dask scheduler
        if domain_slice is NO_SLICE:
            if self.params['scheduler'] in DASK_CORE_SCHEDULERS:
                dask.config.set(scheduler=self.params['scheduler'])
            else:
                from distributed import Client, progress
                if 'distributed' == self.params['scheduler']:
                    self._client = Client(
                        n_workers=self.params['num_workers'],
                        threads_per_worker=1)
                    if self.params['verbose'] == logging.DEBUG:
                        self.progress_bar = progress
                elif os.path.isfile(self.params['scheduler']):
                    self._client = Client(
                        scheduler_file=self.params['scheduler'])
                else:
                    self._client = Client(self.params['scheduler'])
        else:
            dask.config.set(scheduler=self.params['scheduler'])

        # Set up logging
        # If in verbose mode set up the progress bar
        if self.params['verbose'] == logging.DEBUG:
            if 'distributed' != self.params['scheduler']:
                self.progress_bar.register()
                self.progress_bar = lambda x: x
        else:
            # If not in verbose mode, create a dummy function
            self.progress_bar = lambda x: x
        # Create time vector(s)
        self._times = self._get_output_times(
            freq=self.params['out_freq'],
            period_ending=self.params['period_ending']) 
Example #30
Source File: test_model_selection.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_as_completed_distributed(loop):  # noqa
    cluster_kwargs = dict(active_rpc_timeout=10, nanny=Nanny)
    if DISTRIBUTED_2_11_0:
        cluster_kwargs["disconnect_timeout"] = 10
    with cluster(**cluster_kwargs) as (s, [a, b]):
        with Client(s["address"], loop=loop) as c:
            counter_name = "counter_name"
            counter = Variable(counter_name, client=c)
            counter.set(0)
            lock_name = "lock"

            killed_workers_name = "killed_workers"
            killed_workers = Variable(killed_workers_name, client=c)
            killed_workers.set({})

            X, y = make_classification(n_samples=100, n_features=10, random_state=0)
            gs = dcv.GridSearchCV(
                AsCompletedEstimator(killed_workers_name, lock_name, counter_name, 7),
                param_grid={"foo_param": [0, 1, 2]},
                cv=3,
                refit=False,
                cache_cv=False,
                scheduler=c,
            )
            gs.fit(X, y)

            def f(dask_scheduler):
                return dask_scheduler.transition_log

            def check_reprocess(transition_log):
                finished = set()
                for transition in transition_log:
                    key, start_state, end_state = (
                        transition[0],
                        transition[1],
                        transition[2],
                    )
                    assert key not in finished
                    if (
                        "score" in key
                        and start_state == "memory"
                        and end_state == "forgotten"
                    ):
                        finished.add(key)

            check_reprocess(c.run_on_scheduler(f))