Python distributed.Client() Examples
The following are 30 code examples of distributed.Client().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module distributed, or try the search function.
Example #1
Source File: distributed.py From cosima-cookbook with Apache License 2.0 | 6 votes |
def start_cluster(diagnostics_port=0):
    """Set up a LocalCluster for distributed and return a Client connected to it.

    Parameters
    ----------
    diagnostics_port : int
        Forwarded to ``LocalCluster`` as ``diagnostics_port``.

    Returns
    -------
    The ``Client`` created on top of the new ``LocalCluster``.
    """
    hostname = socket.gethostname()

    # os.cpu_count() may return None, and halving a single core yields 0;
    # always request at least one worker.
    n_workers = max(1, (os.cpu_count() or 1) // 2)

    cluster = LocalCluster(
        ip='localhost',
        n_workers=n_workers,
        diagnostics_port=diagnostics_port,
        memory_limit=6e9,
    )
    client = Client(cluster)

    # Values substituted into the SSH tunnel hint printed below.
    params = {
        'bokeh_port': cluster.scheduler.services['bokeh'].port,
        'user': getpass.getuser(),
        'scheduler_ip': cluster.scheduler.ip,
        'hostname': hostname,
    }

    print("If the link to the dashboard below doesn't work, run this command on a local terminal to set up a SSH tunnel:")
    print()
    print(" ssh -N -L {bokeh_port}:{scheduler_ip}:{bokeh_port} {hostname}.nci.org.au -l {user}".format(**params))

    return client
Example #2
Source File: runArboreto.py From Beeline with GNU General Public License v3.0 | 6 votes |
def main(args):
    """Run the requested network-inference algorithm and write its result.

    Reads the tab-separated input named by the parsed options, runs either
    GENIE3 or GRNBoost2 through a Dask client, and writes the resulting
    network as a tab-separated file. An unknown algorithm name only prints
    a message.
    """
    opts, args = parseArgs(args)
    inDF = pd.read_csv(opts.inFile, sep='\t', index_col=0, header=0)
    client = Client(processes=False)

    # Select the inference routine first; both branches share the output step.
    if opts.algo == 'GENIE3':
        infer = genie3
    elif opts.algo == 'GRNBoost2':
        infer = grnboost2
    else:
        print("Wrong algorithm name. Should either be GENIE3 or GRNBoost2.")
        return

    network = infer(inDF, client_or_address=client)
    network.to_csv(opts.outFile, index=False, sep='\t')
Example #3
Source File: dask_sampler.py From pyABC with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self, dask_client=None, client_max_jobs=np.inf, default_pickle=False, batch_size=1):
    """Store the Dask client and the sampler options on the instance.

    When ``dask_client`` is None a ``Client()`` with default arguments is
    created and used instead.
    """
    super().__init__()

    # Client: use the caller's, or create a default one.
    self.my_client = Client() if dask_client is None else dask_client

    # Client options
    self.client_max_jobs = client_max_jobs

    # Job state: nothing queued yet.
    self.jobs_queued = 0

    # For dask, we use cloudpickle by default
    self.default_pickle = default_pickle

    # Batch size
    self.batch_size = batch_size
Example #4
Source File: dask_utils.py From aicsimageio with BSD 3-Clause "New" or "Revised" License | 6 votes |
def spawn_cluster_and_client(
    address: Optional[str] = None, **kwargs
) -> Tuple[Optional[LocalCluster], Optional[Client]]:
    """
    Connect to an existing Dask scheduler or start a local one.

    With an address, only a Client connection is created and the returned
    cluster is None. Without an address, a LocalCluster is created from
    ``kwargs`` and a Client is connected to it.

    Notes
    -----
    When using this function, the processing machine or container must have
    networking capabilities enabled to function properly.
    """
    # Remote case: no cluster object is owned by the caller.
    if address is not None:
        client = Client(address)
        log.info(f"Connected to Remote Dask Cluster: {client}")
        return None, client

    # Local case: kwargs configure the LocalCluster.
    cluster = LocalCluster(**kwargs)
    client = Client(cluster)
    log.info(f"Connected to Local Dask Cluster: {client}")
    return cluster, client
Example #5
Source File: dask_utils.py From aicsimageio with BSD 3-Clause "New" or "Revised" License | 6 votes |
def shutdown_cluster_and_client(
    cluster: Optional[LocalCluster], client: Optional[Client]
) -> Tuple[Optional[LocalCluster], Optional[Client]]:
    """
    Close the given cluster and shut down and close the given client.

    Either argument may be None, in which case it is skipped. The same two
    objects are returned.

    Notes
    -----
    When using this function, the processing machine or container must have
    networking capabilities enabled to function properly.
    """
    has_cluster = cluster is not None
    has_client = client is not None

    # The cluster is closed first, then the client.
    if has_cluster:
        cluster.close()

    if has_client:
        client.shutdown()
        client.close()

    return cluster, client
Example #6
Source File: dask_utils.py From aicsimageio with BSD 3-Clause "New" or "Revised" License | 6 votes |
def cluster_and_client(address: Optional[str] = None, **kwargs):
    """
    If provided an address, create a Dask Client connection.
    If not provided an address, create a LocalCluster and Client connection.
    If not provided an address, other Dask kwargs are accepted and passed down to the
    LocalCluster object.

    These objects will only live for the duration of this context manager.

    Examples
    --------
    >>> with cluster_and_client() as (cluster, client):
    ...     img1 = AICSImage("1.tiff")
    ...     img2 = AICSImage("2.czi")
    ...     other processing

    Notes
    -----
    When using this context manager, the processing machine or container must have
    networking capabilities enabled to function properly.
    """
    # NOTE(review): this generator is presumably wrapped by
    # contextlib.contextmanager where it is defined - confirm.

    # Pre-bind both names so the ``finally`` clause is safe even when
    # spawning raises; otherwise an UnboundLocalError would mask the real
    # error. shutdown_cluster_and_client skips None arguments.
    cluster = None
    client = None
    try:
        cluster, client = spawn_cluster_and_client(address=address, **kwargs)
        yield cluster, client
    finally:
        shutdown_cluster_and_client(cluster=cluster, client=client)
Example #7
Source File: client.py From dask-gateway with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_client(self, set_as_default=True):
    """Get a ``Client`` for this cluster.

    The client is built from this object's ``security``, ``asynchronous``
    and ``loop`` attributes. For a non-asynchronous cluster the client is
    also added to ``self._clients``.

    Returns
    -------
    client : dask.distributed.Client
    """
    options = dict(
        security=self.security,
        set_as_default=set_as_default,
        asynchronous=self.asynchronous,
        loop=self.loop,
    )
    new_client = Client(self, **options)

    if self.asynchronous:
        return new_client

    self._clients.add(new_client)
    return new_client
Example #8
Source File: test_proxies.py From dask-gateway with BSD 3-Clause "New" or "Revised" License | 6 votes |
async def test_scheduler_proxy(proxy, cluster_and_security):
    """Check that a scheduler is reachable through an SNI route on the proxy.

    Declared ``async`` because the body awaits proxy and client calls;
    ``await`` inside a plain ``def`` is a SyntaxError.
    """
    cluster, security = cluster_and_security
    proxied_addr = f"gateway://{proxy.tcp_address}/temp"

    # Add a route
    await proxy.add_route(kind="SNI", sni="temp", target=cluster.scheduler_address)

    # Proxy works
    async def test_works():
        async with Client(proxied_addr, security=security, asynchronous=True) as client:
            res = await client.run_on_scheduler(lambda x: x + 1, 1)
            assert res == 2

    await with_retries(test_works, 5)

    # Remove the route
    await proxy.remove_route(kind="SNI", sni="temp")
    # NOTE(review): the second removal presumably checks that removing a
    # missing route is harmless - confirm.
    await proxy.remove_route(kind="SNI", sni="temp")
Example #9
Source File: test_runner.py From adaptive with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_distributed_executor():
    """Run a BlockingRunner on a distributed Client and check points were added."""
    from distributed import Client

    learner = Learner1D(linear, (-1, 1))
    client = Client(n_workers=1)
    try:
        BlockingRunner(learner, trivial_goal, executor=client)
    finally:
        # Shut the client down even when the runner raises, so a failing
        # test does not leave it running.
        client.shutdown()
    assert learner.npoints > 0
Example #10
Source File: ClimatologySpark2.py From incubator-sdap-nexus with Apache License 2.0 | 5 votes |
def parallelStatsDaskSimple(urlSplits, ds, nEpochs, variable, mask, coordinates, reader, outHdfsPath,
                            averagingConfig, sparkConfig,
                            accumulators=['count', 'mean', 'M2', 'min', 'max']):
    '''Compute N-day climatology statistics in parallel using Dask distributed.

    ``sparkConfig`` must have the form 'dask,n'; otherwise a message is
    written to stderr and the process exits with status 1. One task per
    entry of ``urlSplits`` is submitted to the scheduler at
    ``DaskClientEndpoint``, and the results are returned in completion order.
    '''
    # NOTE(review): statement nesting was reconstructed from a flattened
    # listing - confirm the extent of the Timer block and the for loop.
    if not sparkConfig.startswith('dask,'):
        print >> sys.stderr, "dask: configuration must be of form 'dask,n'"
        sys.exit(1)
    # Parsed but not used further in this function.
    numPartitions = int(sparkConfig.split(',')[1])

    with Timer("Configure Dask distributed"):
        from distributed import Client, as_completed
        client = Client(DaskClientEndpoint)

    print >> sys.stderr, 'Starting parallel Stats using Dask . . .'
    start = time.time()

    def run_pipeline(urls):
        return parallelStatsPipeline(urls, ds, nEpochs, variable, mask, coordinates, reader,
                                     averagingConfig, outHdfsPath, accumulators)

    pending = client.map(run_pipeline, urlSplits)

    outputFiles = []
    for finished in as_completed(pending):
        outputFile = finished.result()
        outputFiles.append(outputFile)
        end = time.time()
        print >> sys.stderr, "parallelStats: Completed %s in %0.3f seconds." % (outputFile, (end - start))
    return outputFiles
Example #11
Source File: conftest.py From kartothek with MIT License | 5 votes |
def setup_dask_distributed():
    """
    This fixture makes all dask tests effectively use distributed under the hood.

    A test cluster is started, a Client connected to its scheduler is stored
    in the module-level ``_client``, and control is yielded to the tests.
    """
    global _client
    with distributed.utils_test.cluster() as (scheduler, workers):
        client = Client(scheduler["address"])
        _client = client
        try:
            yield
        finally:
            # Close the client before the cluster context tears down.
            client.close()
Example #12
Source File: test_model_selection.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_scheduler_param_distributed(loop):  # noqa
    """Fit a grid search while a distributed Client is active on a test cluster
    and check that the scheduler recorded transitions."""
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    def count_transitions(dask_scheduler):
        return len(dask_scheduler.transition_log)

    with cluster() as (s, [a, b]), Client(s["address"], loop=loop) as client:
        search = dcv.GridSearchCV(MockClassifier(), {"foo_param": [0, 1, 2]}, cv=3)
        search.fit(X, y)

        # some work happened on cluster
        assert client.run_on_scheduler(count_transitions)
Example #13
Source File: __init__.py From modin with Apache License 2.0 | 5 votes |
def _update_engine(publisher: Publisher):
    """React to a change of execution engine and refresh DEFAULT_NPARTITIONS.

    Reads the engine name from ``publisher.get()``. For "Ray" and "Dask" it
    may initialize the engine, and it then sets the module-level
    ``DEFAULT_NPARTITIONS`` to at least 4.

    Raises
    ------
    ImportError
        If the engine is not "Ray", "Dask" or "Python".
    """
    # NOTE(review): statement nesting was reconstructed from a flattened
    # listing - confirm against the project source.
    global DEFAULT_NPARTITIONS, dask_client

    # Default: keep the current partition count unless an engine overrides it.
    num_cpus = DEFAULT_NPARTITIONS
    if publisher.get() == "Ray":
        import ray

        # Initialize Ray only the first time this engine is selected.
        if _is_first_update.get("Ray", True):
            initialize_ray()
        num_cpus = ray.cluster_resources()["CPU"]
    elif publisher.get() == "Dask":  # pragma: no cover
        from distributed.client import get_client

        if threading.current_thread().name == "MainThread" and _is_first_update.get(
            "Dask", True
        ):
            import warnings

            warnings.warn("The Dask Engine for Modin is experimental.")

            try:
                # Reuse an existing client if there is one.
                dask_client = get_client()
            except ValueError:
                from distributed import Client

                # No client available: start one sized by MODIN_CPUS, or by
                # the machine's CPU count when that variable is unset/empty.
                num_cpus = os.environ.get("MODIN_CPUS", None) or multiprocessing.cpu_count()
                dask_client = Client(n_workers=int(num_cpus))
    elif publisher.get() != "Python":
        raise ImportError("Unrecognized execution engine: {}.".format(publisher.get()))

    # Record that this engine has now been seen at least once.
    _is_first_update[publisher.get()] = False
    DEFAULT_NPARTITIONS = max(4, int(num_cpus))
Example #14
Source File: ga_chp_bq_advanced_preprocessor.py From MorphL-Community-Edition with Apache License 2.0 | 5 votes |
def main():
    """Process the training or the prediction data set with a Dask client.

    The module-level ``TRAINING_OR_PREDICTION`` selects which pair of HDFS
    input/output directories is handed to ``process_dataframe``.
    """
    client = Client()

    # Anything other than 'training' is treated as prediction.
    if TRAINING_OR_PREDICTION == 'training':
        input_dir, output_dir = HDFS_DIR_INPUT_TRAINING, HDFS_DIR_OUTPUT_TRAINING
    else:
        input_dir, output_dir = HDFS_DIR_INPUT_PREDICTION, HDFS_DIR_OUTPUT_PREDICTION

    process_dataframe(client, input_dir, output_dir)
Example #15
Source File: ga_chp_bq_model_generator.py From MorphL-Community-Edition with Apache License 2.0 | 5 votes |
def main():
    """Read the parquet input, persist it on the cluster, and build the model."""
    client = Client()

    # Read the input, then ask the client to persist the frame.
    lazy_frame = dd.read_parquet(HDFS_DIR_INPUT)
    dask_df = client.persist(lazy_frame)

    generator = ModelGenerator(dask_df)
    generator.generate_and_save_model()
Example #16
Source File: ga_chp_advanced_preprocessor.py From MorphL-Community-Edition with Apache License 2.0 | 5 votes |
def main():
    """Process the training or the prediction data set with a Dask client.

    The module-level ``TRAINING_OR_PREDICTION`` selects which pair of HDFS
    input/output directories is handed to ``process_dataframe``.
    """
    client = Client()

    # Anything other than 'training' is treated as prediction.
    if TRAINING_OR_PREDICTION == 'training':
        input_dir, output_dir = HDFS_DIR_INPUT_TRAINING, HDFS_DIR_OUTPUT_TRAINING
    else:
        input_dir, output_dir = HDFS_DIR_INPUT_PREDICTION, HDFS_DIR_OUTPUT_PREDICTION

    process_dataframe(client, input_dir, output_dir)
Example #17
Source File: ga_chp_model_generator.py From MorphL-Community-Edition with Apache License 2.0 | 5 votes |
def main():
    """Read the parquet input, persist it on the cluster, and build the model."""
    client = Client()

    # Read the input, then ask the client to persist the frame.
    lazy_frame = dd.read_parquet(HDFS_DIR_INPUT)
    dask_df = client.persist(lazy_frame)

    generator = ModelGenerator(dask_df)
    generator.generate_and_save_model()
Example #18
Source File: test_runner.py From adaptive with BSD 3-Clause "New" or "Revised" License | 5 votes |
def ipyparallel_executor():
    """Start a one-engine ipcluster, yield an ipyparallel Client, then stop it.

    Raises
    ------
    RuntimeError
        If the ipcluster process could not be terminated.
    """
    from ipyparallel import Client

    # wexpect is used on Windows, pexpect everywhere else.
    if os.name != "nt":
        import pexpect as expect
    else:
        import wexpect as expect

    ipcluster = expect.spawn("ipcluster start -n 1")
    ipcluster.expect("Engines appear to have started successfully", timeout=35)

    yield Client()

    stopped = ipcluster.terminate(force=True)
    if stopped:
        return
    raise RuntimeError("Could not stop ipcluster")
Example #19
Source File: runner.py From adaptive with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _ensure_executor(executor):
    """Return an executor object for the given executor or client.

    ``None`` is replaced by ``_default_executor()``. A
    ``concurrent.Executor`` is returned as is; an ``ipyparallel.Client`` or
    ``distributed.Client`` is converted through its own executor accessor.

    Raises
    ------
    TypeError
        If the argument is none of the supported types.
    """
    candidate = _default_executor() if executor is None else executor

    if isinstance(candidate, concurrent.Executor):
        return candidate

    # The optional backends are only checked when they are available.
    if with_ipyparallel and isinstance(candidate, ipyparallel.Client):
        return candidate.executor()

    if with_distributed and isinstance(candidate, distributed.Client):
        return candidate.get_executor()

    raise TypeError(
        "Only a concurrent.futures.Executor, distributed.Client,"
        " or ipyparallel.Client can be used."
    )
Example #20
Source File: automate.py From aospy with Apache License 2.0 | 5 votes |
def _exec_calcs(calcs, parallelize=False, client=None, **compute_kwargs):
    """Execute the given calculations.

    Parameters
    ----------
    calcs : Sequence of ``aospy.Calc`` objects
    parallelize : bool, default False
        Whether to submit the calculations in parallel or not
    client : distributed.Client or None
        The distributed Client used if parallelize is set to True; if None
        a distributed LocalCluster is used.
    compute_kwargs : dict of keyword arguments passed to ``Calc.compute``

    Returns
    -------
    A list of the values returned by each Calc object that was executed.
    """
    if not parallelize:
        return [_compute_or_skip_on_error(calc, compute_kwargs)
                for calc in calcs]

    # Submitted calcs get 'write_to_tar' forced to False; the tar step is
    # done serially below. Use a private copy so the caller's flag is not
    # overwritten when the wrapper runs in this process.
    worker_kwargs = dict(compute_kwargs)
    if 'write_to_tar' in worker_kwargs:
        worker_kwargs['write_to_tar'] = False

    def func(calc):
        """Wrap _compute_or_skip_on_error to require only the calc argument"""
        return _compute_or_skip_on_error(calc, worker_kwargs)

    if client is None:
        # No client supplied: run on a temporary local cluster.
        n_workers = _n_workers_for_local_cluster(calcs)
        with distributed.LocalCluster(n_workers=n_workers) as cluster:
            with distributed.Client(cluster) as client:
                result = _submit_calcs_on_client(calcs, client, func)
    else:
        result = _submit_calcs_on_client(calcs, client, func)

    # .get() avoids a KeyError when 'write_to_tar' was not passed at all.
    if compute_kwargs.get('write_to_tar'):
        _serial_write_to_tar(calcs)
    return result
Example #21
Source File: test_cli.py From dask-mpi with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_non_default_ports(loop, nanny, mpirun):
    """Launch dask-mpi with explicit scheduler/worker/nanny ports and check
    that the scheduler reports exactly those ports."""
    with tmpfile(extension="json") as fn:
        port_args = [
            "--scheduler-port",
            "56723",
            "--worker-port",
            "58464",
            "--nanny-port",
            "50164",
        ]
        cmd = mpirun + ["-np", "2", "dask-mpi", "--scheduler-file", fn, nanny] + port_args

        with popen(cmd), Client(scheduler_file=fn) as c:
            # Wait up to 10 seconds for exactly one worker to be reported.
            start = time()
            while len(c.scheduler_info()["workers"]) != 1:
                assert time() < start + 10
                sleep(0.2)

            info = c.scheduler_info()
            _sched_host, sched_port = get_address_host_port(info["address"])
            assert sched_port == 56723

            for addr, details in info["workers"].items():
                _worker_host, worker_port = get_address_host_port(addr)
                assert worker_port == 58464
                # The nanny port is only checked when a nanny was requested.
                if nanny == "--nanny":
                    _, nanny_port = get_address_host_port(details["nanny"])
                    assert nanny_port == 50164

            assert c.submit(lambda x: x + 1, 10).result() == 11
Example #22
Source File: test_cli.py From dask-mpi with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_no_scheduler(loop, mpirun):
    """Start dask-mpi, then attach a second launch with --no-scheduler and
    check that the worker count seen by the client grows from 1 to 2."""
    # NOTE(review): statement nesting was reconstructed from a flattened
    # listing - confirm against the project source.
    with tmpfile(extension="json") as fn:
        cmd = mpirun + ["-np", "2", "dask-mpi", "--scheduler-file", fn]

        with popen(cmd, stdin=FNULL):
            with Client(scheduler_file=fn) as c:
                # Wait up to 10 seconds for exactly one worker.
                start = time()
                while len(c.scheduler_info()["workers"]) != 1:
                    assert time() < start + 10
                    sleep(0.2)

                assert c.submit(lambda x: x + 1, 10).result() == 11

                # Second launch: one process, pointed at the same scheduler
                # file and told not to start a scheduler.
                cmd = mpirun + [
                    "-np",
                    "1",
                    "dask-mpi",
                    "--scheduler-file",
                    fn,
                    "--no-scheduler",
                ]

                with popen(cmd):
                    # Wait up to 10 seconds for the worker count to reach 2.
                    start = time()
                    while len(c.scheduler_info()["workers"]) != 2:
                        assert time() < start + 10
                        sleep(0.2)
Example #23
Source File: test_cli.py From dask-mpi with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_basic(loop, nanny, mpirun):
    """Launch dask-mpi with four processes, wait for three workers, and run
    one task through the client."""
    with tmpfile(extension="json") as fn:
        launch = ["-np", "4", "dask-mpi", "--scheduler-file", fn, nanny]
        cmd = mpirun + launch

        with popen(cmd), Client(scheduler_file=fn) as c:
            # Wait up to 10 seconds for exactly three workers to be reported.
            start = time()
            while len(c.scheduler_info()["workers"]) != 3:
                assert time() < start + 10
                sleep(0.2)

            future = c.submit(lambda x: x + 1, 10, workers=1)
            assert future.result() == 11
Example #24
Source File: test_distributed.py From nbodykit with GNU General Public License v3.0 | 5 votes |
def setup():
    """Create a single-worker, single-thread, in-process LocalCluster and hand
    a Client for it to ``use_distributed``."""
    from distributed import LocalCluster, Client

    in_process_cluster = LocalCluster(n_workers=1, threads_per_worker=1, processes=False)
    client = Client(in_process_cluster)
    use_distributed(client)
Example #25
Source File: common.py From xcube with MIT License | 5 votes |
def cli_option_scheduler(func):
    """Decorator for adding a pre-defined, reusable CLI option `--scheduler`.

    The option value has the form ``<address>?<keyword>=<value>,...``. The
    callback creates a ``distributed.Client`` for the address, stores it in
    the click context object under ``"scheduler"``, and returns it. An
    empty value leaves the option unset.
    """

    # noinspection PyUnusedLocal
    def _callback(ctx: click.Context, param: click.Option, value: Optional[str]):
        if not value:
            return

        # Split at the FIRST "?" only. The previous split("?", 2) could
        # produce three parts, and the single-element unpack then raised an
        # uncaught ValueError for values containing more than one "?".
        address, separator, kwargs_string = value.partition("?")
        if separator:
            kwargs = parse_cli_kwargs(kwargs_string, metavar="SCHEDULER")
        else:
            kwargs = dict()

        try:
            # The Dask Client registers itself as the default Dask scheduler, and so runs dask.array used by xarray
            import distributed
            scheduler_client = distributed.Client(address, **kwargs)
            ctx_obj = ctx.ensure_object(dict)
            if ctx_obj is not None:
                ctx_obj["scheduler"] = scheduler_client
            return scheduler_client
        except ValueError as e:
            raise click.BadParameter(f'Failed to create Dask scheduler client: {e}') from e

    return click.option(
        '--scheduler',
        metavar='SCHEDULER',
        help="Enable distributed computing using the Dask scheduler identified by SCHEDULER. "
             "SCHEDULER can have the form <address>?<keyword>=<value>,... where <address> "
             "is <host> or <host>:<port> and specifies the scheduler's address in your network. "
             "For more information on distributed computing "
             "using Dask, refer to http://distributed.dask.org/. "
             "Pairs of <keyword>=<value> are passed to the Dask client. "
             "Refer to http://distributed.dask.org/en/latest/api.html#distributed.Client",
        callback=_callback)(func)
Example #26
Source File: dask_executor.py From airflow with Apache License 2.0 | 5 votes |
def start(self) -> None:
    """Create the Dask client for ``self.cluster_address`` and reset ``self.futures``.

    A ``Security`` object requiring encryption is built when any of the TLS
    settings (CA, key, cert) is set; otherwise no security is passed.
    """
    security = None
    tls_configured = self.tls_ca or self.tls_key or self.tls_cert
    if tls_configured:
        security = Security(
            tls_client_key=self.tls_key,
            tls_client_cert=self.tls_cert,
            tls_ca_file=self.tls_ca,
            require_encryption=True,
        )

    self.client = Client(self.cluster_address, security=security)
    self.futures = {}
Example #27
Source File: dask_executor.py From airflow with Apache License 2.0 | 5 votes |
def __init__(self, cluster_address=None):
    """Record the Dask cluster address and TLS settings.

    When ``cluster_address`` is None it is read from the ``dask`` section
    of the configuration.

    Raises
    ------
    ValueError
        If no cluster address is available.
    """
    super().__init__(parallelism=0)

    # Fall back to the configured address when none was passed in.
    resolved_address = (
        conf.get('dask', 'cluster_address') if cluster_address is None else cluster_address
    )
    if not resolved_address:
        raise ValueError('Please provide a Dask cluster address in airflow.cfg')
    self.cluster_address = resolved_address

    # ssl / tls parameters
    self.tls_ca = conf.get('dask', 'tls_ca')
    self.tls_key = conf.get('dask', 'tls_key')
    self.tls_cert = conf.get('dask', 'tls_cert')

    # Both are initialized to None here.
    self.client: Optional[Client] = None
    self.futures: Optional[Dict[Future, TaskInstanceKeyType]] = None
Example #28
Source File: test_automate.py From aospy with Apache License 2.0 | 5 votes |
def external_client():
    """Yield a Client on a four-worker LocalCluster, then close both."""
    # Explicitly specify we want only 4 workers so that when running on
    # continuous integration we don't request too many.
    local_cluster = distributed.LocalCluster(n_workers=4)
    connected_client = distributed.Client(local_cluster)

    yield connected_client

    # Teardown: close the client first, then the cluster.
    connected_client.close()
    local_cluster.close()
Example #29
Source File: metsim.py From MetSim with GNU General Public License v3.0 | 4 votes |
def __init__(self, params: dict, domain_slice=NO_SLICE):
    """
    Constructor

    Merges ``params`` into ``self.params``, configures logging, selects the
    dask scheduler, sets up the progress bar and computes the output times.

    Parameters
    ----------
    params : dict
        Settings merged into ``self.params``. This method reads the keys
        'verbose', 'scheduler', 'num_workers', 'out_freq' and
        'period_ending'.
    domain_slice :
        When it is ``NO_SLICE`` the scheduler is chosen here, and a
        distributed Client may be created.
    """
    # NOTE(review): statement nesting was reconstructed from a flattened
    # listing - confirm against the project source.
    self._domain = None
    self._met_data = None
    self._state = None
    self._client = None
    self._domain_slice = domain_slice
    self.progress_bar = ProgressBar()
    # NOTE(review): self.params is not assigned in this method, so it is
    # presumably a class-level dict shared between instances - confirm.
    self.params.update(params)

    logging.captureWarnings(True)
    self.logger = logging.getLogger(__name__)
    self.logger.setLevel(self.params['verbose'])

    # The first field was '%asctime)s' (missing the opening parenthesis),
    # which is not a valid named field.
    formatter = logging.Formatter(' - '.join(
        ['%(asctime)s', '%(name)s', '%(levelname)s', '%(message)s']))
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    ch.setLevel(self.params['verbose'])
    # NOTE(review): ``ch`` is not attached to self.logger anywhere in this
    # method - confirm whether an addHandler call is intended.

    # set global dask scheduler
    if domain_slice is NO_SLICE:
        if self.params['scheduler'] in DASK_CORE_SCHEDULERS:
            dask.config.set(scheduler=self.params['scheduler'])
        else:
            from distributed import Client, progress
            if 'distributed' == self.params['scheduler']:
                # Start a local client with one thread per worker.
                self._client = Client(
                    n_workers=self.params['num_workers'],
                    threads_per_worker=1)
                if self.params['verbose'] == logging.DEBUG:
                    self.progress_bar = progress
            elif os.path.isfile(self.params['scheduler']):
                # An existing file path is used as a scheduler file.
                self._client = Client(
                    scheduler_file=self.params['scheduler'])
            else:
                # Anything else is passed to Client as given.
                self._client = Client(self.params['scheduler'])
    else:
        dask.config.set(scheduler=self.params['scheduler'])

    # Set up logging
    # If in verbose mode set up the progress bar
    if self.params['verbose'] == logging.DEBUG:
        if 'distributed' != self.params['scheduler']:
            self.progress_bar.register()
            self.progress_bar = lambda x: x
    else:
        # If not in verbose mode, create a dummy function
        self.progress_bar = lambda x: x

    # Create time vector(s)
    self._times = self._get_output_times(
        freq=self.params['out_freq'],
        period_ending=self.params['period_ending'])
Example #30
Source File: test_model_selection.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 4 votes |
def test_as_completed_distributed(loop):  # noqa
    """Fit a grid search on a test cluster and check, from the scheduler's
    transition log, that no key reappears after a "score" key went from
    "memory" to "forgotten"."""
    # NOTE(review): statement nesting was reconstructed from a flattened
    # listing - confirm against the project source.
    cluster_kwargs = dict(active_rpc_timeout=10, nanny=Nanny)
    # The extra timeout is only passed when DISTRIBUTED_2_11_0 is truthy.
    if DISTRIBUTED_2_11_0:
        cluster_kwargs["disconnect_timeout"] = 10
    with cluster(**cluster_kwargs) as (s, [a, b]):
        with Client(s["address"], loop=loop) as c:
            # Shared state is referred to by name: a counter Variable
            # starting at 0, a lock name, and a Variable starting as {}.
            counter_name = "counter_name"
            counter = Variable(counter_name, client=c)
            counter.set(0)
            lock_name = "lock"

            killed_workers_name = "killed_workers"
            killed_workers = Variable(killed_workers_name, client=c)
            killed_workers.set({})

            X, y = make_classification(n_samples=100, n_features=10, random_state=0)
            gs = dcv.GridSearchCV(
                AsCompletedEstimator(killed_workers_name, lock_name, counter_name, 7),
                param_grid={"foo_param": [0, 1, 2]},
                cv=3,
                refit=False,
                cache_cv=False,
                scheduler=c,
            )
            gs.fit(X, y)

            def f(dask_scheduler):
                # Runs on the scheduler: return its transition log.
                return dask_scheduler.transition_log

            def check_reprocess(transition_log):
                # Keys of "score" tasks already seen going memory -> forgotten.
                finished = set()
                for transition in transition_log:
                    key, start_state, end_state = (
                        transition[0],
                        transition[1],
                        transition[2],
                    )
                    # A finished key must not appear in any later transition.
                    assert key not in finished
                    if (
                        "score" in key
                        and start_state == "memory"
                        and end_state == "forgotten"
                    ):
                        finished.add(key)

            check_reprocess(c.run_on_scheduler(f))