Python dask.diagnostics.ProgressBar() Examples

The following are 13 code examples of dask.diagnostics.ProgressBar(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module dask.diagnostics, or try the search function.
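
Before the project examples, here is a minimal sketch of the two usage patterns that recur below: scoping the bar to a single computation with a context manager, or registering it globally so every compute() reports progress. The array shape and chunking are arbitrary placeholders.

import dask.array as da
from dask.diagnostics import ProgressBar

x = da.random.random((10000, 10000), chunks=(1000, 1000))

# Pattern 1: scope the progress bar to a single computation.
with ProgressBar():
    total = x.sum().compute()

# Pattern 2: register globally; every subsequent compute() shows a bar.
pbar = ProgressBar()
pbar.register()
mean = x.mean().compute()
pbar.unregister()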
Example #1
Source File: cross_registration.py    From minian with GNU General Public License v3.0
def calculate_centroids_old(cnmds, window, grp_dim=['animal', 'session']):
    print("computing centroids")
    cnt_list = []
    for anm, cur_anm in cnmds.groupby('animal'):
        for ss, cur_ss in cur_anm.groupby('session'):
            # cnt = centroids(cur_ss['A_shifted'], window.sel(animal=anm))
            cnt = da.delayed(centroids)(
                cur_ss['A_shifted'], window.sel(animal=anm))
            cnt_list.append(cnt)
    with ProgressBar():
        cnt_list, = da.compute(cnt_list)
    cnts_ds = pd.concat(cnt_list, ignore_index=True)
    cnts_ds.height = cnts_ds.height.astype(float)
    cnts_ds.width = cnts_ds.width.astype(float)
    cnts_ds.unit_id = cnts_ds.unit_id.astype(int)
    cnts_ds.animal = cnts_ds.animal.astype(str)
    cnts_ds.session = cnts_ds.session.astype(str)
    cnts_ds.session_id = cnts_ds.session_id.astype(str)
    return cnts_ds 
Example #2
Source File: cross_registration.py    From minian with GNU General Public License v3.0
def centroids_distance_old(cents,
                           A,
                           window,
                           shift,
                           hamming,
                           corr,
                           tile=(50, 50)):
    sessions = cents['session'].unique()
    dim_h = (np.min(cents['height']), np.max(cents['height']))
    dim_w = (np.min(cents['width']), np.max(cents['width']))
    dist_list = []
    for ssA, ssB in itt.combinations(sessions, 2):
        # dist = _calc_cent_dist(ssA, ssB, cents, cnmds, window, tile, dim_h, dim_w)
        dist = da.delayed(_calc_cent_dist)(ssA, ssB, cents, A, window,
                                           tile, dim_h, dim_w, shift, hamming,
                                           corr)
        dist_list.append(dist)
    with ProgressBar():
        dist_list, = da.compute(dist_list)
    dists = pd.concat(dist_list, ignore_index=True)
    return dists 
Example #3
Source File: utilities.py    From minian with GNU General Public License v3.0
def scale_varr(varr, scale=(0, 1), inplace=False, pre_compute=False):
    varr_max = varr.max()
    varr_min = varr.min()
    if pre_compute:
        print("pre-computing min and max")
        with ProgressBar():
            varr_max = varr_max.compute()
            varr_min = varr_min.compute()
    if inplace:
        varr_norm = varr
        varr_norm -= varr_min
        varr_norm *= 1 / (varr_max - varr_min)
        varr_norm *= (scale[1] - scale[0])
        varr_norm += scale[0]
    else:
        varr_norm = ((varr - varr_min) * (scale[1] - scale[0])
                     / (varr_max - varr_min)) + scale[0]
    return varr_norm 
Example #4
Source File: starfm4py.py    From starfm4py with GNU General Public License v3.0
def starfm(fine_image_t0, coarse_image_t0, coarse_image_t1, profile, shape):
    print('Processing...')
    prediction_da = predict(fine_image_t0, coarse_image_t0, coarse_image_t1, shape)
    with ProgressBar():
        prediction = prediction_da.compute()

    return prediction
Example #5
Source File: brute_force_plotter.py    From brute-force-plotter with MIT License
def main(input_file, dtypes, output_path):
    """Create Plots From data in input"""

    data = pd.read_csv(input_file)
    new_file_name = f"{input_file}.parq"
    data.to_parquet(new_file_name)

    with open(dtypes, "r") as f:
        data_types = json.load(f)
    plots = create_plots(new_file_name, data_types, output_path)
    with ProgressBar():
        # Dask's local schedulers take num_workers; n_workers is not a
        # recognized keyword argument.
        dask.compute(*plots, scheduler="processes", num_workers=22)
Example #6
Source File: preprocessing.py    From minian with GNU General Public License v3.0
def detect_brightspot_perframe(varray, thres=0.95):
    print("creating parallel schedule")
    spots = []
    for fid, fm in varray.rolling(frame=1):
        sp = delayed(lambda f: f > f.quantile(thres, interpolation='lower'))(
            fm)
        spots.append(sp)
    with ProgressBar():
        print("detecting bright spots by frame")
        spots, = compute(spots)
    print("concatenating results")
    spots = xr.concat(spots, dim='frame')
    return spots


# def correct_dust(varray, dust):
#     mov_corr = varray.values
#     nz = np.nonzero(dust)
#     nz_tp = [(d0, d1) for d0, d1 in zip(nz[0], nz[1])]
#     for i in range(np.count_nonzero(dust)):
#         cur_dust = (nz[0][i], nz[1][i])
#         cur_sur = set(
#             itt.product(
#                 range(cur_dust[0] - 1, cur_dust[0] + 2),
#                 range(cur_dust[1] - 1, cur_dust[1] + 2))) - set(
#                     cur_dust) - set(nz_tp)
#         cur_sur = list(
#             filter(
#                 lambda d: 0 < d[0] < mov.shape[1] and 0 < d[1] < mov.shape[2],
#                 cur_sur))
#         if len(cur_sur) > 0:
#             sur_arr = np.empty((mov.shape[0], len(cur_sur)))
#             for si, sur in enumerate(cur_sur):
#                 sur_arr[:, si] = mov[:, sur[0], sur[1]]
#             mov_corr[:, cur_dust[0], cur_dust[1]] = np.mean(sur_arr, axis=1)
#         else:
#             print("unable to correct for point ({}, {})".format(
#                 cur_dust[0], cur_dust[1]))
#     return mov_corr 
Example #7
Source File: preprocessing.py    From minian with GNU General Public License v3.0
def remove_background_old(varray, window=51):
    print("creating parallel schedule")
    varr_ft = varray.astype(np.float32)
    compute_list = []
    for fid in varr_ft.coords['frame'].values:
        fm = varr_ft.loc[dict(frame=fid)]
        _ = delayed(remove_background_perframe_old)(fid, fm, varr_ft, window)
        compute_list.append(_)
    with ProgressBar():
        print("removing background")
        compute(compute_list)
    print("normalizing result")
    varr_ft = scale_varr(varr_ft, (0, 255)).astype(varray.dtype, copy=False)
    print("background removal done")
    return varr_ft.rename(varray.name + "_Filtered") 
Example #8
Source File: pyscenic.py    From pySCENIC with GNU General Public License v3.0
def prune_targets_command(args):
    """
    Prune targets/find enriched features.
    """
    # Loading modules from YAML is extremely slow, so the file format is a
    # potential performance improvement: switching to JSON or using a CLoader
    # would help (https://stackoverflow.com/questions/27743711/can-i-speedup-yaml).
    # The alternative opted for in the end was binary pickling.
    extension = PurePath(args.module_fname.name).suffixes
    if is_valid_suffix(extension, 'ctx'):
        if args.expression_mtx_fname is None:
            LOGGER.error("No expression matrix is supplied.")
            sys.exit(1)
        LOGGER.info("Creating modules.")
        modules = adjacencies2modules(args)
    else:
        LOGGER.info("Loading modules.")
        try:
            modules = load_modules(args.module_fname.name)
        except ValueError as e:
            LOGGER.error(e)
            sys.exit(1)

    if len(modules) == 0:
        LOGGER.error("Not a single module loaded")
        sys.exit(1)

    LOGGER.info("Loading databases.")
    dbs = _load_dbs(args.database_fname)

    LOGGER.info("Calculating regulons.")
    motif_annotations_fname = args.annotations_fname.name
    calc_func = find_features if args.no_pruning == "yes" else prune2df
    with ProgressBar() if args.mode == "dask_multiprocessing" else NoProgressBar():
        df_motifs = calc_func(dbs, modules, motif_annotations_fname,
                              rank_threshold=args.rank_threshold,
                              auc_threshold=args.auc_threshold,
                              nes_threshold=args.nes_threshold,
                              client_or_address=args.mode,
                              module_chunksize=args.chunk_size,
                              num_workers=args.num_workers)

    LOGGER.info("Writing results to file.")
    if args.output.name == '<stdout>':
        df_motifs.to_csv(args.output)
    else:
        save_enriched_motifs(df_motifs, args.output.name) 
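A note on the conditional context manager above: NoProgressBar is a no-op stand-in defined by the project, not part of dask.diagnostics. On Python 3.7+ the standard library's contextlib.nullcontext can serve the same purpose; a minimal sketch, where maybe_progress is a hypothetical helper rather than pySCENIC code:

from contextlib import nullcontext
from dask.diagnostics import ProgressBar

def maybe_progress(enabled):
    # Return a real progress bar, or a context manager that does nothing.
    return ProgressBar() if enabled else nullcontext()

with maybe_progress(enabled=False):
    pass  # dask computations go here; no bar is shown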
Example #9
Source File: hpc-prune.py    From pySCENIC with GNU General Public License v3.0
def run(args):
    # Set logging level.
    logging_debug_opt = False
    LOGGER.addHandler(create_logging_handler(logging_debug_opt))
    LOGGER.setLevel(logging.DEBUG)

    LOGGER.info("Using configuration {}.".format(args.config_filename))
    cfg = ConfigParser()
    cfg.read(args.config_filename)

    in_fname = cfg['data']['modules'] if not args.input else args.input
    LOGGER.info("Loading modules from {}.".format(in_fname))
    # Loading modules from YAML is extremely slow; switching to JSON or using a
    # CLoader would be a potential performance improvement:
    # https://stackoverflow.com/questions/27743711/can-i-speedup-yaml
    if in_fname.endswith('.yaml'):
        modules = load_from_yaml(in_fname)
    else:
        with open(in_fname, 'rb') as f:
            modules = pickle.load(f)
    # Filter out modules with too few genes.
    min_genes = int(cfg['parameters']['min_genes'])
    modules = list(filter(lambda m: len(m) >= min_genes, modules))

    LOGGER.info("Loading databases.")
    def name(fname):
        return os.path.splitext(os.path.basename(fname))[0]
    db_fnames = list(mapcat(glob.glob, cfg['data']['databases'].split(";")))
    dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]

    LOGGER.info("Calculating regulons.")
    motif_annotations_fname = cfg['data']['motif_annotations']
    mode = cfg['parameters']['mode']
    with ProgressBar() if mode == "dask_multiprocessing" else NoProgressBar():
        df = prune2df(dbs, modules, motif_annotations_fname,
                      rank_threshold=int(cfg['parameters']['rank_threshold']),
                      auc_threshold=float(cfg['parameters']['auc_threshold']),
                      nes_threshold=float(cfg['parameters']['nes_threshold']),
                      client_or_address=mode,
                      # chunk_size is read from the config as a string, so cast
                      # it to int like the other numeric parameters
                      module_chunksize=int(cfg['parameters']['chunk_size']),
                      num_workers=args.num_workers)

    LOGGER.info("Writing results to file.")
    df.to_csv(cfg['parameters']['output'] if not args.output else args.output) 
Example #10
Source File: diffraction2d.py    From pyxem with GNU General Public License v3.0
def template_match_with_binary_image(
        self, binary_image, lazy_result=True, show_progressbar=True
    ):
        """Template match the signal dimensions with a binary image.

        Used to find diffraction disks in convergent beam electron
        diffraction data.

        Might also work with non-binary images, but this hasn't been
        extensively tested.

        Parameters
        ----------
        binary_image : 2-D NumPy array
        lazy_result : bool, default True
            If True, will return a LazyDiffraction2D object. If False,
            will compute the result and return a Diffraction2D object.
        show_progressbar : bool, default True

        Returns
        -------
        template_match : Diffraction2D object

        Examples
        --------
        >>> s = ps.dummy_data.get_cbed_signal()
        >>> binary_image = np.random.randint(0, 2, (6, 6))
        >>> s_template = s.template_match_with_binary_image(
        ...     binary_image, show_progressbar=False)
        >>> s_template.plot()

        See Also
        --------
        template_match_disk
        template_match_ring

        """
        if self._lazy:
            dask_array = self.data
        else:
            sig_chunks = list(self.axes_manager.signal_shape)[::-1]
            chunks = [8] * len(self.axes_manager.navigation_shape)
            chunks.extend(sig_chunks)
            dask_array = da.from_array(self.data, chunks=chunks)
        output_array = dt._template_match_with_binary_image(dask_array, binary_image)
        if not lazy_result:
            if show_progressbar:
                pbar = ProgressBar()
                pbar.register()
            output_array = output_array.compute()
            if show_progressbar:
                pbar.unregister()
            s = Diffraction2D(output_array)
        else:
            s = LazyDiffraction2D(output_array)
        pst._copy_signal_all_axes_metadata(self, s)
        return s 
Example #11
Source File: diffraction2d.py    From pyxem with GNU General Public License v3.0
def intensity_peaks(
        self, peak_array, disk_r=4, lazy_result=True, show_progressbar=True
    ):
        """Get intensity of a peak in the diffraction data.

        The intensity is calculated by taking the mean of the
        pixel values inside radius disk_r from the peak
        position.

        Parameters
        ----------
        peak_array : Numpy or Dask array
            Must have the same navigation shape as this signal.
        disk_r : int
            Radius of the disk over which the mean intensity is calculated.
        lazy_result : bool, default True
            If True, will return a LazyDiffraction2D object. If False,
            will compute the result and return a Diffraction2D object.
        show_progressbar : bool, default True

        Returns
        -------
        intensity_array: Numpy or Dask array
            Same navigation shape as this signal, with peak position in
            x and y coordinates and the mean intensity.

        Examples
        --------
        >>> s = ps.dummy_data.get_cbed_signal()
        >>> peak_array = s.find_peaks_lazy()
        >>> intensity_array = s.intensity_peaks(peak_array, disk_r=6)
        >>> intensity_array_computed = intensity_array.compute()

        """
        if self._lazy:
            dask_array = self.data
        else:
            sig_chunks = list(self.axes_manager.signal_shape)[::-1]
            chunks = [8] * len(self.axes_manager.navigation_shape)
            chunks.extend(sig_chunks)
            dask_array = da.from_array(self.data, chunks=chunks)

        chunks_peak = dask_array.chunks[:-2]
        if hasattr(peak_array, "chunks"):
            peak_array_dask = da.rechunk(peak_array, chunks=chunks_peak)
        else:
            peak_array_dask = da.from_array(peak_array, chunks=chunks_peak)

        output_array = dt._intensity_peaks_image(dask_array, peak_array_dask, disk_r)

        if not lazy_result:
            if show_progressbar:
                pbar = ProgressBar()
                pbar.register()
            output_array = output_array.compute()
            if show_progressbar:
                pbar.unregister()
        return output_array 
Example #12
Source File: cross_registration.py    From minian with GNU General Public License v3.0
def estimate_shifts(minian_df, by='session', to='first', temp_var='org', template=None, rm_background=False):
    if template is not None:
        minian_df['template'] = template

    def get_temp(row):
        ds, temp = row['minian'], row['template']
        try:
            return ds.isel(frame=temp).drop('frame')
        except TypeError:
            func_dict = {
                'mean': lambda v: v.mean('frame'),
                'max': lambda v: v.max('frame')}
            try:
                return func_dict[temp](ds)
            except KeyError:
                raise NotImplementedError(
                    "template {} not understood".format(temp))

    minian_df['template'] = minian_df.apply(get_temp, axis='columns')
    grp_dims = list(minian_df.index.names)
    grp_dims.remove(by)
    temp_dict, shift_dict, corr_dict, tempsh_dict = [dict() for _ in range(4)]
    for idxs, df in minian_df.groupby(level=grp_dims):
        try:
            temp_ls = [t[temp_var] for t in df['template']]
        except KeyError:
            raise KeyError(
                "variable {} not found in dataset".format(temp_var))
        temps = (xr.concat(temp_ls, dim=by).expand_dims(grp_dims)
                 .reset_coords(drop=True))
        res = estimate_shift_fft(temps, dim=by, on=to)
        shifts = res.sel(variable=['height', 'width'])
        corrs = res.sel(variable='corr')
        temps_sh = apply_shifts(temps, shifts)
        temp_dict[idxs] = temps
        shift_dict[idxs] = shifts
        corr_dict[idxs] = corrs
        tempsh_dict[idxs] = temps_sh
    temps = xrconcat_recursive(temp_dict, grp_dims).rename('temps')
    shifts = xrconcat_recursive(shift_dict, grp_dims).rename('shifts')
    corrs = xrconcat_recursive(corr_dict, grp_dims).rename('corrs')
    temps_sh = xrconcat_recursive(tempsh_dict, grp_dims).rename('temps_shifted')
    with ProgressBar():
        temps = temps.compute()
        shifts = shifts.compute()
        corrs = corrs.compute()
        temps_sh = temps_sh.compute()
    return xr.merge([temps, shifts, corrs, temps_sh]) 
Example #13
Source File: metsim.py    From MetSim with GNU General Public License v3.0
def __init__(self, params: dict, domain_slice=NO_SLICE):
        """
        Constructor
        """
        self._domain = None
        self._met_data = None
        self._state = None
        self._client = None
        self._domain_slice = domain_slice
        self.progress_bar = ProgressBar()
        self.params.update(params)
        logging.captureWarnings(True)
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(self.params['verbose'])

        formatter = logging.Formatter(' - '.join(
            ['%(asctime)s', '%(name)s', '%(levelname)s', '%(message)s']))
        ch = logging.StreamHandler(sys.stdout)
        ch.setFormatter(formatter)
        ch.setLevel(self.params['verbose'])
        # set global dask scheduler
        if domain_slice is NO_SLICE:
            if self.params['scheduler'] in DASK_CORE_SCHEDULERS:
                dask.config.set(scheduler=self.params['scheduler'])
            else:
                from distributed import Client, progress
                if 'distributed' == self.params['scheduler']:
                    self._client = Client(
                        n_workers=self.params['num_workers'],
                        threads_per_worker=1)
                    if self.params['verbose'] == logging.DEBUG:
                        self.progress_bar = progress
                elif os.path.isfile(self.params['scheduler']):
                    self._client = Client(
                        scheduler_file=self.params['scheduler'])
                else:
                    self._client = Client(self.params['scheduler'])
        else:
            dask.config.set(scheduler=self.params['scheduler'])

        # Set up logging
        # If in verbose mode set up the progress bar
        if self.params['verbose'] == logging.DEBUG:
            if 'distributed' != self.params['scheduler']:
                self.progress_bar.register()
                self.progress_bar = lambda x: x
        else:
            # If not in verbose mode, create a dummy function
            self.progress_bar = lambda x: x
        # Create time vector(s)
        self._times = self._get_output_times(
            freq=self.params['out_freq'],
            period_ending=self.params['period_ending'])
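
The constructor above switches between dask.diagnostics.ProgressBar (for the local threaded, multiprocessing, and synchronous schedulers) and distributed.progress (for the distributed scheduler), which are used differently: the former is registered once and hooks every compute(), while the latter is called on futures. A minimal sketch of the distributed side, assuming a small local test cluster:

import dask.array as da
from distributed import Client, progress

client = Client(n_workers=2, threads_per_worker=1)  # local test cluster
x = da.ones((1000, 1000), chunks=(100, 100))
future = client.compute(x.sum())  # returns a Future immediately
progress(future)                  # render a progress bar for the future
result = future.result()
client.close()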