Python numpy.lib.recfunctions.drop_fields() Examples

The following are 13 code examples of numpy.lib.recfunctions.drop_fields(), collected from open-source projects. Each example notes the project and source file it comes from. You may also want to check out the other functions and classes available in the numpy.lib.recfunctions module.
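
Before the project examples, here is a minimal, self-contained sketch of what drop_fields() does; the array and field names below are purely illustrative:

import numpy as np
from numpy.lib import recfunctions as rfn

# A small structured array with three fields.
a = np.array([(1, 2.0, b'x'), (3, 4.0, b'y')],
             dtype=[('id', 'i4'), ('value', 'f8'), ('tag', 'S1')])

# Drop one field by name; usemask=False ensures a plain structured ndarray
# is returned rather than a masked array.
b = rfn.drop_fields(a, 'tag', usemask=False)
print(b.dtype.names)  # ('id', 'value')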
Example #1
Source File: utils.py    From diogenes with MIT License
def remove_cols(M, col_names):
    """Remove columns specified by col_names from structured array

    Parameters
    ----------
    M : numpy.ndarray
        structured array
    col_names : list of str
        names for columns to remove

    Returns
    -------
    numpy.ndarray
        structured array without the specified columns
    """
    M, col_names = check_consistent(M, col_names=col_names)
    return nprf.drop_fields(M, col_names, usemask=False) 
Example #2
Source File: simpletable.py    From TheCannon with MIT License
def pop_columns(self, names):
        """
        Pop several columns from the table

        Parameters
        ----------

        names: sequence
            A list containing the names of the columns to remove

        Returns
        -------

        values: tuple
            list of columns
        """

        # accept a single column name as well as a sequence of names
        if isinstance(names, str) or not hasattr(names, '__iter__'):
            names = [names]

        p = [self[k] for k in names]

        _names = set([ self.resolve_alias(k) for k in names ])
        self.data = recfunctions.drop_fields(self.data, _names)
        for k in names:
            self._aliases.pop(k, None)
            self._units.pop(k, None)
            self._desc.pop(k, None)

        return p 
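
The pop is essentially "extract the values first, then drop the fields". The same pattern as a standalone sketch on a bare structured array (column names here are illustrative):

import numpy as np
from numpy.lib import recfunctions

tbl = np.array([(1, 0.5), (2, 0.7)], dtype=[('id', 'i4'), ('weight', 'f8')])
popped = tbl['weight'].copy()                                   # keep the values being removed
tbl = recfunctions.drop_fields(tbl, ['weight'], usemask=False)  # then drop the field
print(tbl.dtype.names, popped)                                  # ('id',) [0.5 0.7]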
Example #3
Source File: lib.py    From picasso with MIT License
def remove_from_rec(rec_array, name):
    return _drop_fields(rec_array, name, usemask=False, asrecarray=True) 
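
Here asrecarray=True converts the result to a numpy.recarray, so the remaining fields can still be read as attributes. A hypothetical call, assuming the helper above (and its _drop_fields import from numpy.lib.recfunctions) is in scope:

import numpy as np

rec = np.rec.array([(1, 2.5), (2, 3.5)], dtype=[('frame', 'i4'), ('photons', 'f8')])
trimmed = remove_from_rec(rec, 'photons')
print(trimmed.frame)  # attribute access still works: [1 2]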
Example #4
Source File: recarray_tools.py    From pax with BSD 3-Clause "New" or "Revised" License
def drop_fields(arr, *args, **kwargs):
    """Drop fields from numpy structured array
    Gives error if fields don't exist
    """
    return recfunctions.drop_fields(arr, usemask=False, *args, **kwargs) 
Example #5
Source File: recarray_tools.py    From pax with BSD 3-Clause "New" or "Revised" License
def drop_fields_if_exist(arr, fields):
    return drop_fields(arr, [f for f in fields if f in arr.dtype.names]) 
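
The same tolerant behaviour written directly against recfunctions, so that names which are not actual fields are skipped instead of raising (field names are illustrative):

import numpy as np
from numpy.lib import recfunctions as rfn

arr = np.zeros(2, dtype=[('x', 'f8'), ('y', 'f8')])
wanted = ['y', 'z']  # 'z' is not a field of arr and is silently skipped
out = rfn.drop_fields(arr, [f for f in wanted if f in arr.dtype.names], usemask=False)
print(out.dtype.names)  # ('x',)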
Example #6
Source File: recarray_tools.py    From pax with BSD 3-Clause "New" or "Revised" License
def fields_view(arr, fields):
    """View one or several columns from a numpy record array"""
    # Single field is easy:
    if isinstance(fields, str):
        return arr[fields]
    for f in fields:
        if f not in arr.dtype.names:
            raise ValueError("Field %s is not in the array..." % f)
    # No obvious way to get a true view of several fields, so make a copy for now
    return drop_fields(arr, [f for f in arr.dtype.names if f not in fields])
    # The solution in
    # http://stackoverflow.com/questions/15182381/how-to-return-a-view-of-several-columns-in-numpy-structured-array
    # doesn't work in combination with filter_on_fields...
    # dtype2 = np.dtype({name:arr.dtype.fields[name] for name in columns})
    # return np.ndarray(arr.shape, dtype2, arr, 0, arr.strides) 
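
A hypothetical call to the helper above (assuming it and the drop_fields wrapper from Example #4 are in scope); note that, despite the name, the multi-field branch returns a copy built with drop_fields rather than a true view:

import numpy as np

arr = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'f8')])
sub = fields_view(arr, ['a', 'c'])  # keeps 'a' and 'c' by dropping everything else
print(sub.dtype.names)              # ('a', 'c')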
Example #7
Source File: compress.py    From picopore with GNU General Public License v3.0
def indexToZero(f, path, col, name="picopore.{}_index", dataColumn=None):
    dataset = f[path]
    name = name.format(col)
    data = f[path].value
    if name not in dataset.attrs:
        dataColumn = data[col] if dataColumn is None else dataColumn
        start_index = min(dataColumn)
        dataset.attrs.create(name, start_index, dtype=getDtype(start_index))
        dataColumn = dataColumn-start_index
        data = drop_fields(data, [col])
        data = append_fields(data, [col], [dataColumn], [getDtype(dataColumn)])
    return data 
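
The function above replaces a column by dropping it and then appending a recomputed version. Reduced to a standalone sketch on a plain structured array (field names and values are illustrative):

import numpy as np
from numpy.lib.recfunctions import drop_fields, append_fields

events = np.array([(100, 1.5), (105, 2.0)], dtype=[('start', 'i8'), ('mean', 'f8')])
rebased = events['start'] - events['start'].min()        # shift the column so it starts at zero
events = drop_fields(events, ['start'], usemask=False)   # remove the old column...
events = append_fields(events, ['start'], [rebased],     # ...and append the rebased one
                       dtypes=[rebased.dtype], usemask=False)
print(events['start'])  # [0 5]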
Example #8
Source File: compress.py    From picopore with GNU General Public License v3.0
def deepLosslessCompress(f, group):
    paths = findDatasets(f, group, "Events")
    paths = [path for path in paths if "Basecall" in path]
    # index event detection
    if "UniqueGlobalKey/channel_id" in f:
        sampleRate = f["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
        for path in paths:
            if f[path].parent.parent.attrs.__contains__("event_detection"):
                # index back to event detection
                dataset = f[path].value
                start = np.array([int(round(sampleRate * i)) for i in dataset["start"]])
                dataset = indexToZero(f, path, "start", dataColumn=start)
                move = dataset["move"] # rewrite move dataset because it's int64 for max 2
                # otherwise, event by event
                dataset = drop_fields(dataset, ["mean", "stdv", "length", "move"])
                dataset = append_fields(dataset, ["move"], [move], [getDtype(move)])
                rewriteDataset(f, path, compression="gzip", compression_opts=9, dataset=dataset)
                # rewrite eventdetection too - start is also way too big here
                eventDetectionPath = findDatasets(f, "all", entry_point=f[path].parent.parent.attrs.get("event_detection"))[0]
                if "picopore.start_index" not in f[eventDetectionPath].attrs.keys():
                    eventData = indexToZero(f, eventDetectionPath, "start")
                    rewriteDataset(f, eventDetectionPath, compression="gzip", compression_opts=9, dataset=eventData)

    if __basegroup_name__ not in f:
        f.create_group(__basegroup_name__)
        for name, group in f.items():
            if name != __basegroup_name__:
                recursiveCollapseGroups(f, __basegroup_name__, name, group)
    return losslessCompress(f, group) 
Example #9
Source File: simpletable.py    From pyphot with MIT License
def pop_columns(self, names):
        """
        Pop several columns from the table

        Parameters
        ----------

        names: sequence
            A list containing the names of the columns to remove

        Returns
        -------

        values: tuple
            list of columns
        """

        # accept a single column name as well as a sequence of names
        if isinstance(names, str) or not hasattr(names, '__iter__'):
            names = [names]

        p = [self[k] for k in names]

        _names = set([ self.resolve_alias(k) for k in names ])
        self.data = recfunctions.drop_fields(self.data, _names)
        for k in names:
            self._aliases.pop(k, None)
            self._units.pop(k, None)
            self._desc.pop(k, None)

        return p 
Example #10
Source File: simpletable.py    From pyphot with MIT License
def pop_columns(self, names):
        """
        Pop several columns from the table

        Parameters
        ----------

        names: sequence
            A list containing the names of the columns to remove

        Returns
        -------

        values: tuple
            list of columns
        """

        # accept a single column name as well as a sequence of names
        if isinstance(names, str) or not hasattr(names, '__iter__'):
            names = [names]

        p = [self[k] for k in names]

        _names = set([ self.resolve_alias(k) for k in names ])
        self.data = recfunctions.drop_fields(self.data, _names)
        for k in names:
            self._aliases.pop(k, None)
            self._units.pop(k, None)
            self._desc.pop(k, None)

        return p 
Example #11
Source File: NLDN.py    From lmatools with BSD 2-Clause "Simplified" License
def __init__(self, filename, date_sep='-', time_sep=':', format='stroke_DC3'):
        """ Load NLDN data from a file, into a numpy named array stored in the
            *data* attribute. *data*['time'] is relative to the *basedate* datetime
            attribute
            """
        self.format=format
        
        dtype_specs = getattr(self, format)
        
        
        nldn_initial = np.genfromtxt(filename, dtype=dtype_specs['columns'])
        date_part = np.genfromtxt(nldn_initial['date'],
                        delimiter=date_sep, dtype=dtype_specs['date_dtype'])
        time_part = np.genfromtxt(nldn_initial['time'],
                        delimiter=time_sep, dtype=dtype_specs['time_dtype'])
        dates = [datetime(a['year'], a['month'], a['day'], b['hour'], b['minute']) 
                    for a, b in zip(date_part, time_part)]
        min_date = min(dates)
        min_date = datetime(min_date.year, min_date.month, min_date.day)
        t = np.fromiter( ((d-min_date).total_seconds() for d in dates), dtype='float64')
        t += time_part['second']
        
        self.basedate = min_date
        data = drop_fields(nldn_initial, ('date', 'time'))
        data = append_fields(data, 'time', t)
        
        self.data = data 
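
One detail worth noting in this example: append_fields is called with its default usemask, so (unlike most other examples on this page) the resulting data attribute is a masked array. A small illustration with an illustrative field name:

import numpy as np
from numpy.lib.recfunctions import append_fields

a = np.zeros(2, dtype=[('x', 'f8')])
t = np.arange(2.0)
print(type(append_fields(a, 'time', t)))                 # a numpy masked array (the default)
print(type(append_fields(a, 'time', t, usemask=False)))  # a plain numpy.ndarray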
Example #12
Source File: classify.py    From yatsm with MIT License
def classify_line(filename, classifier):
    """ Use `classifier` to classify data stored in `filename`

    Args:
        filename (str): filename of stored results
        classifier (sklearn classifier): pre-trained classifier

    """
    z = np.load(filename)
    rec = z['record']

    if rec.shape[0] == 0:
        logger.debug('No records in {f}. Continuing'.format(f=filename))
        return

    # Rescale intercept term
    coef = rec['coef'].copy()  # copy so we don't transform npz coef
    coef[:, 0, :] = (coef[:, 0, :] + coef[:, 1, :] *
                     ((rec['start'] + rec['end']) / 2.0)[:, np.newaxis])

    # Include RMSE for full X matrix
    newdim = (coef.shape[0], coef.shape[1] * coef.shape[2])
    X = np.hstack((coef.reshape(newdim), rec['rmse']))

    # Create output and classify
    classes = classifier.classes_
    classified = np.zeros(rec.shape[0], dtype=[
        ('class', 'u2'),
        ('class_proba', 'float32', classes.size)
    ])
    classified['class'] = classifier.predict(X)
    classified['class_proba'] = classifier.predict_proba(X)

    # Replace with new classification if exists, or add by merging
    if ('class' in rec.dtype.names and 'class_proba' in rec.dtype.names and
            rec['class_proba'].shape[1] == classes.size):
        rec['class'] = classified['class']
        rec['class_proba'] = classified['class_proba']
    else:
        # Drop incompatible classified results if needed
        # e.g., if the number of classes changed
        if 'class' in rec.dtype.names and 'class_proba' in rec.dtype.names:
            rec = nprfn.drop_fields(rec, ['class', 'class_proba'])
        rec = nprfn.merge_arrays((rec, classified), flatten=True)

    # Create dict for re-saving `npz` file (only way to append)
    out = {}
    for k, v in six.iteritems(z):
        out[k] = v
    out['classes'] = classes
    out['record'] = rec

    np.savez(filename, **out) 
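
A reduced sketch of the drop-and-merge step above, on toy data (field names and sizes are illustrative):

import numpy as np
from numpy.lib import recfunctions as nprfn

rec = np.zeros(2, dtype=[('id', 'i4'), ('class', 'u2')])
new = np.ones(2, dtype=[('class', 'u2'), ('class_proba', 'f4', (3,))])

rec = nprfn.drop_fields(rec, ['class'], usemask=False)  # drop the stale field first
rec = nprfn.merge_arrays((rec, new), flatten=True)      # then merge the new fields in column-wise
print(rec.dtype.names)  # ('id', 'class', 'class_proba')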
Example #13
Source File: compress.py    From picopore with GNU General Public License v3.0
def deepLosslessDecompress(f, group):
    # rebuild group hierarchy
    if __basegroup_name__ in f.keys():
        uncollapseGroups(f, f[__basegroup_name__])
    paths = findDatasets(f, group)
    paths = [path for path in paths if "Basecall" in path]
    sampleRate = f["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
    for path in paths:
        if f[path].parent.parent.attrs.__contains__("event_detection"):
            # index back to event detection
            dataset = f[path].value
            if "mean" not in dataset.dtype.names:
                eventDetectionPath = findDatasets(f, "all", entry_point=f[path].parent.parent.attrs.get("event_detection"))[0]
                eventData = f[eventDetectionPath].value
                try:
                    start = eventData["start"] + f[eventDetectionPath].attrs["picopore.start_index"]
                    del f[eventDetectionPath].attrs["picopore.start_index"]
                    eventData = drop_fields(eventData, ["start"])
                    eventData = append_fields(eventData, ["start"], [start], [getDtype(start)])
                    rewriteDataset(f, eventDetectionPath, compression="gzip", compression_opts=1, dataset=eventData)
                except KeyError:
                    # must have been compressed without start indexing
                    pass
                try:
                    start_index = f[path].attrs["picopore.start_index"]
                    del f[path].attrs["picopore.start_index"]
                except KeyError:
                    # must have been compressed without start indexing
                    start_index=0
                start = dataset["start"][0] + start_index
                end = dataset["start"][-1] + start_index
                # constrain to range in basecall
                eventData = eventData[np.logical_and(eventData["start"] >= start, eventData["start"] <= end)]
                # remove missing events
                i=0
                keepIndex = []
                for time in dataset["start"]:
                    while i < eventData.shape[0] and eventData["start"][i] != time + start_index:
                        i += 1
                    keepIndex.append(i)
                eventData = eventData[keepIndex]
                dataset = drop_fields(dataset, "start")
                start = [i/sampleRate for i in eventData["start"]]
                length = [i/sampleRate for i in eventData["length"]]
                dataset = append_fields(dataset, ["mean", "start", "stdv", "length"], [eventData["mean"], start, eventData["stdv"], length])
                rewriteDataset(f, path, dataset=dataset)
    return losslessDecompress(f, group)