Python numpy.lib.recfunctions.append_fields() Examples

The following are 19 code examples of numpy.lib.recfunctions.append_fields(), drawn from open-source projects. The original project and source file are noted above each example. You may also want to check out the other available functions and classes of the numpy.lib.recfunctions module.
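For orientation, here is a minimal, self-contained sketch of the basic call, separate from the project examples below (the array and field names are made up):

import numpy as np
from numpy.lib import recfunctions as rfn

base = np.array([(1, 2.0), (3, 4.0)], dtype=[('a', 'i4'), ('b', 'f8')])
# usemask=False returns a plain ndarray; the default returns a masked array
out = rfn.append_fields(base, 'c', data=np.array([10, 20]), usemask=False)
print(out.dtype.names)  # ('a', 'b', 'c')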
Example #1
Source File: LMAarrayFile.py    From lmatools with BSD 2-Clause "Simplified" License
def __getattr__(self, attrname):

        # See __init__ for column names
        try:
            return self.data[attrname]
        except (KeyError, ValueError):
            # Field not present in the file; fall through.
            pass

        # If we got here, stations column wasn't in file.
        #   Try getting it from station mask.
        if attrname=='stations':
            stations = self.hexMaskToStationCount()
            # placing self.data in a list due to this bug
            # http://stackoverflow.com/questions/36440557/typeerror-when-appending-fields-to-a-structured-array-of-size-one
            self.data = append_fields([self.data], ('stations',), (stations,))
            return stations

        return None
        # raise AttributeError(attrname) 
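The list wrapping in the append_fields call above works around a TypeError that some numpy versions raise when appending fields to a structured array of size one (see the linked Stack Overflow question). A minimal sketch of that workaround, assuming an affected numpy version:

import numpy as np
from numpy.lib.recfunctions import append_fields

one_row = np.array([(1.0,)], dtype=[('x', 'f8')])  # size-one structured array
# append_fields(one_row, ...) can raise TypeError here; wrapping the base
# array in a list sidesteps the bug
merged = append_fields([one_row], ('y',), (np.array([2.0]),))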
Example #2
Source File: utils.py    From diogenes with MIT License
def append_cols(M, cols, col_names):
    """Append columns to an existing structured array

    Parameters
    ----------
    M : numpy.ndarray
        structured array
    cols : list of numpy.ndarray
    col_names : list of str
        names for new columns

    Returns
    -------
    numpy.ndarray
        structured array with new columns
    """
    if is_nd(cols):
        cols = [cols]
    M = check_sa(M)
    cols = [check_col(
        col, 
        n_rows=M.shape[0], 
        argument_name='cols[{}]'.format(idx)) for idx, col in enumerate(cols)]
    col_names = check_col_names(col_names, n_cols=len(cols))
    return nprf.append_fields(M, col_names, data=cols, usemask=False) 
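Stripped of its validation helpers, the wrapper above reduces to a single append_fields call with a list of names and a list of columns. An illustrative sketch with invented data:

import numpy as np
from numpy.lib import recfunctions as nprf

M = np.array([(1,), (2,)], dtype=[('id', 'i4')])
cols = [np.array([0.5, 1.5]), np.array([7, 8])]
out = nprf.append_fields(M, ['score', 'count'], data=cols, usemask=False)
print(out.dtype.names)  # ('id', 'score', 'count')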
Example #3
Source File: readwrite.py    From root_pandas with MIT License
def do_flatten(arr, flatten):
    if flatten is True:
        warnings.warn(" The option flatten=True is deprecated. Please specify the branches you would like "
                      "to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
        arr_, idx = stretch(arr, return_indices=True)
    else:
        nonscalar = get_nonscalar_columns(arr)
        fields = [x for x in arr.dtype.names if (x not in nonscalar or x in flatten)]

        for col in flatten:
            if col in nonscalar:
                pass
            elif col in fields:
                raise ValueError("Requested to flatten {col} but it has a scalar type"
                                 .format(col=col))
            else:
                raise ValueError("Requested to flatten {col} but it wasn't loaded from the input file"
                                 .format(col=col))

        arr_, idx = stretch(arr, fields=fields, return_indices=True)
    arr = append_fields(arr_, '__array_index', idx, usemask=False, asrecarray=True)
    return arr 
Example #4
Source File: recarray_tools.py    From pax with BSD 3-Clause "New" or "Revised" License
def append_fields(base, names, data, dtypes=None, fill_value=-1,
                  usemask=False,   # Different from recfunctions default
                  asrecarray=False):
    """Append fields to numpy structured array
    If fields already exists in data, will overwrite
    """
    if isinstance(names, (tuple, list)):
        # Add multiple fields at once
        if dtypes is None:
            dtypes = [d.dtype for d in data]
        # Convert to numpy arrays so we can use boolean index arrays
        names = np.array(names)
        data = np.array(data)
        dtypes = np.array(dtypes)
        not_yet_in_data = ~np.in1d(names, base.dtype.names)
        # Append the fields that were not in the data
        base = recfunctions.append_fields(base,
                                          names[not_yet_in_data].tolist(),
                                          data[not_yet_in_data].tolist(),
                                          dtypes[not_yet_in_data].tolist(),
                                          fill_value, usemask, asrecarray)
        # Overwrite the fields that are already in the data
        for i in np.where(~not_yet_in_data)[0]:
            base[names[i]] = data[i]
        return base
    else:
        # Add single field
        if names in base.dtype.names:
            # Field already exists: overwrite data
            base[names] = data
            return base
        else:
            return recfunctions.append_fields(base, names, data, dtypes,
                                              fill_value, usemask, asrecarray) 
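Note that this wrapper flips usemask to False. By default recfunctions.append_fields returns a masked array, which most of the projects on this page immediately opt out of; a quick illustration with arbitrary values:

import numpy as np
from numpy.lib import recfunctions

base = np.array([(1,), (2,)], dtype=[('a', 'i4')])
masked = recfunctions.append_fields(base, 'b', np.array([3, 4]))
plain = recfunctions.append_fields(base, 'b', np.array([3, 4]), usemask=False)
print(type(masked).__name__, type(plain).__name__)  # MaskedArray ndarray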
Example #5
Source File: NLDN.py    From lmatools with BSD 2-Clause "Simplified" License
def __init__(self, filename, date_sep='-', time_sep=':', format='stroke_DC3'):
        """ Load NLDN data from a file, into a numpy named array stored in the
            *data* attribute. *data*['time'] is relative to the *basedate* datetime
            attribute
            """
        self.format=format
        
        dtype_specs = getattr(self, format)
        
        
        nldn_initial = np.genfromtxt(filename, dtype=dtype_specs['columns'])
        date_part = np.genfromtxt(nldn_initial['date'],
                        delimiter=date_sep, dtype=dtype_specs['date_dtype'])
        time_part = np.genfromtxt(nldn_initial['time'],
                        delimiter=time_sep, dtype=dtype_specs['time_dtype'])
        dates = [datetime(a['year'], a['month'], a['day'], b['hour'], b['minute']) 
                    for a, b in zip(date_part, time_part)]
        min_date = min(dates)
        min_date = datetime(min_date.year, min_date.month, min_date.day)
        t = np.fromiter( ((d-min_date).total_seconds() for d in dates), dtype='float64')
        t += time_part['second']
        
        self.basedate = min_date
        data = drop_fields(nldn_initial, ('date', 'time'))
        data = append_fields(data, 'time', t)
        
        self.data = data 
Example #6
Source File: data.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _get_data():
    filepath = dirname(abspath(__file__))
    with open(filepath + '/anes96.csv', "rb") as f:
        data = recfromtxt(f, delimiter="\t", names=True, dtype=float)
        logpopul = log(data['popul'] + .1)
        data = nprf.append_fields(data, 'logpopul', logpopul, usemask=False,
                                  asrecarray=True)
    return data 
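Because the call passes usemask=False and asrecarray=True, the result is a numpy recarray and the appended field is also reachable as an attribute. A self-contained sketch of the same pattern, with toy data standing in for the CSV:

import numpy as np
from numpy.lib import recfunctions as nprf

data = np.array([(100.0,), (10.0,)], dtype=[('popul', 'f8')])
logpopul = np.log(data['popul'] + .1)
data = nprf.append_fields(data, 'logpopul', logpopul, usemask=False,
                          asrecarray=True)
print(data.logpopul)  # attribute access, courtesy of asrecarray=True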
Example #7
Source File: compress.py    From picopore with GNU General Public License v3.0
def deepLosslessCompress(f, group):
    paths = findDatasets(f, group, "Events")
    paths = [path for path in paths if "Basecall" in path]
    # index event detection
    if "UniqueGlobalKey/channel_id" in f:
        sampleRate = f["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
        for path in paths:
            if f[path].parent.parent.attrs.__contains__("event_detection"):
                # index back to event detection
                dataset = f[path].value
                start = np.array([int(round(sampleRate * i)) for i in dataset["start"]])
                dataset = indexToZero(f, path, "start", dataColumn=start)
                move = dataset["move"] # rewrite move dataset because it's int64 for max 2
                # otherwise, event by event
                dataset = drop_fields(dataset, ["mean", "stdv", "length", "move"])
                dataset = append_fields(dataset, ["move"], [move], [getDtype(move)])
                rewriteDataset(f, path, compression="gzip", compression_opts=9, dataset=dataset)
                # rewrite eventdetection too - start is also way too big here
                eventDetectionPath = findDatasets(f, "all", entry_point=f[path].parent.parent.attrs.get("event_detection"))[0]
                if "picopore.start_index" not in f[eventDetectionPath].attrs.keys():
                    eventData = indexToZero(f, eventDetectionPath, "start")
                    rewriteDataset(f, eventDetectionPath, compression="gzip", compression_opts=9, dataset=eventData)

    if __basegroup_name__ not in f:
        f.create_group(__basegroup_name__)
        for name, group in f.items():
            if name != __basegroup_name__:
                recursiveCollapseGroups(f, __basegroup_name__, name, group)
    return losslessCompress(f, group) 
Example #8
Source File: compress.py    From picopore with GNU General Public License v3.0
def indexToZero(f, path, col, name="picopore.{}_index", dataColumn=None):
    dataset = f[path]
    name = name.format(col)
    data = f[path].value
    if name not in dataset.attrs.keys():
        dataColumn = data[col] if dataColumn is None else dataColumn
        start_index = min(dataColumn)
        dataset.attrs.create(name, start_index, dtype=getDtype(start_index))
        dataColumn = dataColumn - start_index
        data = drop_fields(data, [col])
        data = append_fields(data, [col], [dataColumn], [getDtype(dataColumn)])
    return data 
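The drop_fields/append_fields pair is how these scripts replace a column in a structured array, here so that the zero-rebased values fit a smaller dtype. A standalone sketch of the idea (getDtype is project-specific, so an explicit dtype string is used instead):

import numpy as np
from numpy.lib.recfunctions import drop_fields, append_fields

data = np.array([(10000000, 1.0), (10000005, 2.0)],
                dtype=[('start', 'i8'), ('mean', 'f8')])
rebased = data['start'] - data['start'].min()  # values now start at zero
data = drop_fields(data, ['start'])
data = append_fields(data, ['start'], [rebased.astype('i4')], ['i4'],
                     usemask=False)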
Example #9
Source File: micromed.py    From wonambi with GNU General Public License v3.0
def return_videos(self, begtime, endtime):
        vid = self._videos
        begtime *= 1000
        endtime *= 1000

        # remove empty rows
        DTYPE_MAX = iinfo(vid.dtype['duration']).max
        vid = vid[vid['duration'] != DTYPE_MAX]

        if vid.shape[0] == 0:
            raise OSError('No videos for this dataset')

        vid = append_fields(vid, 'absolute_end', vid['delay'] + vid['duration'], usemask=False)

        # full name without number
        i_vid = where((endtime - vid['delay'] >= 0) & (vid['absolute_end'] - begtime >= 0))[0]
        mpgfiles = ['VID_' + str(vid['file_ext'][i]) + '.AVI' for i in i_vid]
        mpgfiles = [str(self.filename.parent / mpg) for mpg in mpgfiles]

        video_beg = (begtime - vid['delay'][i_vid[0]]) / 1000
        video_end = (endtime - vid['delay'][i_vid[-1]]) / 1000

        lg.debug('First Video (#{}) starts at {}'.format(mpgfiles[0], video_beg))
        lg.debug('Last Video (#{}) ends at {}'.format(mpgfiles[-1], video_end))

        return mpgfiles, video_beg, video_end 
Example #10
Source File: lib.py    From picasso with MIT License
def append_to_rec(rec_array, data, name):
    if hasattr(rec_array, name):
        rec_array = remove_from_rec(rec_array, name)
    return _append_fields(
        rec_array,
        name,
        data,
        dtypes=data.dtype,
        usemask=False,
        asrecarray=True,
    )
Example #11
Source File: numpydataset.py    From scikit-hep with BSD 3-Clause "New" or "Revised" License
def __setattr__(self, name, value):
        listofattributes = self.__dict__.keys()

        if isinstance(value, numpy.ndarray) and name != "_data" and name not in listofattributes:
            if value.shape != self.data.shape:
                raise ValueError('Arrays should have the same dimensions')
            else:
                from numpy.lib import recfunctions
                detail = getattr(value, 'provenance', None)
                data = recfunctions.append_fields(self.data, name, value, usemask=False)
                self._data = data
                self._provenance += Transformation("Array {0} has been created".format(name), detail)
                self.__add_var(name)
        else:
            dict.__setattr__(self, name, value) 
Example #12
Source File: utils.py    From snorkel with Apache License 2.0
def add_slice_labels(
    dataloader: DictDataLoader, base_task: Task, S: np.recarray
) -> None:
    """Modify a dataloader in-place, adding labels for slice tasks.

    Parameters
    ----------
    dataloader
        A DictDataLoader whose dataset.Y_dict attribute will be modified in place
    base_task
        The Task for which we want corresponding slice tasks/labels
    S
        A recarray (output of SFApplier) containing data fields with slice
        indicator information
    """
    # Add the base task if it's missing
    if "base" not in S.dtype.names:
        # Create a new np.recarray with an additional "base" data field
        S = rfn.append_fields(
            [S], names=["base"], data=[np.ones(S.shape)], asrecarray=True
        )

    slice_names = S.dtype.names

    Y_dict: Dict[str, np.ndarray] = dataloader.dataset.Y_dict  # type: ignore
    labels = Y_dict[base_task.name]

    for slice_name in slice_names:
        # Gather ind labels
        ind_labels = torch.LongTensor(S[slice_name])  # type: ignore

        # Mask out "inactive" pred_labels as specified by ind_labels
        pred_labels = labels.clone()
        pred_labels[~ind_labels.bool()] = -1

        ind_task_name = f"{base_task.name}_slice:{slice_name}_ind"
        pred_task_name = f"{base_task.name}_slice:{slice_name}_pred"

        # Update dataloaders
        Y_dict[ind_task_name] = ind_labels
        Y_dict[pred_task_name] = pred_labels 
Example #13
Source File: test_regression.py    From Computable with MIT License
def test_append_fields_dtype_list(self):
        """Ticket #1676: append_fields should accept a plain list of dtypes."""
        from numpy.lib.recfunctions import append_fields
        base = np.array([1, 2, 3], dtype=np.int32)
        data = np.eye(3).astype(np.int32)
        names = ['a', 'b', 'c']
        dlist = [np.float64, np.int32, np.int32]
        try:
            append_fields(base, names, data, dlist)
        except Exception:
            raise AssertionError() 
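Outside the test harness, the dtype-list form returns a structured array whose unnamed base column is auto-named (typically 'f0'); a sketch of what the call produces:

import numpy as np
from numpy.lib.recfunctions import append_fields

base = np.array([1, 2, 3], dtype=np.int32)
data = np.eye(3).astype(np.int32)
a = append_fields(base, ['a', 'b', 'c'], data, [np.float64, np.int32, np.int32])
print(a.dtype.names)  # ('f0', 'a', 'b', 'c') on recent numpy versions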
Example #14
Source File: data.py    From vnpy_crypto with MIT License
def _get_data():
    filepath = dirname(abspath(__file__))
    with open(filepath + '/anes96.csv', "rb") as f:
        data = recfromtxt(f, delimiter="\t", names=True, dtype=float)
        logpopul = log(data['popul'] + .1)
        data = nprf.append_fields(data, 'logpopul', logpopul, usemask=False,
                                  asrecarray=True)
    return data 
Example #15
Source File: tool_findnrstdist.py    From HiSpatialCluster with Apache License 2.0
def execute(self, parameters, messages):
        input_feature=parameters[0].valueAsText 
        id_field=parameters[1].valueAsText
        dens_field=parameters[2].valueAsText
        output_feature=parameters[3].valueAsText
        calc_device=parameters[4].valueAsText
        
        if '64 bit' not in sys.version and calc_device=='GPU':
            arcpy.AddError('Platform is 32bit and has no support for GPU/CUDA.')
            return

        arcpy.SetProgressorLabel('Calculating Point with Higher Density ...')
        
        arrays=arcpy.da.FeatureClassToNumPyArray(input_feature,[id_field,'SHAPE@X','SHAPE@Y',dens_field])
        
        results=0
        if calc_device=='GPU':
            from section_gpu import calc_nrst_dist_gpu
            results=calc_nrst_dist_gpu(arrays[id_field],arrays['SHAPE@X'],arrays['SHAPE@Y'],arrays[dens_field])
        else:
            from section_cpu import calc_nrst_dist_cpu
            results=calc_nrst_dist_cpu(arrays[id_field],arrays['SHAPE@X'],arrays['SHAPE@Y'],arrays[dens_field],parameters[5].value)
        
        struct_arrays=recfunctions.append_fields(arrays,'NRSTDIST',data=results[0],usemask=False)
        struct_arrays=recfunctions.append_fields(struct_arrays,'PARENTID',data=results[1],usemask=False)
        struct_arrays=recfunctions.append_fields(struct_arrays,'MULTIPLY',data=results[0]*arrays[dens_field],usemask=False)
#        if '64 bit' in sys.version and id_field==arcpy.Describe(input_feature).OIDFieldName:
#            sadnl=list(struct_arrays.dtype.names)
#            sadnl[sadnl.index(id_field)]='OID@'
#            struct_arrays.dtype.names=tuple(sadnl)
            
        arcpy.da.NumPyArrayToFeatureClass(struct_arrays,output_feature,\
                                          ('SHAPE@X','SHAPE@Y'),arcpy.Describe(input_feature).spatialReference)   
            
        return 
Example #16
Source File: tool_calculatedensity.py    From HiSpatialCluster with Apache License 2.0
def execute(self, parameters, messages):
        
        #get params
        input_feature=parameters[0].valueAsText 
        id_field=parameters[1].valueAsText
        weight_field=parameters[2].valueAsText
        output_feature=parameters[3].valueAsText
        kernel_type=parameters[4].valueAsText
        calc_device=parameters[7].valueAsText
        
        if '64 bit' not in sys.version and calc_device=='GPU':
            arcpy.AddError('Platform is 32bit and has no support for GPU/CUDA.')
            return

        arcpy.SetProgressorLabel('Calculating Density...')
        
        #calculation          
        arrays=arcpy.da.FeatureClassToNumPyArray(input_feature,[id_field,'SHAPE@X','SHAPE@Y',weight_field])
        densities=0
        if calc_device=='GPU':            
            from section_gpu import calc_density_gpu
            densities=calc_density_gpu(arrays['SHAPE@X'],arrays['SHAPE@Y'],\
                                   arrays[weight_field],kernel_type,\
                                   cutoffd=parameters[5].value,sigma=parameters[6].value)
        else:
            from section_cpu import calc_density_cpu
            densities=calc_density_cpu(arrays['SHAPE@X'],arrays['SHAPE@Y'],\
                                   arrays[weight_field],kernel_type,\
                                   parameters[8].value,cutoffd=parameters[5].value,sigma=parameters[6].value)
        
        result_struct=recfunctions.append_fields(recfunctions.drop_fields(arrays,weight_field),\
                                                 'DENSITY',data=densities,usemask=False)
        
#        if '64 bit' in sys.version and id_field==arcpy.Describe(input_feature).OIDFieldName:
#            sadnl=list(result_struct.dtype.names)
#            sadnl[sadnl.index(id_field)]='OID@'
#            result_struct.dtype.names=tuple(sadnl)
        
        arcpy.da.NumPyArrayToFeatureClass(result_struct,output_feature,\
                                          ('SHAPE@X','SHAPE@Y'),arcpy.Describe(input_feature).spatialReference)  
        
        return 
Example #17
Source File: compress.py    From picopore with GNU General Public License v3.0
def deepLosslessDecompress(f, group):
    # rebuild group hierarchy
    if __basegroup_name__ in f.keys():
        uncollapseGroups(f, f[__basegroup_name__])
    paths = findDatasets(f, group)
    paths = [path for path in paths if "Basecall" in path]
    sampleRate = f["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
    for path in paths:
        if f[path].parent.parent.attrs.__contains__("event_detection"):
            # index back to event detection
            dataset = f[path].value
            if "mean" not in dataset.dtype.names:
                eventDetectionPath = findDatasets(f, "all", entry_point=f[path].parent.parent.attrs.get("event_detection"))[0]
                eventData = f[eventDetectionPath].value
                try:
                    start = eventData["start"] + f[eventDetectionPath].attrs["picopore.start_index"]
                    del f[eventDetectionPath].attrs["picopore.start_index"]
                    eventData = drop_fields(eventData, ["start"])
                    eventData = append_fields(eventData, ["start"], [start], [getDtype(start)])
                    rewriteDataset(f, eventDetectionPath, compression="gzip", compression_opts=1, dataset=eventData)
                except KeyError:
                    # must have been compressed without start indexing
                    pass
                try:
                    start_index = f[path].attrs["picopore.start_index"]
                    del f[path].attrs["picopore.start_index"]
                except KeyError:
                    # must have been compressed without start indexing
                    start_index=0
                start = dataset["start"][0] + start_index
                end = dataset["start"][-1] + start_index
                # constrain to range in basecall
                eventData = eventData[np.logical_and(eventData["start"] >= start, eventData["start"] <= end)]
                # remove missing events
                i=0
                keepIndex = []
                for time in dataset["start"]:
                    while eventData["start"][i] != time + start_index and i < eventData.shape[0]:
                        i += 1
                    keepIndex.append(i)
                eventData = eventData[keepIndex]
                dataset = drop_fields(dataset, "start")
                start = [i/sampleRate for i in eventData["start"]]
                length = [i/sampleRate for i in eventData["length"]]
                dataset = append_fields(dataset, ["mean", "start", "stdv", "length"], [eventData["mean"], start, eventData["stdv"], length])
                rewriteDataset(f, path, dataset=dataset)
    return losslessDecompress(f, group) 
Example #18
Source File: simpletable.py    From pyphot with MIT License
def add_column(self, name, data, dtype=None, unit=None, description=None):
        """
        Add one or multiple columns to the table

        Parameters
        ----------
        name: str or sequence(str)
           The name(s) of the column(s) to add

        data: ndarray, or sequence of ndarray
            The column data, or sequence of columns

        dtype: dtype
            numpy dtype for the data to add

        unit: str
            The unit of the values in the column

        description: str
            A description of the content of the column
        """

        _data = np.array(data, dtype=dtype)
        dtype = _data.dtype

        # unknown type is converted to text
        if dtype.type == np.object_:
            if len(data) == 0:
                longest = 0
            else:
                longest = len(max(data, key=len))
                _data = np.asarray(data, dtype='|%iS' % longest)

        dtype = _data.dtype

        if len(self.data.dtype) > 0:
            # existing data in the table
            if isinstance(name, basestring):
                # _name = name.encode('utf8')
                _name = str(name)
            else:
                # _name = [k.encode('utf8') for k in name]
                _name = [str(k) for k in name]

            self.data = recfunctions.append_fields(self.data, _name, _data,
                                                   dtypes=dtype, usemask=False,
                                                   asrecarray=True)

        else:
            if _data.ndim > 1:
                newdtype = (str(name), _data.dtype, (_data.shape[1],))
            else:
                newdtype = (str(name), _data.dtype)
            self.data = np.array(_data, dtype=[newdtype])

        if unit is not None:
            self.set_unit(name, unit)

        if description is not None:
            self.set_comment(name, description) 
Example #19
Source File: simpletable.py    From TheCannon with MIT License
def add_column(self, name, data, dtype=None, unit=None, description=None):
        """
        Add one or multiple columns to the table

        Parameters
        ----------
        name: str or sequence(str)
           The name(s) of the column(s) to add

        data: ndarray, or sequence of ndarray
            The column data, or sequence of columns

        dtype: dtype
            numpy dtype for the data to add

        unit: str
            The unit of the values in the column

        description: str
            A description of the content of the column
        """

        _data = np.array(data, dtype=dtype)
        dtype = _data.dtype

        # unknown type is converted to text
        if dtype.type == np.object_:
            if len(data) == 0:
                longest = 0
            else:
                longest = len(max(data, key=len))
                _data = np.asarray(data, dtype='|%iS' % longest)

        dtype = _data.dtype

        if len(self.data.dtype) > 0:
            # existing data in the table
            self.data = recfunctions.append_fields(self.data, name, _data,
                                                   dtypes=dtype, usemask=False,
                                                   asrecarray=True)
        else:
            if _data.ndim > 1:
                newdtype = (str(name), _data.dtype, (_data.shape[1],))
            else:
                newdtype = (str(name), _data.dtype)
            self.data = np.array(_data, dtype=[newdtype])

        if unit is not None:
            self.set_unit(name, unit)

        if description is not None:
            self.set_comment(name, description)