Python itertools.imap() Examples

The following are 30 code examples of itertools.imap(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module itertools, or try the search function.
Example #1
Source File: sqlize_csv.py    From public-transit-tools with Apache License 2.0 6 votes vote down vote up
def check_date_fields(rows, col_names, tablename, fname):
    '''Ensure date fields are in the correct YYYYMMDD format before adding them to the SQL table.

    Returns a lazy iterator over rows: each row is validated when it is
    consumed and is passed through unchanged. An invalid date is reported
    with arcpy.AddError and aborts with BBB_SharedFunctions.CustomError.
    '''
    def check_date_cols(row):
        # Only the two calendar tables are handled here; each has its own
        # set of date columns.
        if tablename == "calendar":
            date_cols = ["start_date", "end_date"]
        elif tablename == "calendar_dates":
            date_cols = ["date"]
        positions = [col_names.index(name) for name in date_cols]
        for pos in positions:
            value = row[pos]
            try:
                datetime.datetime.strptime(value, '%Y%m%d')
            except ValueError:
                msg = ('Column "' + col_names[pos] + '" in file ' + fname
                       + ' has an invalid value: ' + value
                       + '. Date fields must be in YYYYMMDD format. Please check the date field formatting in calendar.txt and calendar_dates.txt.')
                arcpy.AddError(msg)
                raise BBB_SharedFunctions.CustomError
        return row

    # Pick whichever lazy mapper exists on the running interpreter.
    lazy_map = map if ispy3 else itertools.imap
    return lazy_map(check_date_cols, rows)
Example #2
Source File: session.py    From tidb-docker-compose with Apache License 2.0 6 votes vote down vote up
def _createFromLocal(self, data, schema):
    """
    Build the (RDD, schema) pair for a DataFrame from local data.

    `data` is materialised into a list so it can be traversed more than
    once. When `schema` is None or a list/tuple of column names the schema
    is inferred from the data (and the given names are applied to it);
    otherwise `schema` must be a StructType, else TypeError is raised.
    """
    # make sure data could consumed multiple times
    if not isinstance(data, list):
        data = list(data)

    names_given = isinstance(schema, (list, tuple))
    if schema is None or names_given:
        inferred = self._inferSchemaFromList(data, names=schema)
        to_struct = _create_converter(inferred)
        data = map(to_struct, data)
        if names_given:
            # Overwrite the inferred field names with the caller's names.
            for pos, col_name in enumerate(schema):
                inferred.fields[pos].name = col_name
                inferred.names[pos] = col_name
        schema = inferred
    elif not isinstance(schema, StructType):
        raise TypeError("schema should be StructType or list or None, but got: %s" % schema)

    # convert python objects to sql data
    internal_rows = [schema.toInternal(record) for record in data]
    return self._sc.parallelize(internal_rows), schema
Example #3
Source File: session.py    From tidb-docker-compose with Apache License 2.0 6 votes vote down vote up
def _createFromRDD(self, rdd, schema, samplingRatio):
    """
    Build the (RDD, schema) pair for a DataFrame from an existing RDD.

    When `schema` is None or a list/tuple of column names the schema is
    inferred from `rdd` (using `samplingRatio`) and the given names are
    applied to it; otherwise `schema` must be a StructType, else TypeError
    is raised.
    """
    names_given = isinstance(schema, (list, tuple))
    if schema is None or names_given:
        inferred = self._inferSchema(rdd, samplingRatio, names=schema)
        to_struct = _create_converter(inferred)
        rdd = rdd.map(to_struct)
        if names_given:
            # Overwrite the inferred field names with the caller's names.
            for pos, col_name in enumerate(schema):
                inferred.fields[pos].name = col_name
                inferred.names[pos] = col_name
        schema = inferred
    elif not isinstance(schema, StructType):
        raise TypeError("schema should be StructType or list or None, but got: %s" % schema)

    # convert python objects to sql data
    return rdd.map(schema.toInternal), schema
Example #4
Source File: kfuzzy.py    From maltindex with GNU General Public License v2.0 6 votes vote down vote up
def simplified(self, bytes, aggresive = False):
    """
    Return a base64 signature of `bytes`, at most `self.output_size` long.

    For each of `self.output_size` positions a window of the input is
    taken, the ordinals of its characters are summed modulo 255 and the
    result is kept as one character. With `self.reduce_errors` set, sums
    equal to 0 or 255 are skipped. `aggresive` is not used.
    """
    out_len = self.output_size
    ignore_range = self.ignore_range  # read as in the original; not used below
    bsize = self.bsize
    total_size = len(bytes)
    size = (total_size/bsize) / out_len
    reduce_errors = self.reduce_errors

    chars = []
    # Adjust the output to the desired output size
    for c in xrange(0, out_len):
        start = c*size
        window = bytes[start:(start+1)+bsize]
        checksum = sum(imap(ord, window)) % 255
        if not reduce_errors or (checksum != 255 and checksum != 0):
            chars.append(chr(checksum))

    encoded = base64.b64encode("".join(chars))
    return encoded.strip("=")[:out_len]
Example #5
Source File: session.py    From tidb-docker-compose with Apache License 2.0 6 votes vote down vote up
def _createFromLocal(self, data, schema):
    """
    Build the (RDD, schema) pair for a DataFrame from local data.

    `data` is materialised into a list so it can be traversed more than
    once. When `schema` is None or a list/tuple of column names the schema
    is inferred from the data (and the given names are applied to it);
    otherwise `schema` must be a StructType, else TypeError is raised.
    """
    # make sure data could consumed multiple times
    if not isinstance(data, list):
        data = list(data)

    names_given = isinstance(schema, (list, tuple))
    if schema is None or names_given:
        inferred = self._inferSchemaFromList(data, names=schema)
        to_struct = _create_converter(inferred)
        data = map(to_struct, data)
        if names_given:
            # Overwrite the inferred field names with the caller's names.
            for pos, col_name in enumerate(schema):
                inferred.fields[pos].name = col_name
                inferred.names[pos] = col_name
        schema = inferred
    elif not isinstance(schema, StructType):
        raise TypeError("schema should be StructType or list or None, but got: %s" % schema)

    # convert python objects to sql data
    internal_rows = [schema.toInternal(record) for record in data]
    return self._sc.parallelize(internal_rows), schema
Example #6
Source File: session.py    From tidb-docker-compose with Apache License 2.0 6 votes vote down vote up
def _createFromRDD(self, rdd, schema, samplingRatio):
    """
    Build the (RDD, schema) pair for a DataFrame from an existing RDD.

    When `schema` is None or a list/tuple of column names the schema is
    inferred from `rdd` (using `samplingRatio`) and the given names are
    applied to it; otherwise `schema` must be a StructType, else TypeError
    is raised.
    """
    names_given = isinstance(schema, (list, tuple))
    if schema is None or names_given:
        inferred = self._inferSchema(rdd, samplingRatio, names=schema)
        to_struct = _create_converter(inferred)
        rdd = rdd.map(to_struct)
        if names_given:
            # Overwrite the inferred field names with the caller's names.
            for pos, col_name in enumerate(schema):
                inferred.fields[pos].name = col_name
                inferred.names[pos] = col_name
        schema = inferred
    elif not isinstance(schema, StructType):
        raise TypeError("schema should be StructType or list or None, but got: %s" % schema)

    # convert python objects to sql data
    return rdd.map(schema.toInternal), schema
Example #7
Source File: kfuzzy.py    From maltindex with GNU General Public License v2.0 6 votes vote down vote up
def _fast_hash(self, bytes, aggresive = False):
    """
    Return a base64 signature built from the distinct block sums of `bytes`.

    The input is read in consecutive blocks of `self.bsize` characters; the
    ordinals of each block are summed modulo 255 and the resulting
    character is collected in a set. With `self.reduce_errors` set, sums
    equal to 0 or 255 are skipped. The set is joined in its iteration
    order, base64-encoded, stripped of "=" and cut to `self.output_size`.
    `aggresive` is not used.
    """
    seen = set()

    output_size = self.output_size
    size = len(bytes) *1.00 / output_size  # computed as in the original; not used below
    bsize = self.bsize
    remember = seen.add

    # Block indexes 0..output_size inclusive, as in the original loop.
    for i in xrange(output_size + 1):
        block = bytes[i*bsize:(i+1)*bsize]
        char = sum(imap(ord, block)) % 255
        if not self.reduce_errors or (char != 255 and char != 0):
            remember(chr(char))

    joined = "".join(seen)
    return base64.b64encode(joined).strip("=")[:output_size]
Example #8
Source File: utils.py    From nightmare with GNU General Public License v2.0 6 votes vote down vote up
def safestr(obj, encoding='utf-8'):
    r"""
    Converts any given object to an encoded byte string (utf-8 by default).

    Unicode objects are encoded with `encoding`, byte strings are returned
    as they are, iterators are converted lazily element by element with
    the same `encoding`, and anything else goes through `str()`.

        >>> safestr('hello')
        'hello'
        >>> safestr(u'\u1234')
        '\xe1\x88\xb4'
        >>> safestr(2)
        '2'
    """
    if isinstance(obj, unicode):
        return obj.encode(encoding)
    elif isinstance(obj, str):
        return obj
    elif hasattr(obj, 'next'): # iterator
        # Forward the caller's encoding: a bare imap(safestr, obj) would
        # silently encode every element with the utf-8 default instead.
        return itertools.imap(lambda item: safestr(item, encoding), obj)
    else:
        return str(obj)

# for backward-compatibility 
Example #9
Source File: schema.py    From mopidy-local-sqlite with Apache License 2.0 6 votes vote down vote up
def list_distinct(c, field, query=()):
    """Lazily yield the distinct non-NULL values of `field` in the search table.

    `c` must provide `execute(sql, params)`. `query` is an iterable of
    (key, value) pairs that narrow the result: the key 'any' matches the
    value against every search field, any other key must itself be a
    search field. Raises LookupError for a `field` or key that is not in
    _SEARCH_FIELDS.
    """
    # Identifiers are only interpolated into the SQL after being checked
    # against _SEARCH_FIELDS; values always travel as bound parameters.
    if field not in _SEARCH_FIELDS:
        raise LookupError('Invalid search field: %s' % field)
    sql = """
    SELECT DISTINCT %s AS field
      FROM search
     WHERE field IS NOT NULL
    """ % field
    terms = []
    params = []
    for key, value in query:
        if key == 'any':
            terms.append('? IN (%s)' % ','.join(_SEARCH_FIELDS))
        elif key in _SEARCH_FIELDS:
            terms.append('%s = ?' % key)
        else:
            raise LookupError('Invalid search field: %s' % key)
        params.append(value)
    if terms:
        sql += ' AND ' + ' AND '.join(terms)
    logger.debug('SQLite list query %r: %s', params, sql)
    # The statement is executed right here; only the row unpacking is lazy.
    # A generator expression is used because itertools.imap does not exist
    # on Python 3.
    return (row[0] for row in c.execute(sql, params))
Example #10
Source File: sqlize_csv.py    From public-transit-tools with Apache License 2.0 6 votes vote down vote up
def check_date_fields(rows, col_names, tablename, fname):
    '''Ensure date fields are in the correct YYYYMMDD format before adding them to the SQL table.

    Returns a lazy iterator over rows: each row is validated when it is
    consumed and is passed through unchanged. For an invalid date a message
    is appended to Errors_To_Return and CustomError is raised.
    '''
    def check_date_cols(row):
        # NOTE(review): only "calendar" and "calendar_dates" are handled; any
        # other tablename leaves date_cols unbound - confirm callers never
        # pass one.
        if tablename == "calendar":
            date_cols = ["start_date", "end_date"]
        elif tablename == "calendar_dates":
            date_cols = ["date"]
        date_column_idxs = [col_names.index(x) for x in date_cols]
        for idx in date_column_idxs:
            date = row[idx]
            try:
                datetime.datetime.strptime(date, '%Y%m%d')
            except ValueError:
                msg = (u'Column "' + col_names[idx] + u'" in file ' + fname
                       + u' has an invalid value: ' + date
                       + u'. Date fields must be in YYYYMMDD format. Please check the date field formatting in calendar.txt and calendar_dates.txt.')
                Errors_To_Return.append(msg)
                raise CustomError
        return row

    # itertools.imap exists only on Python 2; on Python 3 the built-in map
    # is already lazy and behaves the same way.
    lazy_map = getattr(itertools, 'imap', map)
    return lazy_map(check_date_cols, rows)
Example #11
Source File: kfuzzy.py    From nightmare with GNU General Public License v2.0 6 votes vote down vote up
def _fast_hash(self, bytes, aggresive = False):
    """
    Return a base64 signature built from the distinct block sums of `bytes`.

    The input is read in consecutive blocks of `self.bsize` characters; the
    ordinals of each block are summed modulo 255 and the resulting
    character is collected in a set. With `self.reduce_errors` set, sums
    equal to 0 or 255 are skipped. The set is joined in its iteration
    order, base64-encoded, stripped of "=" and cut to `self.output_size`.
    `aggresive` is not used.
    """
    seen = set()

    output_size = self.output_size
    size = len(bytes) *1.00 / output_size  # computed as in the original; not used below
    bsize = self.bsize
    remember = seen.add

    # Block indexes 0..output_size inclusive, as in the original loop.
    for i in xrange(output_size + 1):
        block = bytes[i*bsize:(i+1)*bsize]
        char = sum(imap(ord, block)) % 255
        if not self.reduce_errors or (char != 255 and char != 0):
            remember(chr(char))

    joined = "".join(seen)
    return base64.b64encode(joined).strip("=")[:output_size]
Example #12
Source File: utils.py    From torngas with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def safestr(obj, encoding='utf-8'):
    r"""
    Converts any given object to an encoded byte string (utf-8 by default).

    Unicode objects are encoded with `encoding`, byte strings are returned
    as they are, iterators are converted lazily element by element with
    the same `encoding`, and anything else goes through `str()`.

        >>> safestr('hello')
        'hello'
        >>> safestr(u'\u1234')
        '\xe1\x88\xb4'
        >>> safestr(2)
        '2'
    """
    if isinstance(obj, unicode):
        return obj.encode(encoding)
    elif isinstance(obj, str):
        return obj
    elif hasattr(obj, 'next'):  # iterator
        # Forward the caller's encoding: a bare imap(safestr, obj) would
        # silently encode every element with the utf-8 default instead.
        return itertools.imap(lambda item: safestr(item, encoding), obj)
    else:
        return str(obj)

# for backward-compatibility 
Example #13
Source File: kfuzzy.py    From nightmare with GNU General Public License v2.0 6 votes vote down vote up
def simplified(self, bytes, aggresive = False):
    """
    Return a base64 signature of `bytes`, at most `self.output_size` long.

    For each of `self.output_size` positions a window of the input is
    taken, the ordinals of its characters are summed modulo 255 and the
    result is kept as one character. With `self.reduce_errors` set, sums
    equal to 0 or 255 are skipped. `aggresive` is not used.
    """
    out_len = self.output_size
    ignore_range = self.ignore_range  # read as in the original; not used below
    bsize = self.bsize
    total_size = len(bytes)
    size = (total_size/bsize) / out_len
    reduce_errors = self.reduce_errors

    chars = []
    # Adjust the output to the desired output size
    for c in xrange(0, out_len):
        start = c*size
        window = bytes[start:(start+1)+bsize]
        checksum = sum(imap(ord, window)) % 255
        if not reduce_errors or (checksum != 255 and checksum != 0):
            chars.append(chr(checksum))

    encoded = base64.b64encode("".join(chars))
    return encoded.strip("=")[:out_len]
Example #14
Source File: impl.py    From pywren-ibm-cloud with Apache License 2.0 6 votes vote down vote up
def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=True,
          norm_paths=True, case_sensitive=True, sep=None):
    """Return an iterator which yields the paths matching a pathname
    pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns.

    If ``with_matches`` is True, then for each matching path
    a 2-tuple will be returned; the second element of the tuple
    will be a list of the parts of the path that matched the individual
    wildcards.

    If ``include_hidden`` is True, then files and folders starting with
    a dot are also returned.

    ``recursive`` is accepted but not consulted by this method.
    """
    matches = self._iglob(pathname, True, include_hidden,
                          norm_paths, case_sensitive, sep)
    if not with_matches:
        # Callers only want the path: drop the wildcard-match part lazily.
        return imap(lambda pair: pair[0], matches)
    return matches
Example #15
Source File: ScatterPlotItem.py    From tf-pose with Apache License 2.0 6 votes vote down vote up
def updateSpots(self, dataSet=None):
    """
    Refresh the cached per-spot data in `dataSet` (default: `self.data`).

    In pxMode, spots whose 'sourceRect' is None get one assigned from the
    fragment atlas, 'width' is recomputed as half of each source rect's
    width and 'targetRect' is cleared. Otherwise the maximum spot widths
    are reset and `measureSpotSizes` is called. `invalidate()` is called
    only when new source rects had to be assigned.
    """
    if dataSet is None:
        dataSet = self.data

    needs_invalidate = False
    if not self.opts['pxMode']:
        self._maxSpotWidth = 0
        self._maxSpotPxWidth = 0
        self.measureSpotSizes(dataSet)
    else:
        missing = np.equal(dataSet['sourceRect'], None)
        if np.any(missing):
            needs_invalidate = True
            spot_opts = self.getSpotOpts(dataSet[missing])
            dataSet['sourceRect'][missing] = self.fragmentAtlas.getSymbolCoords(spot_opts)

        self.fragmentAtlas.getAtlas() # generate atlas so source widths are available.

        rect_widths = list(imap(QtCore.QRectF.width, dataSet['sourceRect']))
        dataSet['width'] = np.array(rect_widths)/2
        dataSet['targetRect'] = None
        self._maxSpotPxWidth = self.fragmentAtlas.max_width

    if needs_invalidate:
        self.invalidate()
Example #16
Source File: pool.py    From BinderFilter with MIT License 6 votes vote down vote up
def imap(self, func, iterable, chunksize=1):
    '''
    Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`

    Tasks are handed to the task queue as a lazy generator. With
    `chunksize` > 1 the iterable is split into batches by
    `Pool._get_tasks` and the batched results are flattened again.
    '''
    assert self._state == RUN
    if chunksize != 1:
        assert chunksize > 1
        batches = Pool._get_tasks(func, iterable, chunksize)
        chunked = IMapIterator(self._cache)
        batch_tasks = ((chunked._job, idx, mapstar, (batch,), {})
                       for idx, batch in enumerate(batches))
        self._taskqueue.put((batch_tasks, chunked._set_length))
        # Each result is a whole batch; hand the items out one by one.
        return (item for chunk in chunked for item in chunk)

    result = IMapIterator(self._cache)
    item_tasks = ((result._job, idx, func, (item,), {})
                  for idx, item in enumerate(iterable))
    self._taskqueue.put((item_tasks, result._set_length))
    return result
Example #17
Source File: pool.py    From BinderFilter with MIT License 6 votes vote down vote up
def imap_unordered(self, func, iterable, chunksize=1):
    '''
    Like `imap()` method but ordering of results is arbitrary

    Tasks are handed to the task queue as a lazy generator. With
    `chunksize` > 1 the iterable is split into batches by
    `Pool._get_tasks` and the batched results are flattened again.
    '''
    assert self._state == RUN
    if chunksize != 1:
        assert chunksize > 1
        batches = Pool._get_tasks(func, iterable, chunksize)
        chunked = IMapUnorderedIterator(self._cache)
        batch_tasks = ((chunked._job, idx, mapstar, (batch,), {})
                       for idx, batch in enumerate(batches))
        self._taskqueue.put((batch_tasks, chunked._set_length))
        # Each result is a whole batch; hand the items out one by one.
        return (item for chunk in chunked for item in chunk)

    result = IMapUnorderedIterator(self._cache)
    item_tasks = ((result._job, idx, func, (item,), {})
                  for idx, item in enumerate(iterable))
    self._taskqueue.put((item_tasks, result._set_length))
    return result
Example #18
Source File: pool.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def imap(self, func, iterable, chunksize=1):
    '''
    Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`

    Tasks are handed to the task queue as a lazy generator. With
    `chunksize` > 1 the iterable is split into batches by
    `Pool._get_tasks` and the batched results are flattened again.
    '''
    assert self._state == RUN
    if chunksize != 1:
        assert chunksize > 1
        batches = Pool._get_tasks(func, iterable, chunksize)
        chunked = IMapIterator(self._cache)
        batch_tasks = ((chunked._job, idx, mapstar, (batch,), {})
                       for idx, batch in enumerate(batches))
        self._taskqueue.put((batch_tasks, chunked._set_length))
        # Each result is a whole batch; hand the items out one by one.
        return (item for chunk in chunked for item in chunk)

    result = IMapIterator(self._cache)
    item_tasks = ((result._job, idx, func, (item,), {})
                  for idx, item in enumerate(iterable))
    self._taskqueue.put((item_tasks, result._set_length))
    return result
Example #19
Source File: pool.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def imap_unordered(self, func, iterable, chunksize=1):
    '''
    Like `imap()` method but ordering of results is arbitrary

    Tasks are handed to the task queue as a lazy generator. With
    `chunksize` > 1 the iterable is split into batches by
    `Pool._get_tasks` and the batched results are flattened again.
    '''
    assert self._state == RUN
    if chunksize != 1:
        assert chunksize > 1
        batches = Pool._get_tasks(func, iterable, chunksize)
        chunked = IMapUnorderedIterator(self._cache)
        batch_tasks = ((chunked._job, idx, mapstar, (batch,), {})
                       for idx, batch in enumerate(batches))
        self._taskqueue.put((batch_tasks, chunked._set_length))
        # Each result is a whole batch; hand the items out one by one.
        return (item for chunk in chunked for item in chunk)

    result = IMapUnorderedIterator(self._cache)
    item_tasks = ((result._job, idx, func, (item,), {})
                  for idx, item in enumerate(iterable))
    self._taskqueue.put((item_tasks, result._set_length))
    return result
Example #20
Source File: functions.py    From worker with GNU General Public License v3.0 6 votes vote down vote up
def multiplySeries(requestContext, *seriesLists):
    """
    Takes two or more series and multiplies their points. A constant may not be
    used. To multiply by a constant, use the scale() function.

    Example:

    .. code-block:: none

      &target=multiplySeries(Series.dividends,Series.divisors)


    """

    yield defer.succeed(None)
    (seriesList, start, end, step) = normalize(seriesLists)

    # A single series has nothing to be multiplied with: hand it back as is.
    if len(seriesList) == 1:
        returnValue(seriesList)

    joined_names = ','.join([series.name for series in seriesList])
    name = "multiplySeries(%s)" % joined_names
    # Multiply the series point by point, lazily.
    points = imap(lambda column: safeMul(*column), izip(*seriesList))
    multiplied = TimeSeries(name, start, end, step, points)
    multiplied.pathExpression = name
    returnValue([multiplied])
Example #21
Source File: csv.py    From minemeld-core with Apache License 2.0 5 votes vote down vote up
def _gzipped_line_splitter(self, response):
    """Yield the lines of a gzip-compressed streamed `response`.

    The body is read in 1 MiB chunks, each chunk is decompressed and split
    into lines; a trailing partial line is carried over and prefixed to the
    next chunk.
    """
    # same pending-line logic as the iter_lines helper cited by the original
    # author ("urllib32.response.iter_lines" - presumably urllib3/requests)
    carry = None

    gunzip = GzipDecoder()
    raw_chunks = response.iter_content(chunk_size=1024*1024)

    for data in itertools.imap(gunzip.decompress, raw_chunks):
        if carry is not None:
            data = carry + data

        pieces = data.splitlines()

        # The last piece is incomplete when the chunk does not end with a
        # line break, i.e. its last character is also the chunk's last one.
        unfinished = pieces and pieces[-1] and data and pieces[-1][-1] == data[-1]
        carry = pieces.pop() if unfinished else None

        for piece in pieces:
            yield piece

    if carry is not None:
        yield carry
Example #22
Source File: inout.py    From patch_linemod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def load_im(path):
    """Read the image file at `path` with scipy.misc.imread and return the result."""
    # NOTE(review): scipy.misc.imread is deprecated/removed in newer SciPy
    # releases - confirm the SciPy version this project pins.
    #
    # Alternative using PyPNG (kept from the original for reference):
    # r = png.Reader(filename=path)
    # im = np.vstack(itertools.imap(np.uint8, r.asDirect()[2]))
    return scipy.misc.imread(path)
Example #23
Source File: inout.py    From patch_linemod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def load_depth(path):
    """Read the PNG depth image at `path` and return it as a float32 array.

    Every row delivered by the PNG reader is converted to uint16, the rows
    are stacked vertically and the result is cast to np.float32.
    """
    # PyPNG library is used since it allows to save 16-bit PNG
    reader = png.Reader(filename=path)
    # itertools.imap is removed in py3, and py3's map() returns an iterator
    # while np.vstack expects a real sequence of arrays - so build a list.
    rows = [np.uint16(row) for row in reader.asDirect()[2]]
    return np.vstack(rows).astype(np.float32)
Example #24
Source File: google.py    From minemeld-core with Apache License 2.0 5 votes vote down vote up
def _build_iterator(self, now):
    """Return an iterator over the indicators built from the SPF includes.

    The root SPF record (`self.ROOT_SPF`) is resolved through the Google
    DNS server; for each included domain its 'ip4' and 'ip6' entries are
    lazily turned into indicators by `_build_IPv4` / `_build_IPv6`.
    Returns an empty list (after logging an error) when the root record
    has no 'include' entry. `now` is not used.
    """
    dig = minemeld.packages.gdns.dig.Dig(
        servers=[_GOOGLE_DNS_SERVER],
        udp_port=self.udp_port,
        tcp_port=self.tcp_port,
        tries=self.tries,
        timeout=self.polling_timeout
    )

    root_spf = self._resolve_spf(dig, self.ROOT_SPF)
    if 'include' not in root_spf:
        LOG.error(
            '%s - No includes in SPF' % self.name
        )
        return []

    per_domain = []
    for domain in root_spf['include']:
        domain_spf = self._resolve_spf(dig, domain)

        # IPv4 entries first, then IPv6, for every included domain.
        for builder, key in ((self._build_IPv4, 'ip4'), (self._build_IPv6, 'ip6')):
            per_domain.append(itertools.imap(
                functools.partial(builder, domain),
                domain_spf.get(key, [])
            ))

    return itertools.chain(*per_domain)
Example #25
Source File: schema.py    From mopidy-local-sqlite with Apache License 2.0 5 votes vote down vote up
def lookup(c, type, uri):
    """Lazily yield the tracks found by the lookup query for `type` and `uri`.

    The query registered under `type` in _LOOKUP_QUERIES is executed
    immediately with `uri` as its only parameter; each row is converted
    with `_track` as the result is consumed.
    """
    rows = c.execute(_LOOKUP_QUERIES[type], [uri])
    # Generator expression instead of itertools.imap, which does not exist
    # on Python 3; rows are still converted one at a time.
    return (_track(row) for row in rows)
Example #26
Source File: o365.py    From minemeld-core with Apache License 2.0 5 votes vote down vote up
def _build_iterator(self, now):
    """Return an iterator over the O365 indicators, one item per indicator.

    Items from `self._o365_iterator(now)` that share the same 'indicator'
    are collapsed: the last item wins and its 'sources' becomes the union
    of the sources seen so far (in no particular order).
    """
    merged = {}
    for item in self._o365_iterator(now):
        key = item['indicator']
        previous = merged.get(key, None)
        if previous is not None:
            item['sources'] = list(set(item['sources']) | set(previous['sources']))
        merged[key] = item

    # Only the values are needed. Iterating them directly replaces the
    # Python 2-only imap(lambda i: i[1], dict.iteritems()) round trip and
    # still returns an iterator.
    return iter(merged.values())
Example #27
Source File: test_set.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def L(seqn):
    'Test multiple tiers of iterators'
    # Wrap seqn in the G, Ig and R helpers, then in imap and finally chain.
    tiers = R(Ig(G(seqn)))
    passthrough = imap(lambda item: item, tiers)
    return chain(passthrough)
Example #28
Source File: schema.py    From mopidy-local-sqlite with Apache License 2.0 5 votes vote down vote up
def tracks(c):
    """Lazily yield every row of the tracks table, converted with `_track`.

    The SELECT is executed immediately; rows are converted as the result
    is consumed.
    """
    rows = c.execute('SELECT * FROM tracks')
    # Generator expression instead of itertools.imap, which does not exist
    # on Python 3; rows are still converted one at a time.
    return (_track(row) for row in rows)
Example #29
Source File: schema.py    From mopidy-local-sqlite with Apache License 2.0 5 votes vote down vote up
def dates(c, format='%Y-%m-%d'):
    """Lazily yield the distinct non-NULL track dates, sorted and formatted.

    `format` is handed to SQLite's strftime() as a bound parameter. The
    query is executed immediately; the single column of each row is
    unpacked as the result is consumed.
    """
    rows = c.execute("""
    SELECT DISTINCT strftime(?, date) AS date
      FROM track
     WHERE date IS NOT NULL
     ORDER BY date
    """, [format])
    # Generator expression instead of itertools.imap, which does not exist
    # on Python 3; rows are still unpacked one at a time.
    return (row[0] for row in rows)
Example #30
Source File: inout.py    From sixd_toolkit with MIT License 5 votes vote down vote up
def load_im(path):
    """Read the image file at `path` with scipy.misc.imread and return the result."""
    # NOTE(review): scipy.misc.imread is deprecated/removed in newer SciPy
    # releases - confirm the SciPy version this project pins.
    #
    # Alternative using PyPNG (kept from the original for reference):
    # r = png.Reader(filename=path)
    # im = np.vstack(itertools.imap(np.uint8, r.asDirect()[2]))
    return scipy.misc.imread(path)