Python itertools.imap() Examples

The following are 30 code examples showing how to use itertools.imap(). The examples are extracted from open source projects; the project, author, file, and license are listed above each example.

You may also want to check out all available functions and classes of the itertools module.
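
For context, itertools.imap() is the lazy, iterator-returning counterpart of Python 2's built-in map(); the name was removed in Python 3, where the built-in map() is itself lazy. A minimal sketch of the basic behavior (Python 2; the names are illustrative):

from itertools import imap

squares = imap(lambda x: x * x, xrange(5))  # nothing is computed yet
print list(squares)                         # [0, 1, 4, 9, 16]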

Example 1
Project: pywren-ibm-cloud   Author: pywren   File: impl.py    License: Apache License 2.0
def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=True,
              norm_paths=True, case_sensitive=True, sep=None):
        """Return an iterator which yields the paths matching a pathname
        pattern.

        The pattern may contain simple shell-style wildcards a la
        fnmatch. However, unlike fnmatch, filenames starting with a
        dot are special cases that are not matched by '*' and '?'
        patterns.

        If ``with_matches`` is True, then for each matching path
        a 2-tuple will be returned; the second element of the tuple
        will be a list of the parts of the path that matched the individual
        wildcards.

        If ``include_hidden`` is True, then files and folders starting with
        a dot are also returned.
        """
        result = self._iglob(pathname, True, include_hidden,
                             norm_paths, case_sensitive, sep)
        if with_matches:
            return result
        return imap(lambda s: s[0], result) 
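The closing imap(lambda s: s[0], result) lazily projects each (path, matched_parts) pair down to just the path. The same projection in isolation (a sketch with illustrative data, not this project's API):

from itertools import imap

pairs = [('a.txt', ['a']), ('b.txt', ['b'])]
paths = imap(lambda s: s[0], pairs)
print list(paths)  # ['a.txt', 'b.txt']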
Example 2
Project: worker   Author: moira-alert   File: functions.py    License: GNU General Public License v3.0
def multiplySeries(requestContext, *seriesLists):
    """
    Takes two or more series and multiplies their points. A constant may not be
    used. To multiply by a constant, use the scale() function.

    Example:

    .. code-block:: none

      &target=multiplySeries(Series.dividends,Series.divisors)


    """

    yield defer.succeed(None)
    (seriesList, start, end, step) = normalize(seriesLists)

    if len(seriesList) == 1:
        returnValue(seriesList)

    name = "multiplySeries(%s)" % ','.join([s.name for s in seriesList])
    product = imap(lambda x: safeMul(*x), izip(*seriesList))
    resultSeries = TimeSeries(name, start, end, step, product)
    resultSeries.pathExpression = name
    returnValue([resultSeries]) 
Example 3
Project: public-transit-tools   Author: Esri   File: sqlize_csv.py    License: Apache License 2.0
def check_date_fields(rows, col_names, tablename, fname):
    '''Ensure date fields are in the correct YYYYMMDD format before adding them to the SQL table'''
    def check_date_cols(row):
        if tablename == "calendar":
            date_cols = ["start_date", "end_date"]
        elif tablename == "calendar_dates":
            date_cols = ["date"]
        date_column_idxs = [col_names.index(x) for x in date_cols]
        for idx in date_column_idxs:
            date = row[idx]
            try:
                datetime.datetime.strptime(date, '%Y%m%d')
            except ValueError:
                msg = 'Column "' + col_names[idx] + '" in file ' + fname + ' has an invalid value: ' + date + '. \
Date fields must be in YYYYMMDD format. Please check the date field formatting in calendar.txt and calendar_dates.txt.'
                arcpy.AddError(msg)
                raise BBB_SharedFunctions.CustomError
        return row
    if ispy3:
        return map(check_date_cols, rows)
    else:
        return itertools.imap(check_date_cols, rows) 
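Examples 3, 27, and 28 branch on an ispy3 flag because itertools.imap() does not exist in Python 3, where the built-in map() is already lazy. A common version-agnostic idiom, shown here as a sketch rather than code from this project, aliases the name once at import time:

try:
    from itertools import imap  # Python 2
except ImportError:
    imap = map  # Python 3: the built-in map() is already lazy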
Example 4
Project: public-transit-tools   Author: Esri   File: sqlize_csv.py    License: Apache License 2.0
def check_date_fields(rows, col_names, tablename, fname):
    '''Ensure date fields are in the correct YYYYMMDD format before adding them to the SQL table'''
    def check_date_cols(row):
        if tablename == "calendar":
            date_cols = ["start_date", "end_date"]
        elif tablename == "calendar_dates":
            date_cols = ["date"]
        date_column_idxs = [col_names.index(x) for x in date_cols]
        for idx in date_column_idxs:
            date = row[idx]
            try:
                datetime.datetime.strptime(date, '%Y%m%d')
            except ValueError:
                msg = u'Column "' + col_names[idx] + u'" in file ' + fname + u' has an invalid value: ' + date + u'. \
Date fields must be in YYYYMMDD format. Please check the date field formatting in calendar.txt and calendar_dates.txt.'
                Errors_To_Return.append(msg)
                raise CustomError
        return row
    return itertools.imap(check_date_cols, rows) 
Example 5
Project: tidb-docker-compose   Author: pingcap   File: session.py    License: Apache License 2.0
def _createFromRDD(self, rdd, schema, samplingRatio):
        """
        Create an RDD for DataFrame from an existing RDD, returns the RDD and schema.
        """
        if schema is None or isinstance(schema, (list, tuple)):
            struct = self._inferSchema(rdd, samplingRatio, names=schema)
            converter = _create_converter(struct)
            rdd = rdd.map(converter)
            if isinstance(schema, (list, tuple)):
                for i, name in enumerate(schema):
                    struct.fields[i].name = name
                    struct.names[i] = name
            schema = struct

        elif not isinstance(schema, StructType):
            raise TypeError("schema should be StructType or list or None, but got: %s" % schema)

        # convert python objects to sql data
        rdd = rdd.map(schema.toInternal)
        return rdd, schema 
Example 6
Project: tidb-docker-compose   Author: pingcap   File: session.py    License: Apache License 2.0
def _createFromLocal(self, data, schema):
        """
        Create an RDD for DataFrame from a list or pandas.DataFrame, returns
        the RDD and schema.
        """
        # make sure data can be consumed multiple times
        if not isinstance(data, list):
            data = list(data)

        if schema is None or isinstance(schema, (list, tuple)):
            struct = self._inferSchemaFromList(data, names=schema)
            converter = _create_converter(struct)
            data = map(converter, data)
            if isinstance(schema, (list, tuple)):
                for i, name in enumerate(schema):
                    struct.fields[i].name = name
                    struct.names[i] = name
            schema = struct

        elif not isinstance(schema, StructType):
            raise TypeError("schema should be StructType or list or None, but got: %s" % schema)

        # convert python objects to sql data
        data = [schema.toInternal(row) for row in data]
        return self._sc.parallelize(data), schema 
Example 7
Project: tidb-docker-compose   Author: pingcap   File: session.py    License: Apache License 2.0
def _createFromRDD(self, rdd, schema, samplingRatio):
        """
        Create an RDD for DataFrame from an existing RDD, returns the RDD and schema.
        """
        if schema is None or isinstance(schema, (list, tuple)):
            struct = self._inferSchema(rdd, samplingRatio, names=schema)
            converter = _create_converter(struct)
            rdd = rdd.map(converter)
            if isinstance(schema, (list, tuple)):
                for i, name in enumerate(schema):
                    struct.fields[i].name = name
                    struct.names[i] = name
            schema = struct

        elif not isinstance(schema, StructType):
            raise TypeError("schema should be StructType or list or None, but got: %s" % schema)

        # convert python objects to sql data
        rdd = rdd.map(schema.toInternal)
        return rdd, schema 
Example 8
Project: tidb-docker-compose   Author: pingcap   File: session.py    License: Apache License 2.0
def _createFromLocal(self, data, schema):
        """
        Create an RDD for DataFrame from a list or pandas.DataFrame, returns
        the RDD and schema.
        """
        # make sure data can be consumed multiple times
        if not isinstance(data, list):
            data = list(data)

        if schema is None or isinstance(schema, (list, tuple)):
            struct = self._inferSchemaFromList(data, names=schema)
            converter = _create_converter(struct)
            data = map(converter, data)
            if isinstance(schema, (list, tuple)):
                for i, name in enumerate(schema):
                    struct.fields[i].name = name
                    struct.names[i] = name
            schema = struct

        elif not isinstance(schema, StructType):
            raise TypeError("schema should be StructType or list or None, but got: %s" % schema)

        # convert python objects to sql data
        data = [schema.toInternal(row) for row in data]
        return self._sc.parallelize(data), schema 
Example 9
Project: tf-pose   Author: SrikanthVelpuri   File: ScatterPlotItem.py    License: Apache License 2.0
def updateSpots(self, dataSet=None):
        if dataSet is None:
            dataSet = self.data

        invalidate = False
        if self.opts['pxMode']:
            mask = np.equal(dataSet['sourceRect'], None)
            if np.any(mask):
                invalidate = True
                opts = self.getSpotOpts(dataSet[mask])
                sourceRect = self.fragmentAtlas.getSymbolCoords(opts)
                dataSet['sourceRect'][mask] = sourceRect

            self.fragmentAtlas.getAtlas() # generate atlas so source widths are available.

            dataSet['width'] = np.array(list(imap(QtCore.QRectF.width, dataSet['sourceRect'])))/2
            dataSet['targetRect'] = None
            self._maxSpotPxWidth = self.fragmentAtlas.max_width
        else:
            self._maxSpotWidth = 0
            self._maxSpotPxWidth = 0
            self.measureSpotSizes(dataSet)

        if invalidate:
            self.invalidate() 
Example 10
Project: ironpython2   Author: IronLanguages   File: pool.py    License: Apache License 2.0
def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk) 
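As the docstring notes, multiprocessing.Pool.imap() mirrors the lazy, ordered semantics of itertools.imap() while dispatching work to a pool of processes. A minimal usage sketch of the standard-library API (the square worker is illustrative):

from multiprocessing import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    pool = Pool(processes=4)
    # Results arrive lazily and in input order, one chunk at a time.
    for value in pool.imap(square, range(10), chunksize=2):
        print(value)
    pool.close()
    pool.join()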
Example 11
Project: ironpython2   Author: IronLanguages   File: pool.py    License: Apache License 2.0
def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk) 
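The unordered variant changes only the yield order; adapting the previous sketch is a one-line change (results now arrive as workers finish, not in input order):

    for value in pool.imap_unordered(square, range(10), chunksize=2):
        print(value)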
Example 12
Project: torngas   Author: mqingyn   File: utils.py    License: BSD 3-Clause "New" or "Revised" License
def safestr(obj, encoding='utf-8'):
    r"""
    Converts any given object to utf-8 encoded string.

        >>> safestr('hello')
        'hello'
        >>> safestr(u'\u1234')
        '\xe1\x88\xb4'
        >>> safestr(2)
        '2'
    """
    if isinstance(obj, unicode):
        return obj.encode(encoding)
    elif isinstance(obj, str):
        return obj
    elif hasattr(obj, 'next'):  # iterator
        return itertools.imap(safestr, obj)
    else:
        return str(obj)

# for backward-compatibility 
Example 13
Project: mopidy-local-sqlite   Author: mopidy   File: schema.py    License: Apache License 2.0
def list_distinct(c, field, query=[]):
    if field not in _SEARCH_FIELDS:
        raise LookupError('Invalid search field: %s' % field)
    sql = """
    SELECT DISTINCT %s AS field
      FROM search
     WHERE field IS NOT NULL
    """ % field
    terms = []
    params = []
    for key, value in query:
        if key == 'any':
            terms.append('? IN (%s)' % ','.join(_SEARCH_FIELDS))
        elif key in _SEARCH_FIELDS:
            terms.append('%s = ?' % key)
        else:
            raise LookupError('Invalid search field: %s' % key)
        params.append(value)
    if terms:
        sql += ' AND ' + ' AND '.join(terms)
    logger.debug('SQLite list query %r: %s', params, sql)
    return itertools.imap(operator.itemgetter(0), c.execute(sql, params)) 
Example 14
Project: nightmare   Author: joxeankoret   File: kfuzzy.py    License: GNU General Public License v2.0
def simplified(self, bytes, aggresive = False):
        output_size = self.output_size
        ignore_range = self.ignore_range
        bsize = self.bsize
        total_size = len(bytes)
        size = (total_size/bsize) / output_size
        buf = []
        reduce_errors = self.reduce_errors
        # Adjust the output to the desired output size
        for c in xrange(0, output_size):
            tmp = bytes[c*size:(c*size+1)+bsize]
            ret = sum(imap(ord, tmp)) % 255
            if reduce_errors:
                if ret != 255 and ret != 0:
                    buf.append(chr(ret))
            else:
                buf.append(chr(ret))
        
        buf = "".join(buf)
        return base64.b64encode(buf).strip("=")[:output_size] 
Example 15
Project: nightmare   Author: joxeankoret   File: kfuzzy.py    License: GNU General Public License v2.0
def _fast_hash(self, bytes, aggresive = False):
        i = -1
        ret = set()
        
        output_size = self.output_size
        size = len(bytes) *1.00 / output_size
        bsize = self.bsize
        radd = ret.add
        
        while i < output_size:
            i += 1
            buf = bytes[i*bsize:(i+1)*bsize]
            char = sum(imap(ord, buf)) % 255
            if self.reduce_errors:
                if char != 255 and char != 0:
                    radd(chr(char))
            else:
                radd(chr(char))
        
        ret = "".join(ret)
        return base64.b64encode(ret).strip("=")[:output_size] 
Example 16
Project: nightmare   Author: joxeankoret   File: utils.py    License: GNU General Public License v2.0
def safestr(obj, encoding='utf-8'):
    r"""
    Converts any given object to utf-8 encoded string. 
    
        >>> safestr('hello')
        'hello'
        >>> safestr(u'\u1234')
        '\xe1\x88\xb4'
        >>> safestr(2)
        '2'
    """
    if isinstance(obj, unicode):
        return obj.encode(encoding)
    elif isinstance(obj, str):
        return obj
    elif hasattr(obj, 'next'): # iterator
        return itertools.imap(safestr, obj)
    else:
        return str(obj)

# for backward-compatibility 
Example 17
Project: maltindex   Author: joxeankoret   File: kfuzzy.py    License: GNU General Public License v2.0
def simplified(self, bytes, aggresive = False):
        output_size = self.output_size
        ignore_range = self.ignore_range
        bsize = self.bsize
        total_size = len(bytes)
        size = (total_size/bsize) / output_size
        buf = []
        reduce_errors = self.reduce_errors
        # Adjust the output to the desired output size
        for c in xrange(0, output_size):
            tmp = bytes[c*size:(c*size+1)+bsize]
            ret = sum(imap(ord, tmp)) % 255
            if reduce_errors:
                if ret != 255 and ret != 0:
                    buf.append(chr(ret))
            else:
                buf.append(chr(ret))
        
        buf = "".join(buf)
        return base64.b64encode(buf).strip("=")[:output_size] 
Example 18
Project: maltindex   Author: joxeankoret   File: kfuzzy.py    License: GNU General Public License v2.0
def _fast_hash(self, bytes, aggresive = False):
        i = -1
        ret = set()
        
        output_size = self.output_size
        size = len(bytes) *1.00 / output_size
        bsize = self.bsize
        radd = ret.add
        
        while i < output_size:
            i += 1
            buf = bytes[i*bsize:(i+1)*bsize]
            char = sum(imap(ord, buf)) % 255
            if self.reduce_errors:
                if char != 255 and char != 0:
                    radd(chr(char))
            else:
                radd(chr(char))
        
        ret = "".join(ret)
        return base64.b64encode(ret).strip("=")[:output_size] 
Example 19
Project: BinderFilter   Author: dxwu   File: pool.py    License: MIT License
def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk) 
Example 20
Project: BinderFilter   Author: dxwu   File: pool.py    License: MIT License
def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk) 
Example 21
Project: razzy-spinner   Author: rafasashi   File: toolbox.py    License: GNU General Public License v3.0
def dictionary(files='rotokas.dic', include_header=False):
    """
    Deprecated: use C{ToolboxData.parse()}
    
    @param files: One or more toolbox files to be processed
    @type files: L{string} or L{tuple(string)}
    @param include_header: treat header as entry?
    @type include_header: boolean
    @rtype: iterator over L{dict}
    """       
    return imap(dict, raw(files, include_header)) 
Example 22
Project: gdb_python_api   Author: jefftrull   File: combined_filter_decorator.py    License: MIT License
def filter(self, frame_iter):
        # compose new iterator that excludes Boost function frames
        f_iter = filter(lambda f : re.match(r"^boost::", f.function()) is None,
                        frame_iter)
        # wrap that in our decorator
        return imap(Rot13Decorator, f_iter) 
Example 23
Project: gdb_python_api   Author: jefftrull   File: backtrace.py    License: MIT License
def filter(self, frame_iter):
        # first check for multi-regex option
        squash_regexes = gdb.parameter('backtrace-strip-regexes')
        # If present we compress stack frames with matching capture groups
        if squash_regexes:
            prog = re.compile(squash_regexes)
            # if there are no (or one) capture groups, treat this like squash_regex
            if prog.groups < 2:
                squash_regex = squash_regexes
            else:
                # wrap the current iterator in a squash-matching-subsequences iterator
                # with the predicate "function name matches same regex"
                ufi = UserFilter.__adjacent_squash(frame_iter,
                                                   lambda a, b : UserFilter.__same_cgroup(prog, a, b))
                # further wrap in a decorator and return
                return imap(CommonAliasDecorator, ufi)
        else:
            # single regex is simpler - we compress based on match/nomatch
            squash_regex = gdb.parameter('backtrace-strip-regex')

        if squash_regex:
            ufi = UserFilter.__cond_squash(frame_iter,
                                           lambda x : ((x.function() != x.address()) and
                                                       re.match(squash_regex, x.function())))
            return imap(CommonAliasDecorator, ufi)
        else:
            # just add the decorator to the original iterator
            return imap(CommonAliasDecorator, frame_iter) 
Example 24
Project: gdb_python_api   Author: jefftrull   File: rot13_framedecorator.py    License: MIT License
def filter(self, frame_iter):
        return imap(Rot13Decorator, frame_iter) 
Example 25
Project: mishkal   Author: linuxscout   File: utils.py    License: GNU General Public License v3.0
def unixUniq(l):
  """
  Unix-like uniq; the iterable argument should be sorted first to get unique elements.
  """
  return imap(lambda j:j[0],groupby(l,lambda i: i)) 
Example 26
Project: mishkal   Author: linuxscout   File: utils.py    License: GNU General Public License v3.0
def unixUniqAndCount(l):
  """
  Unix-like uniq -c; it returns an iterable of tuples (count, uniq_entry).
  """
  return imap(lambda j:(len(list(j[1])),j[0]),groupby(l,lambda i: i)) 
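Both helpers rely on itertools.groupby(), which groups only consecutive equal elements, hence the requirement that the input be sorted first. A brief usage sketch (Python 2, illustrative data):

sorted_items = sorted(['b', 'a', 'a', 'c', 'b'])
print list(unixUniq(sorted_items))          # ['a', 'b', 'c']
print list(unixUniqAndCount(sorted_items))  # [(2, 'a'), (2, 'b'), (1, 'c')]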
Example 27
Project: public-transit-tools   Author: Esri   File: sqlize_csv.py    License: Apache License 2.0
def smarter_convert_times(rows, col_names, fname, GTFSdir, time_columns=('arrival_time', 'departure_time')):
    '''Parses time fields according to the column name.  Accepts HMS or numeric
    times, converting to seconds-since-midnight.'''

    time_column_idxs = [col_names.index(x)  for x in time_columns]
    def convert_time_columns(row):
        out_row = row[:]    # copy
        for idx in time_column_idxs:
            field = row[idx].strip()
            if check_time_str(field):
                out_row[idx] = hms.str2sec(field)
            elif field == '':
                msg = "GTFS dataset " + GTFSdir + " contains empty \
values for arrival_time or departure_time in stop_times.txt.  Although the \
GTFS spec allows empty values for these fields, this toolbox \
requires exact time values for all stops.  You will not be able to use this \
dataset for your analysis."
                arcpy.AddError(msg)
                raise BBB_SharedFunctions.CustomError
            else:
                try:
                    out_row[idx] = float(field)
                except ValueError:
                    msg = 'Column "' + col_names[idx] + '" in file ' + os.path.join(GTFSdir, fname) + ' has an invalid value: ' + field + '.'
                    arcpy.AddError(msg)
                    raise BBB_SharedFunctions.CustomError
        return out_row
    if ispy3:
        return map(convert_time_columns, rows)
    else:
        return itertools.imap(convert_time_columns, rows) 
Example 28
Project: public-transit-tools   Author: Esri   File: sqlize_csv.py    License: Apache License 2.0
def check_latlon_fields(rows, col_names, fname):
    '''Ensure lat/lon fields are valid'''
    def check_latlon_cols(row):
        stop_id = row[col_names.index("stop_id")]
        stop_lat = row[col_names.index("stop_lat")]
        stop_lon = row[col_names.index("stop_lon")]
        try:
            stop_lat_float = float(stop_lat)
        except ValueError:
            msg = 'stop_id "%s" in %s contains an invalid non-numerical value \
for the stop_lat field: "%s". Please double-check all lat/lon values in your \
stops.txt file.' % (stop_id, fname, stop_lat)
            arcpy.AddError(msg)
            raise BBB_SharedFunctions.CustomError
        try:
            stop_lon_float = float(stop_lon)
        except ValueError:
            msg = 'stop_id "%s" in %s contains an invalid non-numerical value \
for the stop_lon field: "%s". Please double-check all lat/lon values in your \
stops.txt file.' % (stop_id, fname, stop_lon)
            arcpy.AddError(msg)
            raise BBB_SharedFunctions.CustomError
        if not (-90.0 <= stop_lat_float <= 90.0):
            msg = 'stop_id "%s" in %s contains an invalid value outside the \
range (-90, 90) for the stop_lat field: "%s". stop_lat values must be in valid WGS 84 \
coordinates.  Please double-check all lat/lon values in your stops.txt file.\
' % (stop_id, fname, stop_lat)
            arcpy.AddError(msg)
            raise BBB_SharedFunctions.CustomError
        if not (-180.0 <= stop_lon_float <= 180.0):
            msg = 'stop_id "%s" in %s contains an invalid value outside the \
range (-180, 180) for the stop_lon field: "%s". stop_lon values must be in valid WGS 84 \
coordinates.  Please double-check all lat/lon values in your stops.txt file.\
' % (stop_id, fname, stop_lon)
            arcpy.AddError(msg)
            raise BBB_SharedFunctions.CustomError
        return row
    if ispy3:
        return map(check_latlon_cols, rows)
    else:
        return itertools.imap(check_latlon_cols, rows) 
Example 29
Project: public-transit-tools   Author: Esri   File: sqlize_csv.py    License: Apache License 2.0
def check_latlon_fields(rows, col_names, fname):
    '''Ensure lat/lon fields are valid'''
    def check_latlon_cols(row):
        stop_id = row[col_names.index("stop_id")]
        stop_lat = row[col_names.index("stop_lat")]
        stop_lon = row[col_names.index("stop_lon")]
        try:
            stop_lat_float = float(stop_lat)
        except ValueError:
            msg = u'stop_id "%s" in %s contains an invalid non-numerical value \
for the stop_lat field: "%s". Please double-check all lat/lon values in your \
stops.txt file.' % (stop_id, fname, stop_lat)
            Errors_To_Return.append(msg)
            raise CustomError
        try:
            stop_lon_float = float(stop_lon)
        except ValueError:
            msg = u'stop_id "%s" in %s contains an invalid non-numerical value \
for the stop_lon field: "%s". Please double-check all lat/lon values in your \
stops.txt file.' % (stop_id, fname, stop_lon)
            Errors_To_Return.append(msg)
            raise CustomError
        if not (-90.0 <= stop_lat_float <= 90.0):
            msg = u'stop_id "%s" in %s contains an invalid value outside the \
range (-90, 90) for the stop_lat field: "%s". stop_lat values must be in valid WGS 84 \
coordinates.  Please double-check all lat/lon values in your stops.txt file.\
' % (stop_id, fname, stop_lat)
            Errors_To_Return.append(msg)
            raise CustomError
        if not (-180.0 <= stop_lon_float <= 180.0):
            msg = u'stop_id "%s" in %s contains an invalid value outside the \
range (-180, 180) for the stop_lon field: "%s". stop_lon values must be in valid WGS 84 \
coordinates.  Please double-check all lat/lon values in your stops.txt file.\
' % (stop_id, fname, stop_lon)
            Errors_To_Return.append(msg)
            raise CustomError
        return row
    return itertools.imap(check_latlon_cols, rows) 
Example 30
Project: tidb-docker-compose   Author: pingcap   File: session.py    License: Apache License 2.0
def __init__(self, sparkContext, jsparkSession=None):
        """Creates a new SparkSession.

        >>> from datetime import datetime
        >>> spark = SparkSession(sc)
        >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1,
        ...     b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1),
        ...     time=datetime(2014, 8, 1, 14, 1, 5))])
        >>> df = allTypes.toDF()
        >>> df.createOrReplaceTempView("allTypes")
        >>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a '
        ...            'from allTypes where b and i > 0').collect()
        [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \
            dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)]
        >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect()
        [(1, u'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])]
        """
        from pyspark.sql.context import SQLContext
        self._sc = sparkContext
        self._jsc = self._sc._jsc
        self._jvm = self._sc._jvm
        if jsparkSession is None:
            if self._jvm.SparkSession.getDefaultSession().isDefined() \
                    and not self._jvm.SparkSession.getDefaultSession().get() \
                        .sparkContext().isStopped():
                jsparkSession = self._jvm.SparkSession.getDefaultSession().get()
            else:
                jsparkSession = self._jvm.SparkSession.builder().getOrCreate()
                # jsparkSession = self._jvm.SparkSession(self._jsc.sc())
        self._jsparkSession = jsparkSession
        self._jwrapped = self._jsparkSession.sqlContext()
        self._wrapped = SQLContext(self._sc, self, self._jwrapped)
        _monkey_patch_RDD(self)
        install_exception_handler()
        # If we had an instantiated SparkSession attached with a SparkContext
        # which is stopped now, we need to renew the instantiated SparkSession.
        # Otherwise, we will use invalid SparkSession when we call Builder.getOrCreate.
        if SparkSession._instantiatedSession is None \
                or SparkSession._instantiatedSession._sc._jsc is None:
            SparkSession._instantiatedSession = self
            self._jvm.SparkSession.setDefaultSession(self._jsparkSession)