python source code of convert

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: sw=4:ts=4:expandtab

"""
meza.convert
~~~~~~~~~~~~

Provides methods for converting data structures

Examples:
    basic usage::

        >>> from meza.convert import to_decimal
        >>>
        >>> to_decimal('$123.45')
        Decimal('123.45')
"""
from __future__ import (
    absolute_import, division, print_function, unicode_literals)

import itertools as it
import pygogo as gogo

from os import path as p
from decimal import Decimal, ROUND_HALF_UP, ROUND_HALF_DOWN
from io import StringIO
from json import dumps
from collections import OrderedDict
from operator import itemgetter
from functools import partial
from array import array

from builtins import *
from six.moves import filterfalse, zip_longest
from dateutil.parser import parse
from . import fntools as ft, unicsv as csv, ENCODING, DEFAULT_DATETIME, BOM
from .compat import get_native_str

try:
    import numpy as np
except ImportError:
    np = None

try:
    import pandas as pd
except ImportError:
    pd = None

logger = gogo.Gogo(__name__, monolog=True).logger


def ctype2ext(content_type=None):
    """Converts an http content type to a file extension.

    Args:
        content_type (str): Output file path or directory.

    Returns:
        str: file extension

    Examples:
        >>> ctype2ext('/csv;') == 'csv'
        True
        >>> ctype2ext('/xls;') == 'xls'
        True
        >>> ext = '/vnd.openxmlformats-officedocument.spreadsheetml.sheet;'
        >>> ctype2ext(ext) == 'xlsx'
        True
    """
    try:
        ctype = content_type.split('/')[1].split(';')[0]
    except (AttributeError, IndexError):
        ctype = None

    xlsx_type = 'vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    switch = {'xls': 'xls', 'csv': 'csv'}
    switch[xlsx_type] = 'xlsx'

    if ctype not in switch:
        msg = 'Content-Type %s not found in dictionary. Using default value.'
        logger.warning(msg, ctype)

    return switch.get(ctype, 'csv')


def order_dict(content, order):
    """Converts a dict into an OrderedDict

    Args:
        content (dict): The content to convert.
        order (Seq[str]): The field order.

    Returns:
        OrderedDict: The ordered content.

    Examples:
        >>> order_dict({'a': 1, 'b': 2}, ['a', 'b']) == OrderedDict(
        ...     [('a', 1), ('b', 2)])
        True
    """
    get_order = {field: pos for pos, field in enumerate(order)}
    keyfunc = lambda x: get_order[x[0]]
    return OrderedDict(sorted(content.items(), key=keyfunc))


def to_bool(content, trues=None, falses=None, warn=False):
    """Formats strings into bool.

    Args:
        content (str): The content to parse.
        trues (Seq[str]): Values to consider True.
        falses (Seq[str]): Values to consider Frue.
        warn (bool): raise error if content can't be safely converted
            (default: False)

    See also:
        `meza.process.type_cast`

    Returns:
        bool: The parsed content.

    Examples:
        >>> to_bool(True)
        True
        >>> to_bool('true')
        True
        >>> to_bool('y')
        True
        >>> to_bool(1)
        True
        >>> to_bool(False)
        False
        >>> to_bool('false')
        False
        >>> to_bool('n')
        False
        >>> to_bool(0)
        False
        >>> to_bool('')
        False
        >>> to_bool(None)
        False
        >>> to_bool(None, warn=True)
        Traceback (most recent call last):
        ValueError: Invalid bool value: `None`.

    Returns:
        bool
    """
    trues = set(map(str.lower, trues) if trues else ft.DEF_TRUES)

    if ft.is_bool(content):
        try:
            value = content.lower() in trues
        except (TypeError, AttributeError):
            value = bool(content)
    elif warn:
        raise ValueError('Invalid bool value: `{}`.'.format(content))
    else:
        value = False

    return value


def to_int(content, thousand_sep=',', decimal_sep='.', warn=False):
    """Formats strings into integers.

    Args:
        content (str): The number to parse.
        thousand_sep (char): thousand's separator (default: ',')
        decimal_sep (char): decimal separator (default: '.')
        warn (bool): raise error if content can't be safely converted
            (default: False)

    See also:
        `meza.process.type_cast`

    Returns:
        flt: The parsed number.

    Examples:
        >>> to_int('$123.45')
        123
        >>> to_int('123€')
        123
        >>> to_int('2,123.45')
        2123
        >>> to_int('2.123,45', thousand_sep='.', decimal_sep=',')
        2123
        >>> to_int('2,123.45', warn=True)
        Traceback (most recent call last):
        ValueError: Invalid int value: `2,123.45`.
        >>> to_int('spam')
        0
        >>> to_int('spam', warn=True)
        Traceback (most recent call last):
        ValueError: Invalid int value: `spam`.

    Returns:
        int
    """
    if warn and not ft.is_int(content):
        raise ValueError('Invalid int value: `{}`.'.format(content))

    try:
        value = int(float(ft.strip(content, thousand_sep, decimal_sep)))
    except ValueError:
        if warn:
            raise ValueError('Invalid int value: `{}`.'.format(content))
        else:
            value = 0

    return value


def to_float(content, thousand_sep=',', decimal_sep='.', warn=False):
    """Formats strings into floats.

    Args:
        content (str): The number to parse.
        thousand_sep (char): thousand's separator (default: ',')
        decimal_sep (char): decimal separator (default: '.')
        warn (bool): raise error if content can't be safely converted
            (default: False)

    Returns:
        flt: The parsed number.

    See also:
        `meza.process.type_cast`

    Examples:
        >>> to_float('$123.45')
        123.45
        >>> to_float('123€')
        123.0
        >>> to_float('2,123.45')
        2123.45
        >>> to_float('2.123,45', thousand_sep='.', decimal_sep=',')
        2123.45
        >>> to_float('spam')
        0.0
        >>> to_float('spam', warn=True)
        Traceback (most recent call last):
        ValueError: Invalid float value: `spam`.

    Returns:
        float
    """
    if ft.is_numeric(content):
        value = float(ft.strip(content, thousand_sep, decimal_sep))
    elif warn:
        raise ValueError('Invalid float value: `{}`.'.format(content))
    else:
        value = 0.0

    return value


def to_decimal(content, thousand_sep=',', decimal_sep='.', **kwargs):
    """Formats strings into decimals

    Args:
        content (str): The string to parse.
        thousand_sep (char): thousand's separator (default: ',')
        decimal_sep (char): decimal separator (default: '.')
        kwargs (dict): Keyword arguments.

    Kwargs:
        warn (bool): raise error if content can't be safely converted
            (default: False)

        roundup (bool): Round up to the desired number of decimal places
             from 5 to 9 (default: True). If False, round up from 6 to 9.

        places (int): Number of decimal places to display (default: 2).

    See also:
        `meza.process.type_cast`

    Examples:
        >>> to_decimal('$123.45')
        Decimal('123.45')
        >>> to_decimal('123€')
        Decimal('123.00')
        >>> to_decimal('2,123.45')
        Decimal('2123.45')
        >>> to_decimal('2.123,45', thousand_sep='.', decimal_sep=',')
        Decimal('2123.45')
        >>> to_decimal('1.554')
        Decimal('1.55')
        >>> to_decimal('1.555')
        Decimal('1.56')
        >>> to_decimal('1.555', roundup=False)
        Decimal('1.55')
        >>> to_decimal('1.556')
        Decimal('1.56')
        >>> to_decimal('spam')
        Decimal('0.00')
        >>> to_decimal('spam', warn=True)
        Traceback (most recent call last):
        ValueError: Invalid numeric value: `spam`.

    Returns:
        decimal
    """
    if ft.is_numeric(content):
        decimalized = Decimal(ft.strip(content, thousand_sep, decimal_sep))
    elif kwargs.get('warn'):
        raise ValueError('Invalid numeric value: `{}`.'.format(content))
    else:
        decimalized = Decimal(0)

    roundup = kwargs.get('roundup', True)
    rounding = ROUND_HALF_UP if roundup else ROUND_HALF_DOWN
    places = int(kwargs.get('places', 2))
    precision = '.{}1'.format(''.join(it.repeat('0', places - 1)))
    return decimalized.quantize(Decimal(precision), rounding=rounding)


def _to_datetime(content):
    """Parses and formats strings into datetimes.

    Args:
        content (str): The date to parse.

    Returns:
        [tuple(str, bool)]: Tuple of the formatted date string and retry value.

    Examples:
        >>> _to_datetime('5/4/82')
        (datetime.datetime(1982, 5, 4, 0, 0), False)
        >>> _to_datetime('2/32/82') == ('2/32/82', True)
        True
        >>> _to_datetime('spam')
        (datetime.datetime(9999, 12, 31, 0, 0), False)
    """
    try:
        value = parse(content, default=DEFAULT_DATETIME)
    except ValueError as e:
        # impossible date, e.g., 2/31/15
        retry = any(x in str(e) for x in ('out of range', 'day must be in'))
        value = content if retry else DEFAULT_DATETIME
    else:
        retry = False

    return (value, retry)


def to_datetime(content, dt_format=None, warn=False):
    """Parses and formats strings into datetimes.

    Args:
        content (str): The string to parse.

        dt_format (str): Date format passed to `strftime()`
            (default: None).

        warn (bool): raise error if content can't be safely converted
            (default: False)

    Returns:
        obj: The datetime object or formatted datetime string.

    See also:
        `meza.process.type_cast`

    Examples:
        >>> fmt = '%Y-%m-%d %H:%M:%S'
        >>> to_datetime('5/4/82 2:00 pm')
        datetime.datetime(1982, 5, 4, 14, 0)
        >>> to_datetime('5/4/82 10:00', fmt) == '1982-05-04 10:00:00'
        True
        >>> to_datetime('2/32/82 12:15', fmt) == '1982-02-28 12:15:00'
        True
        >>> to_datetime('spam')
        datetime.datetime(9999, 12, 31, 0, 0)
        >>> to_datetime('spam', warn=True)
        Traceback (most recent call last):
        ValueError: Invalid datetime value: `spam`.

    Returns:
        datetime
    """
    bad_nums = map(str, range(29, 33))
    good_nums = map(str, range(31, 27, -1))

    try:
        bad_num = next(x for x in bad_nums if x in content)
    except StopIteration:
        options = [content]
    else:
        possibilities = (content.replace(bad_num, x) for x in good_nums)
        options = it.chain([content], possibilities)

    # Fix impossible dates, e.g., 2/31/15
    results = filterfalse(lambda x: x[1], map(_to_datetime, options))
    value = next(results)[0]

    if warn and value == DEFAULT_DATETIME:
        raise ValueError('Invalid datetime value: `{}`.'.format(content))
    else:
        datetime = value.strftime(dt_format) if dt_format else value

    return datetime


def to_date(content, date_format=None, warn=False):
    """Parses and formats strings into dates.

    Args:
        content (str): The string to parse.

        date_format (str): Time format passed to `strftime()` (default: None).

        warn (bool): raise error if content can't be safely converted
            (default: False)

    Returns:
        obj: The date object or formatted date string.

    See also:
        `meza.process.type_cast`

    Examples:
        >>> to_date('5/4/82')
        datetime.date(1982, 5, 4)
        >>> to_date('5/4/82', '%Y-%m-%d') == '1982-05-04'
        True
        >>> to_date('2/32/82', '%Y-%m-%d') == '1982-02-28'
        True
        >>> to_date('spam')
        datetime.date(9999, 12, 31)
        >>> to_date('spam', warn=True)
        Traceback (most recent call last):
        ValueError: Invalid datetime value: `spam`.

    Returns:
        date
    """
    value = to_datetime(content, warn=warn).date()
    return value.strftime(date_format) if date_format else value


def to_time(content, time_format=None, warn=False):
    """Parses and formats strings into times.

    Args:
        content (str): The string to parse.
        time_format (str): Time format passed to `strftime()` (default: None).
        warn (bool): raise error if content can't be safely converted
            (default: False)

    Returns:
        obj: The time object or formatted time string.

    See also:
        `meza.process.type_cast`

    Examples:
        >>> to_time('2:00 pm')
        datetime.time(14, 0)
        >>> to_time('10:00', '%H:%M:%S') == '10:00:00'
        True
        >>> to_time('2/32/82 12:15', '%H:%M:%S') == '12:15:00'
        True
        >>> to_time('spam')
        datetime.time(0, 0)
        >>> to_time('spam', warn=True)
        Traceback (most recent call last):
        ValueError: Invalid datetime value: `spam`.

    Returns:
        time
    """
    value = to_datetime(content, warn=warn).time()
    return value.strftime(time_format) if time_format else value


def to_filepath(filepath, **kwargs):
    """Creates a filepath from an online resource, i.e., linked file or
    google sheets export.

    Args:
        filepath (str): Output file path or directory.
        kwargs: Keyword arguments.

    Kwargs:
        headers (dict): HTTP response headers, e.g., `r.headers`.
        name_from_id (bool): Overwrite filename with resource id.
        resource_id (str): The resource id (required if `name_from_id` is True
            or filepath is a google sheets export)

    Returns:
        str: filepath

    Examples:
        >>> to_filepath('file.csv') == 'file.csv'
        True
        >>> to_filepath('.', resource_id='rid') == './rid.csv'
        True
    """
    isdir = p.isdir(filepath)
    headers = kwargs.get('headers') or {}
    name_from_id = kwargs.get('name_from_id')
    resource_id = kwargs.get('resource_id')

    if isdir and not name_from_id:
        try:
            disposition = headers.get('content-disposition', '')
            filename = disposition.split('=')[1].split('"')[1]
        except (KeyError, IndexError):
            filename = resource_id
    elif isdir or name_from_id:
        filename = resource_id

    if isdir and filename.startswith('export?format='):
        filename = '{}.{}'.format(resource_id, filename.split('=')[1])
    elif isdir and '.' not in filename:
        ctype = headers.get('content-type')
        filename = '{}.{}'.format(filename, ctype2ext(ctype))

    return p.join(filepath, filename) if isdir else filepath


def array2records(data, native=False):
    """Converts either a numpy.recarray or a nested array.array into records

    Args:
        data (Iter[array]): The 2-D array.

        native (bool): (default: False)

    Returns:
        Iterable of dicts

    See also:
        `meza.convert.df2records`

    Examples:
        >>> arr = [[1, 2, 3], [4, 5, 6]] if np else [(1, 4), (2, 5), (3, 6)]
        >>> data = np.array(arr, 'i4') if np else [array('i', a) for a in arr]
        >>> native = not np
        >>> next(array2records(data, native)) == {
        ...     'column_1': 1, 'column_2': 2, 'column_3': 3}
        True
        >>> i, f, u = [get_native_str(x) for x in ['i', 'f', 'u']]
        >>> data = [
        ...     array(i, [1, 2, 3]),
        ...     array(f, [1.0, 2.0, 3.0]),
        ...     [array(u, 'one'), array(u, 'two'), array(u, 'three')]]
        >>> next(array2records(data, True)) == {
        ...     'column_1': 1, 'column_2': 1.0, 'column_3': 'one'}
        True
    """
    textify = lambda x: x.tounicode() if x.typecode == 'u' else x.tostring()
    datify = lambda x: x.tolist() if hasattr(x, 'tolist') else map(textify, x)

    if native and hasattr(data[0], 'typecode'):
        header = None
        data = zip(*map(datify, data))
    elif native:
        header = [textify(h) for h in data[0]]
        data = zip(*map(datify, data[1:]))
    else:
        header = data.dtype.names

    if not header:
        try:
            size = data.shape[1]
        except (IndexError, AttributeError):
            data = iter(data)
            first_row = next(data)
            size = len(first_row)
            data = it.chain([first_row], data)

        header = ['column_%i' % (n + 1) for n in range(size)]

    return (dict(zip(header, row)) for row in data)


def df2records(df):
    """Converts a pandas DataFrame into records.

    Args:
        df (obj): pandas.DataFrame object

    Yields:
        dict: Record. A row of data whose keys are the field names.

    See also:
        `meza.process.array2records`

    Examples:
        >>> records = [
        ...     {'a': 1, 'b': 2.0, 'c': 'three'},
        ...     {'a': 4, 'b': 5.0, 'c': 'six'}]

        >>> if pd:
        ...    df = pd.DataFrame(records)
        ...    converted = df2records(df)
        ... else:
        ...    converted = iter(records)

        >>> next(converted) == {'a': 1, 'b': 2.0, 'c': 'three'}
        True
    """
    index = [_f for _f in df.index.names if _f]

    try:
        keys = index + df.columns.tolist()
    except AttributeError:
        # we have a Series, not a DataFrame
        keys = index + [df.name]
        rows = (i[0] + (i[1],) for i in df.items())
    else:
        rows = df.itertuples()

    for values in rows:
        if index:
            yield dict(zip(keys, values))
        else:
            yield dict(zip(keys, values[1:]))


def records2array(records, types, native=False, silent=False):
    """Converts records into either a numpy.recarray or a nested array.array

    Args:
        records (Iter[dict]): Rows of data whose keys are the field names.
            E.g., output from any `meza.io` read function.

        types (Iter[dict]):

        native (bool): Return a native array (default: False).

        silent (bool): Suppress the warning message (default: False).

    Returns:
        numpy.recarray

    See also:
        `meza.convert.records2df`

    Examples:
        >>> records = [{'alpha': 'aa', 'beta': 2}, {'alpha': 'bee', 'beta': 3}]
        >>> types = [
        ...     {'id': 'alpha', 'type': 'text'}, {'id': 'beta', 'type': 'int'}]
        >>>
        >>> arr = records2array(records, types, silent=True)
        >>> u, i = get_native_str('u'), get_native_str('i')
        >>> native_resp = [
        ...     [array(u, 'alpha'), array(u, 'beta')],
        ...     [array(u, 'aa'), array(u, 'bee')],
        ...     array(i, [2, 3])]
        >>>
        >>> if np:
        ...     arr.alpha.tolist() == ['aa', 'bee']
        ...     arr.beta.tolist() == [2, 3]
        ... else:
        ...     True
        ...     True
        True
        True
        >>> True if np else arr == native_resp
        True
        >>> records2array(records, types, native=True) == native_resp
        True
    """
    numpy = np and not native
    dialect = 'numpy' if numpy else 'array'
    zipped = [(ft.get_dtype(t1['type'], dialect), t1['id']) for t1 in types]
    _dtype, ids = list(zip(*zipped))
    dtype = list(map(get_native_str, _dtype))

    if numpy:
        data = [tuple(r.get(id_) for id_ in ids) for r in records]
        ndtype = [tuple(map(get_native_str, z)) for z in zip(ids, dtype)]
        ndarray = np.array(data, dtype=ndtype)
        converted = ndarray.view(np.recarray)
    else:
        if not (native or silent):
            msg = (
                "It looks like you don't have numpy installed. This function"
                " will return a native array instead.")

            logger.warning(msg)

        header = [array(get_native_str('u'), t2['id']) for t2 in types]
        data = (zip_longest(*([r.get(i) for i in ids] for r in records)))

        # array.array can't have nulls, so convert to an appropriate equivalent
        clean = lambda t, d: (x if x else ft.ARRAY_NULL_TYPE[t] for x in d)
        cleaned = (it.starmap(clean, zip(dtype, data)))

        values = [
            [array(d, x) for x in c] if d in {'c', 'u'} else array(d, c)
            for d, c in zip(dtype, cleaned)]

        converted = [header] + values

    return converted


def records2df(records, types, native=False, silent=False):
    """Converts records into either a pandas.DataFrame

    Args:
        records (Iter[dict]): Rows of data whose keys are the field names.
            E.g., output from any `meza.io` read function.

        types (Iter[dict]):

        native (bool): Return a native array (default: False).

        silent (bool): Suppress the warning message (default: False).

    Returns:
        numpy.recarray

    See also:
        `meza.convert.records2array`

    Examples:
        >>> records = [
        ...     {'col_1': 'alpha', 'col_2': 1.0},
        ...     {'col_1': 'beta', 'col_2': 2.3}]
        >>> types = [
        ...     {'id': 'col_1', 'type': 'text'},
        ...     {'id': 'col_2', 'type': 'float'}]
        >>> df = records2df(records, types, silent=True)
        >>> u, f = get_native_str('u'), get_native_str('f')
        >>>
        >>> native_resp = [
        ...     [array(u, 'col_1'), array(u, 'col_2')],
        ...     [array(u, 'alpha'), array(u, 'beta')],
        ...     array(f, [1.0, 2.299999952316284])]
        >>>
        >>> if pd:
        ...     columns = df.columns.tolist()
        ...     columns == ['col_1', 'col_2']
        ...     df.col_1.tolist() == ['alpha', 'beta']
        ...     [round(v, 1) for v in df.col_2]
        ... else:
        ...     True
        ...     True
        ...     [1.0, 2.3]
        True
        True
        [1.0, 2.3]
        >>> True if pd else df == native_resp
        True
        >>> records2df(records, types, native=True) == native_resp
        True
    """
    if pd and not native:
        recarray = records2array(records, types)
        df = pd.DataFrame.from_records(recarray)
    else:
        if not (native or silent):
            msg = (
                "It looks like you don't have pandas installed. This function"
                " will return a native array instead.")

            logger.warning(msg)

        df = records2array(records, types, native=True, silent=silent)

    return df


def records2csv(records, encoding=ENCODING, bom=False, skip_header=False):
    """Converts records into a csv file like object.

    Args:
        records (Iter[dict]): Rows of data whose keys are the field names.
            E.g., output from any `meza.io` read function.

        encoding (str): File encoding (default: ENCODING constant)
        bom (bool): Add Byte order marker (default: False)
        skip_header (bool): Don't write the header (default: False)

    Returns:
        obj: io.StringIO instance

    Examples:
        >>> records = [
        ...     {
        ...         'usda_id': 'IRVE2',
        ...         'species': 'Iris-versicolor',
        ...         'wikipedia_url': 'wikipedia.org/wiki/Iris_versicolor'}]
        ...
        >>> csv_obj = records2csv(records)
        >>> set(next(csv_obj).strip().split(',')) == {
        ...     'usda_id', 'species', 'wikipedia_url'}
        True
        >>> set(next(csv_obj).strip().split(',')) == {
        ...     'IRVE2', 'Iris-versicolor',
        ...     'wikipedia.org/wiki/Iris_versicolor'}
        True
    """
    f = StringIO()
    irecords = iter(records)

    if bom:
        f.write(BOM.encode(encoding))

    row = next(irecords)
    w = csv.DictWriter(f, list(row.keys()))
    None if skip_header else w.writeheader()
    w.writerow(row)
    w.writerows(irecords)
    f.seek(0)
    return f


def records2json(records, **kwargs):
    """Converts records into a json file like object.

    Args:
        records (Iter[dict]): Rows of data whose keys are the field names.
            E.g., output from any `meza.io` read function.

    Kwargs:
        indent (int): Number of spaces to indent (default: None).
        newline (bool): Output newline delimited json (default: False)
        sort_keys (bool): Sort rows by keys (default: True).
        ensure_ascii (bool): Ignore non-ASCII chars (default: False).

    See also:
        `meza.convert.records2geojson`

    Returns:
        obj: io.StringIO instance

    Examples:
        >>> from json import loads

        >>> record = {
        ...     'usda_id': 'IRVE2',
        ...     'species': 'Iris-versicolor',
        ...     'wikipedia_url': 'wikipedia.org/wiki/Iris_versicolor'}
        ...
        >>> json_obj = records2json([record])
        >>> json_str = json_obj.read()
        >>> loads(json_str)[0] == record
        True
        >>> json_str = records2json([record], newline=True).readline()
        >>> loads(json_str) == record
        True
    """
    defaults = {'sort_keys': True, 'ensure_ascii': False}
    [kwargs.setdefault(k, v) for k, v in defaults.items()]
    newline = kwargs.pop('newline', False)
    jd = partial(dumps, cls=ft.CustomEncoder, **kwargs)
    json = '\n'.join(map(jd, records)) if newline else jd(records)
    return StringIO(str(json))


def gen_features(subresults, kw):
    """Generates a geojson feature.

     Args:
        subresults (Iter[dict]): Rows of data whose keys are the field names.
            E.g., output from any `meza.io` read function.

        kw (obj): `fntools.Objectify` instance with the following Attributes:
            key (str): GeoJSON Feature ID
            lon (str): longitude field name
            lat (str): latitude field name
            sort_keys (bool): Sort rows by keys

    See also:
        `meza.convert.records2geojson`

    Yields:
        dict: a geojson feature

    Examples:
        >>> record = {
        ...     'id': 'gid', 'p1': 'prop', 'type': 'Point',
        ...     'lon': Decimal('12.2'), 'lat': Decimal('22.0')}
        >>> subresults = [((record['lon'], record['lat']), record)]
        >>> kw = ft.Objectify({'key': 'id', 'lon': 'lon', 'lat': 'lat'})
        >>> next(gen_features(subresults, kw)) == {
        ...     'type': 'Feature',
        ...     'id': 'gid',
        ...     'geometry': {
        ...         'type': 'Point',
        ...         'coordinates': (Decimal('12.2'), Decimal('22.0'))},
        ...     'properties': {'id': 'gid', 'p1': 'prop'}}
        True
    """
    black_list = {'type', kw.lon, kw.lat}

    for coordinates, row in subresults:
        properties = dict(x for x in row.items() if x[0] not in black_list)
        geometry = {'type': row['type'], 'coordinates': coordinates}

        if kw.sort_keys:
            geometry = order_dict(geometry, ['type', 'coordinates'])

        feature = {
            'type': 'Feature',
            'id': row.get(kw.key),
            'geometry': geometry,
            'properties': properties}

        if kw.sort_keys:
            feature_order = ['type', 'id', 'geometry', 'properties']
            feature = order_dict(feature, feature_order)

        yield feature


def gen_subresults(records, kw):
    """Helper function for converting record groups into a GeoJSON file like object.

     Args:
        records (Iter[dict]): Rows of data whose keys are the field names.
            E.g., output from any `meza.io` read function.

        kw (obj): `fntools.Objectify` instance with the following Attributes:
            key (str): GeoJSON Feature ID
            lon (str): longitude field name
            lat (str): latitude field name

    See also:
        `meza.convert.records2geojson`

    Yields:
        tuple(iter, dict): tuple of coordinates and row

    Examples:
        >>> kw = ft.Objectify({'key': 'id', 'lon': 'lon', 'lat': 'lat'})
        >>> record = {
        ...     'lon': Decimal('1.2'), 'lat': Decimal('22.0'), 'type': 'Point'}
        >>> next(gen_subresults([record], kw))[0]
        (Decimal('1.2'), Decimal('22.0'))
        >>> record = {'lon': 1.2, 'lat': 22.0, 'type': 'LineString'}
        >>> next(gen_subresults([record], kw))[0]
        [(1.2, 22.0)]
    """
    for id_, group in it.groupby(records, ft.def_itemgetter(kw.key)):
        first_row = next(group)
        _type = first_row['type']
        sub_records = it.chain([first_row], group)

        if _type == 'Point':
            for row in sub_records:
                yield ((row[kw.lon], row[kw.lat]), row)
        elif _type == 'LineString':
            yield ([(r[kw.lon], r[kw.lat]) for r in sub_records], first_row)
        elif _type == 'Polygon':
            groups = it.groupby(sub_records, itemgetter('pos'))
            polygon = [[(r[kw.lon], r[kw.lat]) for r in g[1]] for g in groups]
            yield (polygon, first_row)
        else:
            raise TypeError('Invalid type: {}'.format(_type))


def records2geojson(records, **kwargs):
    """Converts records into a GeoJSON file like object.

     Args:
        records (Iter[dict]): Rows of data whose keys are the field names.
            E.g., output from any `meza.io.read_geojson`.

        kwargs (dict): Keyword arguments.

    Kwargs:
        key (str): GeoJSON Feature ID (default: 'id').
        lon (int): longitude field name (default: 'lon').
        lat (int): latitude field name (default: 'lat').
        crs (str): coordinate reference system field name (default:
            'urn:ogc:def:crs:OGC:1.3:CRS84').
        indent (int): Number of spaces to indent (default: 2).
        sort_keys (bool): Sort rows by keys (default: True).
        ensure_ascii (bool): Sort response dict by keys (default: False).

    See also:
        `meza.convert.records2json`
        `meza.io.read_geojson`

    Returns:
        obj: io.StringIO instance

    Examples:
        >>> from json import loads

        >>> record = {
        ...     'id': 'gid', 'p1': 'prop', 'type': 'Point',
        ...     'lon': Decimal('12.2'), 'lat': Decimal('22.0')}
        ...
        >>> result = loads(next(records2geojson([record])))
        >>> result['type'] == 'FeatureCollection'
        True
        >>> result['bbox']
        [12.2, 22.0, 12.2, 22.0]
        >>> crs = 'urn:ogc:def:crs:OGC:1.3:CRS84'
        >>> result['crs'] == {'type': 'name', 'properties': {'name': crs}}
        True
        >>> features = result['features']
        >>> sorted(features[0].keys()) == [
        ...     'geometry', 'id', 'properties', 'type']
        True
        >>> features[0]['geometry'] == {
        ...     'type': 'Point', 'coordinates': [12.2, 22.0]}
        True
    """
    defaults = {
        'key': 'id', 'lon': 'lon', 'lat': 'lat', 'indent': 2, 'sort_keys': True,
        'crs': 'urn:ogc:def:crs:OGC:1.3:CRS84'}

    kw = ft.Objectify(kwargs, **defaults)
    crs = {'type': 'name', 'properties': {'name': kw.crs}}

    subresults = gen_subresults(records, kw)
    features = list(gen_features(subresults, kw))
    coords = [f['geometry']['coordinates'] for f in features]
    get_lon = lambda x: map(itemgetter(0), x)
    get_lat = lambda x: map(itemgetter(1), x)

    try:
        chained = (it.chain.from_iterable(map(get_lon, c)) for c in coords)
        lons = set(it.chain.from_iterable(chained))
    except TypeError:
        try:
            lons = set(it.chain.from_iterable(map(get_lon, coords)))
        except TypeError:
            # it's a point
            lons = set(get_lon(coords))
            lats = set(get_lat(coords))
        else:
            # it's a line
            lats = set(it.chain.from_iterable(map(get_lat, coords)))
    else:
        # it's a polygon
        chained = (it.chain.from_iterable(map(get_lat, c)) for c in coords)
        lats = set(it.chain.from_iterable(chained))

    if kw.sort_keys:
        crs = order_dict(crs, ['type', 'properties'])

    output = {
        'type': 'FeatureCollection',
        'bbox': [min(lons), min(lats), max(lons), max(lats)],
        'features': features,
        'crs': crs}

    if kw.sort_keys:
        output_order = ['type', 'bbox', 'features', 'crs']
        output = order_dict(output, output_order)

    dkwargs = ft.dfilter(kwargs, ['indent', 'sort_keys'], True)
    json = dumps(output, cls=ft.CustomEncoder, **dkwargs)
    return StringIO(str(json))