# Python source code of simpletable

""" This file implements a Table class
    that is designed to be the basis of any format

Requirements
------------

* FITS format:
    * astropy:
        provides a replacement to pyfits
        pyfits can still be used instead but astropy is now the default

* HDF5 format:
    * pytables

RuntimeError will be raised when writing to a format associated with missing
package.


.. code-block:: python

    >>> t = SimpleTable('path/mytable.csv')
    # get a subset of columns only
    >>> s = t.get('M_* logTe logLo U B V I J K')
    # set some aliases
    >>> t.set_alias('logT', 'logTe')
    >>> t.set_alias('logL', 'logLo')
    # make a query on one or multiple column
    >>> q = s.selectWhere('logT logL', '(J > 2) & (10 ** logT > 5000)')
    # q is also a table object
    >>> q.Plotter.plot('logT', 'logL', ',')
    # makes a simple plot (see :module:`plotter`)
    >>> s.write('newtable.fits')
    # export the initial subtable to a new file
"""
from __future__ import (absolute_import, division, print_function)

# version string of this module
__version__ = '3.0'
# public names exported by a star-import of this module
__all__ = ['AstroHelpers', 'AstroTable', 'SimpleTable', 'stats']

import sys
import math
from copy import deepcopy
import re
import itertools
from functools import wraps, partial
import numpy as np
from numpy import deg2rad, rad2deg, sin, cos, sqrt, arcsin, arctan2
from numpy.lib import recfunctions
import types

try:
    from astropy.io import fits as pyfits
except ImportError:
    try:
        import pyfits       
    except ImportError:
        pyfits = None

try:
    import tables
except ImportError:
    tables = None

try:
    import pandas as _pd
except ImportError:
    _pd = None

try:
    from astropy.table import Table as _astropytable
except ImportError:
    _astropytable = None

try:
    from .plotter import Plotter
except ImportError:
    Plotter = None

# ==============================================================================
# Python 3 compatibility behavior
# ==============================================================================
# remap some python 2 built-ins on to py3k behavior or equivalent
# Most of them become generators
import operator

# True when running under Python 3 or later
PY3 = sys.version_info[0] >= 3

if not PY3:
    # Python 2: swap the eager built-ins for their lazy counterparts and
    # point the dict helpers at the ``iter*`` methods
    range = xrange
    from itertools import izip as zip
    iteritems = operator.methodcaller('iteritems')
    itervalues = operator.methodcaller('itervalues')
    basestring = (str, unicode)
else:
    # Python 3: dict methods already return views, and ``basestring`` is
    # emulated by the tuple of text/bytes types
    iteritems = operator.methodcaller('items')
    itervalues = operator.methodcaller('values')
    basestring = (str, bytes)


# ==============================================================================
# Specials -- special functions
# ==============================================================================

def pretty_size_print(num_bytes):
    """
    Output number of bytes in a human readable format

    Parameters
    ----------
    num_bytes: int
        number of bytes to convert

    Returns
    -------
    output: str
        string representation of the size with the largest unit (powers of
        1024) that keeps the value at or above 1, formatted with 3
        significant digits.  None is returned when `num_bytes` is None.
    """
    if num_bytes is None:
        return None

    # largest unit first: the first threshold reached gives the unit to use
    units = ('YB', 'ZB', 'EB', 'PB', 'TB', 'GB', 'MB', 'KB')
    for power, unit in zip(range(len(units), 0, -1), units):
        scale = 1024 ** power
        # `>=` so that an exact power of 1024 is reported in its own unit
        # (1024 -> '1 KB' rather than '1.02e+03 Bytes')
        if num_bytes >= scale:
            return '%.3g %s' % (num_bytes / scale, unit)

    return '%.3g Bytes' % (num_bytes)


def _fits_read_header(hdr):
    """
    Convert pyfits header into dictionary with relevant values

    Parameters
    ----------
    hdr: pyfits.Header
        header of the fits unit.  Either the `cards` accessor (cards exposing
        `keyword`) or the legacy `ascard` accessor (cards exposing `key`) is
        used, whichever the object provides.

    Returns
    -------
    header: dict
        header dictionary (structural and per-column keywords removed,
        COMMENT and HISTORY cards joined with newlines, EXTNAME copied to
        'NAME')

    alias: dict
        aliases, parsed from the `ALIAS*` cards stored as 'alias=column'

    units: dict
        units of the columns, from the `TUNIT*` cards

    comments: dict
        comments/description of the columns, from the `TTYPE*` card comments
    """
    header = {}
    alias = {}
    units = {}
    comments = {}

    # generic cards
    genTerms = ['XTENSION', 'BITPIX', 'NAXIS', 'NAXIS1',
                'NAXIS2', 'PCOUNT', 'GCOUNT', 'TFIELDS',
                'EXTNAME']
    fieldTerms = ['TTYPE', 'TFORM', 'TUNIT', 'ALIAS']

    # Select the card accessor explicitly instead of catching any exception:
    # a genuine parsing error must not be hidden behind a retry with the
    # other API.
    if hasattr(hdr, 'cards'):
        cards, key_attr = hdr.cards, 'keyword'
    else:   # pyfits stsci (legacy)
        cards, key_attr = hdr.ascard, 'key'

    # read col comments and units
    for card in cards['TTYPE*']:
        name = card.value
        comments[name] = card.comment
        u = hdr.get(getattr(card, key_attr).replace('TYPE', 'UNIT'), None)
        if u is not None:
            units[name] = u

    # read aliases; split on the first '=' only so that the original column
    # expression may itself contain '='
    for card in cards['ALIAS*']:
        al, orig = card.value.split('=', 1)
        alias[al] = orig

    # other specific keywords: COMMENT, HISTORY
    header_comments = []
    header_history = []
    for k, v in hdr.items():
        if (k in genTerms) or (k[:5] in fieldTerms):
            continue
        if k == 'COMMENT':
            header_comments.append(v)
        elif k == 'HISTORY':
            header_history.append(v)
        else:
            header[k] = v

    # COMMENT, HISTORY polish
    if len(header_comments) > 0:
        header['COMMENT'] = '\n'.join(header_comments)
    if len(header_history) > 0:
        header['HISTORY'] = '\n'.join(header_history)

    if 'EXTNAME' in hdr:
        header['NAME'] = hdr['EXTNAME']

    return header, alias, units, comments


def _fits_generate_header(tab):
    """ Generate the corresponding fits Header that contains all necessary info

    Parameters
    ----------

    tab: SimpleTable instance
        table; its `keys()`, `header`, `_units`, `_desc` and `_aliases`
        are read

    Returns
    -------
    hdr: pyfits.Header
        header instance holding the TTYPEn/TUNITn cards of the columns, the
        ALIASn cards ('alias=column'), the EXTNAME card and the remaining
        entries of `tab.header`
    """
    empty = ['', 'None', None]

    # get column cards
    cards = []

    # names units and comments
    for e, k in enumerate(tab.keys()):
        cards.append(('TTYPE{0:d}'.format(e + 1), k, tab._desc.get(k, '')))
        u = tab._units.get(k, '')
        if u not in empty:
            cards.append(('TUNIT{0:d}'.format(e + 1), u,
                          'unit of {0:s}'.format(k)))

    # add aliases
    for e, v in enumerate(tab._aliases.items()):
        cards.append(('ALIAS{0:d}'.format(e + 1), '='.join(v), ''))

    # a table without a NAME entry simply gets no EXTNAME card
    name = tab.header.get('NAME', None)
    if name not in ['', 'None', None, 'No Name']:
        cards.append(('EXTNAME', name, ''))

    hdr = pyfits.Header(cards)

    for k, v in tab.header.items():
        if (k == 'NAME') or (v in empty):
            continue
        if k == 'COMMENT':
            for j in v.split('\n'):
                hdr.add_comment(j)
        elif k == 'HISTORY':
            for j in v.split('\n'):
                hdr.add_history(j)
        else:
            # item assignment adds or updates the keyword; the legacy
            # two-argument `hdr.update(key, value)` form is not accepted by
            # the dict-like `Header.update` of current astropy
            hdr[k] = v
    return hdr


def _fits_writeto(filename, data, header=None, output_verify='exception',
                  clobber=False, checksum=False):
    """
    Create a new FITS file using the supplied data/header.
    Patched version of pyfits to correctly include provided header

    Parameters
    ----------
    filename : file path, file object, or file like object
        File to write to.  If opened, must be opened in a writeable binary
        mode such as 'wb' or 'ab+'.

    data : array, record array, or groups data object
        data to write to the new file

    header : `Header` object, optional
        the header associated with ``data``. If `None`, a header
        of the appropriate type is created for the supplied data. This
        argument is optional.

    output_verify : str
        Output verification option.  Must be one of ``"fix"``, ``"silentfix"``,
        ``"ignore"``, ``"warn"``, or ``"exception"``.  May also be any
        combination of ``"fix"`` or ``"silentfix"`` with ``"+ignore"``,
        ``+warn``, or ``+exception" (e.g. ``"fix+warn"``).  See :ref:`verify`
        for more info.

    clobber : bool, optional
        If `True`, and if filename already exists, it will overwrite
        the file.  Default is `False`.

    checksum : bool, optional
        If `True`, adds both ``DATASUM`` and ``CHECKSUM`` cards to the
        headers of all HDU's written to the file
    """
    hdu = pyfits.convenience._makehdu(data, header)
    # re-apply the cards of the provided header on the generated HDU;
    # nothing to re-apply when no header was given (documented default)
    if header is not None:
        hdu.header.update(header.cards)
    if hdu.is_image and not isinstance(hdu, pyfits.PrimaryHDU):
        hdu = pyfits.PrimaryHDU(data, header=header)
    # NOTE(review): recent astropy releases name this keyword `overwrite`
    # instead of `clobber` -- confirm against the supported versions.
    hdu.writeto(filename, clobber=clobber, output_verify=output_verify,
                checksum=checksum)


def _fits_append(filename, data, header=None, checksum=False, verify=True,
                 **kwargs):
    """
    Append the header/data to FITS file if filename exists, create if not.

    If only ``data`` is supplied, a minimal header is created.
    Patched version of pyfits to correctly include provided header

    Parameters
    ----------
    filename : file path, file object, or file like object
        File to write to.  If opened, must be opened for update (rb+) unless it
        is a new file, then it must be opened for append (ab+).  A file or
        `~gzip.GzipFile` object opened for update will be closed after return.

    data : array, table, or group data object
        the new data used for appending

    header : `Header` object, optional
        The header associated with ``data``.  If `None`, an appropriate header
        will be created for the data object supplied.

    checksum : bool, optional
        When `True` adds both ``DATASUM`` and ``CHECKSUM`` cards to the header
        of the HDU when written to the file.

    verify : bool, optional
        When `True`, the existing FITS file will be read in to verify it for
        correctness before appending.  When `False`, content is simply appended
        to the end of the file.  Setting ``verify`` to `False`` can be much
        faster.

    kwargs
        Any additional keyword arguments; they are forwarded to
        :func:`_fits_writeto` when the file does not exist yet or is empty.
    """
    name, closed, noexist_or_empty = pyfits.convenience._stat_filename_or_fileobj(filename)

    if noexist_or_empty:
        # The input file or file like object either doesn't exist or is
        # empty.  Use the writeto convenience function to write the
        # output to the empty object.
        _fits_writeto(filename, data, header, checksum=checksum, **kwargs)
        return

    hdu = pyfits.convenience._makehdu(data, header)
    # re-apply the cards of the provided header on the generated HDU;
    # nothing to re-apply when no header was given (documented default)
    if header is not None:
        hdu.header.update(header.cards)

    # a primary HDU cannot be appended: turn it into an image extension
    if isinstance(hdu, pyfits.PrimaryHDU):
        hdu = pyfits.ImageHDU(data, header)

    if verify or not closed:
        f = pyfits.convenience.fitsopen(filename, mode='append')
        f.append(hdu)

        # Set a flag in the HDU so that only this HDU gets a checksum when
        # writing the file.
        hdu._output_checksum = checksum
        f.close(closed=closed)
    else:
        f = pyfits.convenience._File(filename, mode='append')
        hdu._output_checksum = checksum
        hdu._writeto(f)
        f.close()


def _ascii_read_header(fname, comments='#', delimiter=None, commentedHeader=True,
                       *args, **kwargs):
    """
    Read ASCII/CSV header

    The header is the leading run of lines starting with `comments`:

    * ``## name \\t unit \\t description`` lines give column metadata
    * ``# key \\t value`` lines give header keywords; COMMENT and HISTORY
      values are accumulated, keys containing 'alias' are parsed as
      ``alias=column``
    * any other commented line is appended to the COMMENT entry

    Empty lines inside the header are skipped.

    Parameters
    ----------
    fname: str or stream
        File name or opened text stream to read.  A stream must support
        `tell` and `seek`; it is left positioned at the beginning of the
        last line that was read.

    comments: str, optional
        The character used to indicate the start of a comment;
        default: '#'.

    delimiter: str, optional
        The string used to separate values.  By default, this is any
        whitespace.

    commentedHeader: bool, optional
        if set, the last line of the header is expected to be the column titles

    Returns
    -------
    nlines: int
        number of lines to skip before the data (always 0 for a stream since
        the stream is repositioned instead)

    header: dict
        header dictionary

    units: dict
        units

    desc: dict
        comments/description of the columns

    alias: dict
        aliases

    names: sequence
        column names: taken from the last commented line when
        `commentedHeader` is set, from the first non-commented line otherwise.
    """
    is_stream = hasattr(fname, 'read')
    stream = fname if is_stream else open(fname, 'r')

    header = {'COMMENT': '', 'HISTORY': ''}
    alias = {}
    units = {}
    desc = {}

    def parseStrNone(v):
        """ robust parse: collapse whitespace, map empty/none/null to None """
        _v = ' '.join(v.split())
        if _v.lower() in ('', 'none', 'null'):
            return None
        return _v

    oldline = None      # previous commented line without its first character
    lasthdr = None      # last header key parsed
    nlines = 0
    names = []
    line_start = None   # stream position of the beginning of the current line

    try:
        while True:
            if is_stream:
                line_start = stream.tell()
            raw = stream.readline()

            if not raw:
                # end of file reached without any data line
                if commentedHeader and (oldline is not None):
                    names = oldline.split(delimiter)
                    if names and (lasthdr == names[0]):
                        header.pop(lasthdr, None)
                break

            nlines += 1
            # strip the line terminator only when present, so that a last
            # line without newline does not lose a character
            if raw[-1:] in ('\n', b'\n'):
                line = raw[:-1]
            else:
                line = raw

            if len(line) == 0:
                continue    # empty line: nothing to parse

            if line[0] != comments:
                # first non-commented line: end of the header
                if commentedHeader and (oldline is not None):
                    names = oldline.split(delimiter)
                    nlines -= 1
                    # the column-names line may have been read as a keyword
                    if names and (lasthdr == names[0]):
                        header.pop(lasthdr, None)
                else:
                    names = line.split(delimiter)
                break

            # header part
            if len(line) > 2:
                if line[1] == comments:
                    # column meta is expected to start with ##
                    k = line[2:].split('\t')
                    colname = k[0].strip()
                    colunit = parseStrNone(k[1]) if len(k) > 1 else None
                    colcomm = parseStrNone(k[2]) if len(k) > 2 else None
                    if colunit is not None:
                        units[colname] = colunit
                    if colcomm is not None:
                        desc[colname] = colcomm
                else:
                    # header is expected as "# key \t value"
                    k = line[1:].split('\t')
                    if len(k) > 1:
                        key = k[0].strip()  # remove trailing spaces
                        val = ' '.join(k[1:]).strip()

                        # one single chain: an accumulated COMMENT must not
                        # fall through and be overwritten by the last value
                        if key in ('', None, 'None', 'NONE', 'COMMENT'):
                            header['COMMENT'] = header['COMMENT'] + '\n' + val
                        elif key in ('HISTORY', ):
                            header['HISTORY'] = header['HISTORY'] + '\n' + val
                        elif 'alias' in key.lower():
                            # take care of aliases
                            al, orig = val.split('=')
                            alias[al] = orig
                        else:
                            header[key] = val
                        lasthdr = key
                    else:
                        header['COMMENT'] = header['COMMENT'] + '\n' + line[1:]
            oldline = line[1:]
    finally:
        # only close what was opened here, even when parsing fails
        if not is_stream:
            stream.close()

    if is_stream:
        # rewind to the beginning of the last line read
        stream.seek(line_start)
        nlines = 0  # make sure the value is set to the current position

    return nlines, header, units, desc, alias, names


def _hdf5_write_data(filename, data, tablename=None, mode='w', append=False,
                     header=None, units=None, comments=None, aliases=None,
                     **kwargs):
    """ Write table into HDF format

    Parameters
    ----------
    filename : file path, or tables.File instance
        File to write to.  If opened, must be opened and writable (mode='w' or 'a')

    data: recarray
        data to write to the new file

    tablename: str
        path of the node including table's name; defaults to the 'NAME'
        entry of `header`, then to '/data'

    mode: str
        in ('w', 'a') mode to open the file

    append: bool
        if set, tends to append data to an existing table (forces mode 'a');
        a new table is created when the node does not exist

    header: dict, optional
        table header, stored as attributes of the table

    units: dict, optional
        dictionary of units, stored as ``FIELD_<index>_UNIT`` attributes

    comments: dict, optional
        comments/description of the columns, stored as ``FIELD_<index>_DESC``
        attributes

    aliases: dict, optional
        aliases, stored as ``ALIAS<index>`` attributes ('alias=column')

    silent: bool, optional (through kwargs)
        if set, do not print the warning issued when appending to a missing
        table

    Raises
    ------
    Exception
        when `filename` is a stream (has a `read` attribute)
    tables.FileModeError
        when an opened file is given with a different mode

    .. note::
        other keywords are forwarded to `create_table`
    """
    if hasattr(filename, 'read'):
        raise Exception("HDF backend does not implement stream")

    # fresh containers instead of shared mutable defaults
    header = {} if header is None else header
    units = {} if units is None else units
    comments = {} if comments is None else comments
    aliases = {} if aliases is None else aliases

    if append is True:
        mode = 'a'
    silent = kwargs.pop('silent', False)

    own_file = not isinstance(filename, tables.File)
    if own_file:
        hd5 = tables.open_file(filename, mode=mode)
    else:
        if (filename.mode != mode) & (mode != 'r'):
            raise tables.FileModeError('The file is already opened in a different mode')
        hd5 = filename

    try:
        # check table name and path
        tablename = tablename or header.get('NAME', None)
        if tablename in ('', None, 'Noname', 'None'):
            tablename = '/data'

        w = tablename.split('/')
        where = '/'.join(w[:-1])
        name = w[-1]
        if where in ('', None):
            where = '/'
        if where[0] != '/':
            where = '/' + where

        if append:
            # group and leaf must be joined by exactly one '/'
            nodepath = where.rstrip('/') + '/' + name
            try:
                t = hd5.get_node(nodepath)
                t.append(data.astype(t.description._v_dtype))
                t.flush()
            except tables.NoSuchNodeError:
                if not silent:
                    print(("Warning: Table {0} does not exists.  \n A new table will be created").format(nodepath))
                append = False

        if not append:
            t = hd5.create_table(where, name, data, **kwargs)

            # update header
            for k, v in header.items():
                if (k == 'FILTERS') & (float(t.attrs['VERSION']) >= 2.0):
                    t.attrs[k.lower()] = v
                else:
                    t.attrs[k] = v
            if 'TITLE' not in header:
                t.attrs['TITLE'] = name

            # add column descriptions and units, one attribute per column
            # index (the index must be formatted into the attribute name,
            # otherwise every column writes to the same attribute)
            for e, colname in enumerate(data.dtype.names):
                _u = units.get(colname, None)
                _d = comments.get(colname, None)
                if _u is not None:
                    t.attrs['FIELD_{0:d}_UNIT'.format(e)] = _u
                if _d is not None:
                    t.attrs['FIELD_{0:d}_DESC'.format(e)] = _d

            # add aliases
            for i, (k, v) in enumerate(aliases.items()):
                t.attrs['ALIAS{0:d}'.format(i)] = '{0:s}={1:s}'.format(k, v)

            t.flush()

        if own_file:
            hd5.flush()
    finally:
        # only close a file opened here, also when writing fails
        if own_file:
            hd5.close()


def _hdf5_read_data(filename, tablename=None, silent=False, *args, **kwargs):
    """ Read a table and its metadata from an HDF5 file

    Parameters
    ----------
    filename: str
        file to read from

    tablename: str
        node containing the table; the first node under '/' is used when
        not provided

    silent: bool
        skip verbose messages

    .. note::
        other arguments are forwarded to `tables.open_file`

    Returns
    -------
    hdr: dict
        header, from the node attributes (NROWS, VERSION, CLASS, EXTNAME,
        TITLE, FIELD* and ALIAS* excluded); 'NAME' is set from the TITLE
        attribute or from the file and node names

    aliases: dict
        aliases, parsed from the ``ALIAS*`` attributes ('alias=column')

    units: dict
        units of the columns, from the ``FIELD_<index>_UNIT`` attributes

    desc: dict
        description of the columns, from the ``FIELD_<index>_DESC`` attributes

    data: array
        content of the node (``node[:]``)
    """
    source = tables.open_file(filename, *args, **kwargs)

    try:
        if tablename is None:
            # snake_case API, consistent with open_file/get_node used here
            node = source.list_nodes('/')[0]
            tablename = node.name
        elif tablename[0] != '/':
            node = source.get_node('/' + tablename)
        else:
            node = source.get_node(tablename)
        if not silent:
            print("\tLoading table: {0}".format(tablename))

        hdr = {}
        aliases = {}

        # read header
        exclude = ['NROWS', 'VERSION', 'CLASS', 'EXTNAME', 'TITLE']
        for k in node.attrs._v_attrnames:
            if k in exclude:
                continue
            prefix = k[:5]
            if prefix == 'ALIAS':
                c0, c1 = node.attrs[k].split('=')
                aliases[c0] = c1
            elif prefix != 'FIELD':
                hdr[k] = node.attrs[k]

        empty_name = ['', 'None', 'Noname', None]
        if node.attrs['TITLE'] not in empty_name:
            hdr['NAME'] = node.attrs['TITLE']
        else:
            hdr['NAME'] = '{0:s}/{1:s}'.format(filename, node.name)

        # read column meta
        units = {}
        desc = {}

        for (k, colname) in enumerate(node.colnames):
            _u = getattr(node.attrs, 'FIELD_{0:d}_UNIT'.format(k), None)
            _d = getattr(node.attrs, 'FIELD_{0:d}_DESC'.format(k), None)
            if _u is not None:
                units[colname] = _u
            if _d is not None:
                desc[colname] = _d

        data = node[:]
    finally:
        # release the file also when reading fails
        source.close()

    return hdr, aliases, units, desc, data


def _ascii_generate_header(tab, comments='#', delimiter=' ',
                           commentedHeader=True):
    """ Generate the corresponding ascii Header that contains all necessary info

    Parameters
    ----------

    tab: SimpleTable instance
        table; its `keys()`, `header`, `_units`, `_desc` and `_aliases`
        are read

    comments: str
        string to prepend header lines (None is treated as '')

    delimiter: str, optional
        The string used to separate the column names on the last line.

    commentedHeader: bool, optional
        if set, the last line of the header (the column titles) is also
        prefixed by `comments`

    Returns
    -------
    hdr: str
        string that will be be written at the beginning of the file
    """
    def _text(value):
        """ string-like values are kept, anything else is converted """
        return value if hasattr(value, 'split') else str(value)

    hdr = []

    if comments is None:
        comments = ''

    colnames = list(tab.keys())

    # table header (skipped when empty: max() of nothing would fail)
    if len(tab.header) > 0:
        length = max(map(len, tab.header.keys()))
        fmt = '{{0:s}} {{1:{0:d}s}}\t{{2:s}}'.format(length)
        for k, v in tab.header.items():
            # header values are not necessarily strings (e.g. numbers)
            for vk in _text(v).split('\n'):
                if len(vk) > 0:
                    hdr.append(fmt.format(comments, k.upper(), vk.strip()))

    # column metadata
    hdr.append(comments)  # add empty line
    if len(colnames) > 0:
        length = max(map(len, colnames))
        fmt = '{{0:s}}{{0:s}} {{1:{0:d}s}}\t{{2:s}}\t{{3:s}}'.format(length)
        for colname in colnames:
            # a unit/description stored as None is written as 'None'
            unit = _text(tab._units.get(colname, 'None'))
            desc = _text(tab._desc.get(colname, 'None'))
            hdr.append(fmt.format(comments, colname, unit, desc))

    # aliases
    if len(tab._aliases) > 0:
        hdr.append(comments)  # add empty line
        for k, v in tab._aliases.items():
            hdr.append('{0:s} alias\t{1:s}={2:s}'.format(comments, k, v))

    # column names
    hdr.append(comments)
    if commentedHeader:
        hdr.append('{0:s} {1:s}'.format(comments, delimiter.join(colnames)))
    else:
        hdr.append('{0:s}'.format(delimiter.join(colnames)))

    return '\n'.join(hdr)


def _latex_writeto(filename, tab, comments='%'):
    """ Write the data into a latex table format

    Parameters
    ----------
    filename: str or stream
        file name, or object with a `write` method, to write into

    tab: SimpleTable instance
        table; its `header`, `ncols`, `pprint` and `_desc` are used

    comments: str
        string to prepend header lines
        NOTE(review): currently not used by the implementation
    """
    parts = ["\\begin{table}\n\\begin{center}\n"]

    # add caption
    tabname = tab.header.get('NAME', None)
    if tabname not in ['', None, 'None']:
        parts.append("\\caption{{{0:s}}}\n".format(tabname))

    # tabular
    parts.append('\\begin{{tabular}}{{{0:s}}}\n'.format('c' * tab.ncols))
    # NOTE(review): the 'MAG*' field selection is hard-coded while the
    # tabular is declared with `tab.ncols` columns -- confirm this is intended
    parts.append(tab.pprint(delim=' & ', fields='MAG*', headerChar='',
                            endline='\\\\\n', all=True, ret=True))
    parts.append('\\end{tabular}\n')

    # end table
    parts.append("\\end{center}\n")

    # add notes if any
    if len(tab._desc) > 0:
        # the backslash is escaped explicitly: '\%' is an invalid escape
        # sequence; the text written is unchanged
        parts.append('\\% notes \n\\begin{scriptsize}\n')
        for e, (k, v) in enumerate(tab._desc.items()):
            if v not in (None, 'None', 'none', ''):
                parts.append('{0:d} {1:s}: {2:s} \\\\\n'.format(e, k, v))
        parts.append('\\end{scriptsize}\n')
    parts.append("\\end{table}\n")

    txt = ''.join(parts)
    if hasattr(filename, 'write'):
        filename.write(txt)
    else:
        with open(filename, 'w') as unit:
            unit.write(txt)


def _convert_dict_to_structured_ndarray(data):
    """convert_dict_to_structured_ndarray

    Parameters
    ----------

    data: dictionary like object
        data structure which provides `iteritems` or `items`; any other
        object is expected to be an iterable of (key, values) pairs

    Returns
    -------
    tab: structured ndarray
        record array with one field per key.  Columns of object dtype are
        converted to fixed-length byte strings sized on their longest
        element; 2-d values give fields holding ``shape[1]`` items per row.
    """
    # look the accessor up explicitly rather than catching AttributeError
    # around the whole conversion, which would also hide unrelated errors
    getter = getattr(data, 'iteritems', None) or getattr(data, 'items', None)
    if getter is not None:
        pairs = list(getter())
    else:
        # hope it's a tuple ((key, value),) pairs.
        pairs = list(data)

    newdtype = []
    columns = []
    for key, dk in pairs:
        _dk = np.asarray(dk)
        # unknown type is converted to text; the emptiness check is done on
        # the column itself since max() of an empty column would fail
        if (_dk.dtype.type == np.object_) and (len(_dk) > 0):
            longest = len(max(_dk, key=len))
            _dk = _dk.astype('|%iS' % longest)
        if _dk.ndim > 1:
            newdtype.append((str(key), _dk.dtype, (_dk.shape[1],)))
        else:
            newdtype.append((str(key), _dk.dtype))
        columns.append(_dk)

    return np.rec.fromarrays(columns, dtype=newdtype)


def __indent__(rows, header=None, units=None, headerChar='-',
               delim=' | ', endline='\n', **kwargs):
    """Indents a table by column.

    Parameters
    ----------
    rows: sequences of rows
        one sequence of str per row.

    header: sequence of str
        row consists of the columns' names

    units: sequence of str
        Sequence of units

    headerChar: char
        Character to be used for the row separator line; no separator is
        written when it is None, '' or ' '

    delim: char
        The column delimiter.

    endline: str
        string appended to every line

    returns
    -------
    txt: str
        string represation of rows, each column being padded to the width
        of its widest cell (data, header and units included)
    """
    # cell widths of every line taking part in the layout.  Header and units
    # are considered together with the data, so that none of them overrides
    # the others and a table without rows still gets its columns.
    widths = [list(map(len, r)) for r in rows]
    if header is not None:
        widths.append(list(map(len, header)))
    if units is not None:
        widths.append(list(map(len, units)))
    length = [max(col) for col in zip(*widths)]

    if headerChar not in (None, '', ' '):
        rowSeparator = headerChar * (sum(length) + len(delim) * (len(length) - 1)) + endline
    else:
        rowSeparator = ''

    # make the format
    fmt = ['{{{0:d}:{1:d}s}}'.format(k, l) for (k, l) in enumerate(length)]
    fmt = delim.join(fmt) + endline

    # write the string
    txt = [rowSeparator]
    if header is not None:
        txt.append(fmt.format(*header))
        txt.append(rowSeparator)
    if units is not None:
        txt.append(fmt.format(*units))
        txt.append(rowSeparator)
    for r in rows:
        txt.append(fmt.format(*r))
    txt.append(rowSeparator)
    return ''.join(txt)


def pprint_rec_entry(data, num=0, keys=None):
    """ print one line with key and values properly to be readable

    Parameters
    ----------
    data: recarray
        data to extract entry from

    num: int, slice
        indice selection

    keys: sequence or str
        if None or '*', all the fields are printed
        if str, a regular expression matched against the field names
        if sequence, the sequence of keys to print
    """
    names = data.dtype.names
    if keys is None:
        _keys = names
    elif isinstance(keys, basestring):
        # isinstance also accepts subclasses of the string types
        if keys == '*':
            _keys = names
        else:
            _keys = [k for k in names if (re.match(keys, k) is not None)]
    else:
        _keys = keys

    # nothing selected: nothing to print
    if len(_keys) == 0:
        return

    length = max(map(len, _keys))
    fmt = '{{0:{0:d}s}}: {{1}}'.format(length)
    data = data[num]

    for k in _keys:
        print(fmt.format(k, data[k]))


def pprint_rec_array(data, idx=None, fields=None, ret=False, all=False,
                     headerChar='-', delim=' | ', endline='\n'):
    """ Pretty print the table content
        you can select the table parts to display using idx to
        select the rows and fields to only display some columns
        (ret is only for internal use)

    Parameters
    ----------
    data: array
        array to show

    idx: sequence, slice
        sub selection to print.  When not provided, all the rows are shown
        if there are less than 10 of them (or if `all` is set), the first
        and last 5 rows otherwise.

    fields: str, sequence
        if str can be a regular expression, and/or list of fields separated
        by spaces or commas
        if sequence, a sequence of regular expressions
        None or '*' selects all the fields

    ret: bool
        if set return the string representation instead of printing the result

    all: bool
        if set, force to show all rows

    headerChar: char
        Character to be used for the row separator line

    delim: char
        The column delimiter.

    endline: str
        string appended to every line

    Returns
    -------
    out: str
        string representation, only when `ret` is True
    """
    lbls = data.dtype.names
    is_text = isinstance(fields, basestring)

    if (fields is None) or (is_text and (fields == '*')):
        _keys = list(lbls)
    else:
        if is_text:
            if ',' in fields:
                _fields = [fk.strip() for fk in fields.split(',') if fk.strip()]
            else:
                _fields = fields.split()
        else:
            # sequence of patterns
            _fields = list(fields)
        _keys = []
        for _fk in _fields:
            _keys += [k for k in lbls if (re.match(_fk, k) is not None)]

    nfields = len(_keys)
    nrows = len(data)

    def _row(rk):
        """ string representation of the selected fields of row `rk` """
        return [str(data[k][rk]) for k in _keys]

    if idx is None:
        if (nrows < 10) or (all is True):
            rows = [_row(rk) for rk in range(nrows)]
        else:
            # head, ellipsis line, tail
            rows = [_row(rk) for rk in range(5)]
            rows.append(['...'] * nfields)
            rows += [_row(rk) for rk in range(-5, 0)]
    elif isinstance(idx, slice):
        # slice.indices resolves open-ended and negative bounds
        rows = [_row(rk) for rk in range(*idx.indices(nrows))]
    else:
        rows = [_row(rk) for rk in idx]

    out = __indent__(rows, header=_keys, units=None, delim=delim,
                     headerChar=headerChar, endline=endline)
    if ret is True:
        return out
    else:
        print(out)


def elementwise(func):
    """
    Quick and dirty elementwise function decorator it provides a quick way
    to apply a function either on one element or a sequence of elements

    The decorated function is called as ``f(it, **kwargs)``.  When `it` is
    iterable and not a string, `func` is mapped over its elements (the
    result of `map` is returned as is); otherwise `func` is applied to `it`
    directly.
    """
    @wraps(func)
    def wrapper(it, **kwargs):
        # isinstance (rather than an exact type match) so that subclasses of
        # the string types are handled as single elements as well
        if hasattr(it, '__iter__') and not isinstance(it, basestring):
            return map(partial(func, **kwargs), it)
        return func(it, **kwargs)
    return wrapper


class AstroHelpers(object):
    """ Helpers related to astronomy data """

    @staticmethod
    def _sexagesimal_to_float(_str, delim=':'):
        """ Split a signed sexagesimal string into its sign and magnitude

        Parameters
        ----------
        _str: str
            string such as '-12:34:56.7'

        delim: str
            character delimiting the fields

        Returns
        -------
        sign: int
            -1 when the string starts with '-', +1 otherwise

        value: float
            magnitude expressed in units of the first field
        """
        if _str[0] == '-':
            sign = -1
            _str = _str[1:]
        else:
            sign = 1
        fields = _str.split(delim)
        value = ((float(fields[-1]) / 60. + float(fields[1])) / 60. +
                 float(fields[0]))
        return sign, value

    @staticmethod
    def _float_to_sexagesimal(magnitude, negative, delim=':'):
        """ Format a non-negative magnitude as a sexagesimal string

        Parameters
        ----------
        magnitude: float
            absolute value to format, in units of the first field

        negative: bool
            if set, the output is prefixed by '-'

        delim: str
            character delimiting the fields

        Returns
        -------
        txt: str
            formatted string
        """
        lead = int(magnitude)
        m = int((magnitude - lead) * 60.)
        s = ((magnitude - lead) * 60. - m) * 60.
        # The sign is written as a prefix and not carried by the first field:
        # the first field is 0 for any |value| < 1 and would drop the sign.
        prefix = '-' if negative else ''
        return '{0}{1}{2}{3}{4}{5}'.format(prefix, lead, delim, m, delim, s)

    @staticmethod
    @elementwise
    def hms2deg(_str, delim=':'):
        """ Convert hex coordinates (hours:minutes:seconds) into degrees

        Parameters
        ----------
        _str: string or sequence
            string to convert (sequences are handled by the `elementwise`
            decorator)

        delim: str
            character delimiting the fields

        Returns
        -------
        deg: float
            angle in degrees
        """
        sign, hours = AstroHelpers._sexagesimal_to_float(_str, delim)
        return sign * (hours / 24. * 360.)

    @staticmethod
    @elementwise
    def deg2dms(val, delim=':'):
        """ Convert degrees into hex coordinates (degrees:minutes:seconds)

        Parameters
        ----------
        val: float or sequence
            angle in degrees (sequences are handled by the `elementwise`
            decorator)

        delim: str
            character delimiting the fields

        Returns
        -------
        str: string
            formatted coordinate; negative angles always start with '-'
        """
        negative = val < 0
        sign = -1 if negative else 1
        return AstroHelpers._float_to_sexagesimal(sign * val, negative, delim)

    @staticmethod
    @elementwise
    def deg2hms(val, delim=':'):
        """ Convert degrees into hex coordinates (hours:minutes:seconds)

        Parameters
        ----------
        val: float or sequence
            angle in degrees (sequences are handled by the `elementwise`
            decorator)

        delim: str
            character delimiting the fields

        Returns
        -------
        str: string
            formatted coordinate; negative angles always start with '-'
        """
        negative = val < 0
        sign = -1 if negative else 1
        hours = sign * val / 45. * 3.   # * 24 / 360
        return AstroHelpers._float_to_sexagesimal(hours, negative, delim)

    @staticmethod
    @elementwise
    def dms2deg(_str, delim=':'):
        """ Convert hex coordinates (degrees:minutes:seconds) into degrees

        Parameters
        ----------
        _str: string or sequence
            string to convert (sequences are handled by the `elementwise`
            decorator)

        delim: str
            character delimiting the fields

        Returns
        -------
        deg: float
            angle in degrees
        """
        sign, degrees = AstroHelpers._sexagesimal_to_float(_str, delim)
        # the sign applies to the whole angle, not only to minutes/seconds
        return sign * degrees

    @staticmethod
    def euler(ai_in, bi_in, select, b1950=False, dtype='f8'):
        """
        Transform between Galactic, celestial, and ecliptic coordinates.
        Celestial coordinates (RA, Dec) should be given in equinox J2000
        unless the b1950 is True.

        +-------+--------------+------------+----------+----------+-----------+
        |select | From         | To         |   select |   From   |  To       |
        +-------+--------------+------------+----------+----------+-----------+
        |1      |RA-Dec (2000) | Galactic   |     4    | Ecliptic |  RA-Dec   |
        +-------+--------------+------------+----------+----------+-----------+
        |2      |Galactic      | RA-DEC     |     5    | Ecliptic |  Galactic |
        +-------+--------------+------------+----------+----------+-----------+
        |3      |RA-Dec        | Ecliptic   |     6    | Galactic |  Ecliptic |
        +-------+--------------+------------+----------+----------+-----------+

        Parameters
        ----------

        ai_in: float, or sequence
            Input Longitude in DEGREES, scalar or vector.

        bi_in: float, or sequence
            Latitude in DEGREES

        select: int
            Integer from 1 to 6 specifying type of coordinate transformation.

        b1950: bool
            set equinox set to 1950

        dtype: str or numpy dtype
            type used for the internal arrays and the outputs

        Returns
        -------
        long_out: ndarray
            Output Longitude in DEGREES (always at least 1-dimensional)

        lat_out: ndarray
            Output Latitude in DEGREES (always at least 1-dimensional)


        .. note::

            Written W. Landsman,  February 1987
            Adapted from Fortran by Daryl Yentis NRL
            Converted to IDL V5.0   W. Landsman   September 1997
            Made J2000 the default, added /FK4 keyword  W. Landsman December 1998
            Add option to specify SELECT as a keyword W. Landsman March 2003
            Converted from IDL to numerical Python: Erin Sheldon, NYU, 2008-07-02
        """
        # NOTE: this function is not wrapped by `elementwise`: that wrapper
        # only accepts a single positional argument, and the numpy operations
        # below already handle sequences.

        # Make a copy as an array. ndmin=1 to avoid messed up scalar arrays
        ai = np.array(ai_in, ndmin=1, copy=True, dtype=dtype)
        bi = np.array(bi_in, ndmin=1, copy=True, dtype=dtype)

        PI = math.pi
        D2R = PI / 180.0
        R2D = 1.0 / D2R

        twopi   = 2.0 * PI
        fourpi  = 4.0 * PI

        #   J2000 coordinate conversions are based on the following constants
        #   (see the Hipparcos explanatory supplement).
        #  eps = 23.4392911111d           Obliquity of the ecliptic
        #  alphaG = 192.85948d            Right Ascension of Galactic North Pole
        #  deltaG = 27.12825d             Declination of Galactic North Pole
        #  lomega = 32.93192d             Galactic longitude of celestial equator
        #  alphaE = 180.02322d            Ecliptic longitude of Galactic North Pole
        #  deltaE = 29.811438523d         Ecliptic latitude of Galactic North Pole
        #  Eomega  = 6.3839743d           Galactic longitude of ecliptic equator
        # Parameters for all the different conversions
        if b1950:
            # equinox = '(B1950)'
            psi    = np.array([ 0.57595865315, 4.9261918136,
                                0.00000000000, 0.0000000000,
                                0.11129056012, 4.7005372834], dtype=dtype)
            stheta = np.array([ 0.88781538514, -0.88781538514,
                                0.39788119938, -0.39788119938,
                                0.86766174755, -0.86766174755], dtype=dtype)
            ctheta = np.array([ 0.46019978478, 0.46019978478,
                                0.91743694670, 0.91743694670,
                                0.49715499774, 0.49715499774], dtype=dtype)
            phi    = np.array([ 4.9261918136,  0.57595865315,
                                0.0000000000, 0.00000000000,
                                4.7005372834, 0.11129056012], dtype=dtype)
        else:
            # equinox = '(J2000)'
            psi    = np.array([ 0.57477043300, 4.9368292465,
                                0.00000000000, 0.0000000000,
                                0.11142137093, 4.71279419371], dtype=dtype)
            stheta = np.array([ 0.88998808748, -0.88998808748,
                                0.39777715593, -0.39777715593,
                                0.86766622025, -0.86766622025], dtype=dtype)
            ctheta = np.array([ 0.45598377618, 0.45598377618,
                                0.91748206207, 0.91748206207,
                                0.49714719172, 0.49714719172], dtype=dtype)
            phi    = np.array([ 4.9368292465,  0.57477043300,
                                0.0000000000, 0.00000000000,
                                4.71279419371, 0.11142137093], dtype=dtype)

        # zero offset
        i  = select - 1
        a  = ai * D2R - phi[i]

        b = bi * D2R
        sb = sin(b)
        cb = cos(b)
        cbsa = cb * sin(a)
        b  = -stheta[i] * cbsa + ctheta[i] * sb
        # rounding can push |b| slightly above 1, where arcsin returns NaN
        np.clip(b, -1.0, 1.0, out=b)
        bo = arcsin(b) * R2D
        a  = arctan2( ctheta[i] * cbsa + stheta[i] * sb, cb * cos(a) )
        ao = ( (a + psi[i] + fourpi) % twopi) * R2D
        return ao, bo

    @staticmethod
    def sphdist(ra1, dec1, ra2, dec2):
        """measures the spherical distance between 2 points

        Parameters
        ----------
        ra1: float or sequence
            first right ascensions in degrees

        dec1: float or sequence
            first declination in degrees

        ra2: float or sequence
            second right ascensions in degrees

        dec2: float or sequence
            second declination in degrees

        Returns
        -------
        Outputs: float or sequence
            returns a distance in degrees
        """
        # plain lists/tuples do not support the subtraction below
        if isinstance(ra1, (list, tuple)):
            ra1 = np.asarray(ra1)
        if isinstance(ra2, (list, tuple)):
            ra2 = np.asarray(ra2)

        dec1_r = deg2rad(dec1)
        dec2_r = deg2rad(dec2)
        dra_r = deg2rad(ra1 - ra2)
        # haversine formula
        hav = ((sin((dec1_r - dec2_r) / 2)) ** 2 +
               cos(dec1_r) * cos(dec2_r) * (sin(dra_r / 2)) ** 2)
        # guard against rounding slightly above 1 (arcsin would return NaN)
        hav = np.minimum(hav, 1.0)
        return 2. * rad2deg(arcsin(sqrt(hav)))

    @staticmethod
    def conesearch(ra0, dec0, ra, dec, r, outtype=0):
        """ Perform a cone search on a set of coordinates

        Parameters
        ----------
        ra0: ndarray[ndim=1, dtype=float]
            right ascensions of the points to test, in degrees

        dec0: ndarray[ndim=1, dtype=float]
            declinations of the points to test, in degrees

        ra: float
            ra to look for (in degree)

        dec: float
            dec to look for (in degree)

        r: float
            distance in degrees

        outtype: int
            type of outputs
                0 -- minimal, indices of matching coordinates
                1 -- indices and distances of matching coordinates
                2 -- full, boolean filter and distances

        Returns
        -------
        t: tuple
            if outtype is 0:
                only return indices from ra0, dec0
            elif outtype is 1:
                return indices from ra0, dec0 and distances
            elif outtype is 2:
                return conditional vector and distance to all ra0, dec0
        """
        # `sphdist` works on whole arrays: no need to loop point by point
        dist = np.atleast_1d(AstroHelpers.sphdist(np.asarray(ra0),
                                                  np.asarray(dec0),
                                                  ra, dec))
        v = (dist <= r)

        if outtype == 0:
            return np.ravel(np.where(v))
        elif outtype == 1:
            return np.ravel(np.where(v)), dist[v]
        else:
            return v, dist


# ==============================================================================
# SimpleTable -- provides table manipulations with limited storage formats
# ==============================================================================
class SimpleTable(object):
    """ Table class that is designed to be the basis of any format wrapping
    around numpy recarrays

    Attributes
    ----------

    fname: str or object
        if str, the file to read from. This may be limited to the formats
        currently handled automatically. If the format is not correctly handled,
        you can try providing an object instead.

        if object with a structure like dict, ndarray, or recarray-like
            the data will be encapsulated into a Table

    caseless: bool
        if set, column names will be caseless during operations

    aliases: dict
        set of column aliases (can be defined later :func:`set_alias`)

    units: dict
        set of column units (can be defined later :func:`set_unit`)

    desc: dict
        set of column description or comments (can be defined later :func:`set_comment`)

    header: dict
        key, value pair corresponding to the attributes of the table
    """

    def __init__(self, fname, *args, **kwargs):
        """ Build a table from a file or from an in-memory object

        Parameters
        ----------
        fname: str or object
            if str, file to read from; the reader is selected from the file
            extension or from the `dtype`/`format` keyword.
            Otherwise a dict, tuple, list, generator, ndarray, FITS record,
            SimpleTable or any object exposing a `dtype` attribute.

        dtype, format: str, optional
            explicit input format ('csv', 'tsv', 'dat', 'txt', 'fits',
            'hdf5', 'hd5', 'hdf', 'vot', 'votable', 'dict');
            `format` takes precedence over `dtype`

        caseless: bool
            if set, column names will be caseless during operations

        aliases, units, desc: dict
            initial column aliases, units and descriptions (copied)

        header: dict
            table attributes, only used when the data come from a
            dict/sequence that does not carry its own 'header' entry

        copy: bool
            only used when `fname` is a SimpleTable: deep copy the content
            (default) or share it with the source table

        Remaining positional and keyword arguments are forwarded to the
        reader of the selected format.

        Raises
        ------
        RuntimeError
            if the package required by the format is not installed
        Exception
            if the format or the type of `fname` is not handled
        """
        dtype = kwargs.pop('dtype', None)
        dtype = kwargs.pop('format', dtype)
        # Table-level options are removed from kwargs so that they are never
        # forwarded to the file readers, and copied so that the dictionaries
        # given by the caller are not modified by the updates below.
        self.caseless = kwargs.pop('caseless', False)
        self._aliases = dict(kwargs.pop('aliases', None) or {})
        self._units = dict(kwargs.pop('units', None) or {})
        self._desc = dict(kwargs.pop('desc', None) or {})

        is_filename = isinstance(fname, basestring)

        def _register(header, aliases, units, comments):
            """ store the metadata recovered by a file reader """
            self.header = header
            self._units.update(**units)
            self._desc.update(**comments)
            self._aliases.update(**aliases)

        if (isinstance(fname, (dict, tuple, list, types.GeneratorType))) or (dtype in [dict, 'dict']):
            try:
                # note: this removes the 'header' entry from the input dict
                self.header = fname.pop('header', {})
            except (AttributeError, TypeError):
                self.header = kwargs.pop('header', {})
            self.data = _convert_dict_to_structured_ndarray(fname)
        elif is_filename or (dtype is not None):
            if is_filename:
                extension = fname.split('.')[-1]
            else:
                extension = None
            if (extension == 'csv') or dtype == 'csv':
                kwargs.setdefault('delimiter', ',')
                commentedHeader = kwargs.pop('commentedHeader', False)
                n, header, units, comments, aliases, names = _ascii_read_header(fname, commentedHeader=commentedHeader, **kwargs)
                if 'names' in kwargs:
                    n -= 1
                kwargs.setdefault('names', names)
                if _pd is not None:   # pandas is faster
                    kwargs.setdefault('comment', '#')
                    kwargs.setdefault('skiprows', n)
                    # NOTE(review): to_records() keeps the dataframe index as
                    # an extra 'index' field by default -- confirm intended
                    self.data = _pd.read_csv(fname, *args, **kwargs).to_records()
                else:
                    kwargs.setdefault('skip_header', n)
                    kwargs.setdefault('comments', '#')
                    self.data = np.recfromcsv(fname, *args, **kwargs)
                _register(header, aliases, units, comments)
            elif (extension in ('tsv', 'dat', 'txt')) or (dtype in ('tsv', 'dat', 'txt')):
                commentedHeader = kwargs.pop('commentedHeader', True)
                n, header, units, comments, aliases, names = _ascii_read_header(fname, commentedHeader=commentedHeader, **kwargs)
                kwargs.setdefault('names', names)
                if _pd is not None:   # pandas is faster
                    # raw string: '\s' is not a valid string escape sequence
                    # NOTE(review): unlike the csv case, `n` is not given as
                    # `skiprows` here -- confirm for non-commented headers
                    kwargs.setdefault('delimiter', r'\s+')
                    kwargs.setdefault('comment', '#')
                    self.data = _pd.read_csv(fname, *args, **kwargs).to_records()
                else:
                    kwargs.setdefault('delimiter', None)
                    kwargs.setdefault('comments', '#')
                    kwargs.setdefault('skip_header', n)
                    self.data = np.recfromtxt(fname, *args, **kwargs)
                _register(header, aliases, units, comments)
            elif (extension == 'fits') or dtype == 'fits':
                if pyfits is None:
                    raise RuntimeError('Cannot read this format, Astropy or pyfits not found')
                # read the first extension unless told otherwise
                if ('extname' not in kwargs) and ('ext' not in kwargs) and (len(args) == 0):
                    args = (1, )
                self.data = np.array(pyfits.getdata(fname, *args, **kwargs))
                header, aliases, units, comments = _fits_read_header(pyfits.getheader(fname, *args, **kwargs))
                _register(header, aliases, units, comments)
            elif (extension in ('hdf5', 'hd5', 'hdf')) or (dtype in ('hdf5', 'hd5', 'hdf')):
                if tables is None:
                    raise RuntimeError('Cannot read this format, pytables not found')
                hdr, aliases, units, desc, data = _hdf5_read_data(fname, *args, **kwargs)
                self.data = data
                _register(hdr, aliases, units, desc)
            elif (extension in ('vot', 'votable')) or (dtype in ('vot', 'votable')):
                # Votable case
                if _astropytable is None:
                    raise RuntimeError('Cannot read this votable format, astropy not found')
                data = _astropytable.read(fname, *args, format='votable', **kwargs)
                units = {}
                desc = {}
                for k in data.keys():
                    column = data[k]
                    unit = column.unit
                    # columns without unit are left unregistered; units
                    # without a usable `name` fall back to their string form
                    if unit is not None:
                        units[k] = getattr(unit, 'name', None) or str(unit)
                    desc[k] = column.description
                self.data = data.as_array()
                self.header = {}
                self._units.update(units)
                self._desc.update(desc)
            else:
                # `extension` is None when the input is not a filename
                raise Exception('Format {0!s} not handled'.format(extension or dtype))
        elif type(fname) is np.ndarray:
            self.data = fname
            self.header = {}
        elif (pyfits is not None) and (type(fname) == pyfits.FITS_rec):
            self.data = np.array(fname)
            self.header = {}
        elif isinstance(fname, SimpleTable):
            cp = kwargs.pop('copy', True)
            if cp:
                self.data = deepcopy(fname.data)
                self.header = deepcopy(fname.header)
                self._aliases = deepcopy(fname._aliases)
                self._units = deepcopy(fname._units)
                self._desc = deepcopy(fname._desc)
            else:
                self.data = fname.data
                self.header = fname.header
                self._aliases = fname._aliases
                self._units = fname._units
                self._desc = fname._desc
        elif hasattr(fname, 'dtype'):
            self.data = np.array(fname)
            self.header = {}
        else:
            raise Exception('Type {0!s:s} not handled'.format(type(fname)))

        if 'NAME' not in self.header:
            if is_filename:
                self.header['NAME'] = fname
            else:
                self.header['NAME'] = 'No Name'

    def pprint_entry(self, num, keys=None):
        """ print one line with key and values properly to be readable

        Each selected column is printed on its own line as ``name: value``,
        names being padded to the length of the longest one. Nothing is
        printed when the selection is empty.

        Parameters
        ----------
        num: int, slice
            indice selection

        keys: sequence or str
            if str, can be a regular expression
            if sequence, the sequence of keys to print
        """
        if keys is None:
            _keys = self.keys()
        elif isinstance(keys, basestring):
            if keys == '*':
                _keys = self.keys()
            else:
                _keys = [k for k in (self.keys() + tuple(self._aliases.keys()))
                         if (re.match(keys, k) is not None)]
        else:
            # materialized because the keys are traversed twice below
            _keys = list(keys)

        if len(_keys) == 0:
            # nothing selected (e.g., no column matches the expression)
            return

        length = max(map(len, _keys))
        fmt = '{{0:{0:d}s}}: {{1}}'.format(length)
        data = self[num]

        for k in _keys:
            print(fmt.format(k, data[self.resolve_alias(k)]))

    def pprint(self, idx=None, fields=None, ret=False, all=False,
               full_match=False, headerChar='-', delim=' | ', endline='\n',
               **kwargs):
        """ Pretty print the table content
            you can select the table parts to display using idx to
            select the rows and fields to only display some columns
            (ret is only for internal use)

        Parameters
        ----------

        idx: sequence, slice
            sub selection to print. When not provided, tables of 10 rows or
            more only show their first and last 5 rows unless `all` is set.

        fields: str, sequence
            if str can be a regular expression, and/or list of fields separated
            by spaces or commas.
            if sequence, each element is used as a regular expression.

        ret: bool
            if set return the string representation instead of printing the result

        all: bool
            if set, force to show all rows

        full_match: bool
            if set, use :func:`re.fullmatch` instead of :func:`re.match`
            to select the fields

        headerChar: char
            Character to be used for the row separator line

        delim: char
            The column delimiter.

        endline: str
            forwarded to the formatting function together with `headerChar`
            and `delim`

        Returns
        -------
        out: str
            the formatted table, only when `ret` is True
        """
        fn = re.fullmatch if full_match is True else re.match

        if (fields is None) or (isinstance(fields, basestring) and fields == '*'):
            _keys = self.keys()
        else:
            if isinstance(fields, basestring):
                if ',' in fields:
                    # "a, b" must select `a` and `b`: drop surrounding blanks
                    # and ignore empty items (an empty pattern matches all)
                    _fields = [f.strip() for f in fields.split(',')]
                    _fields = [f for f in _fields if f]
                elif ' ' in fields:
                    _fields = fields.split()
                else:
                    _fields = [fields]
            else:
                _fields = list(fields)
            lbls = self.keys() + tuple(self._aliases.keys())
            _keys = [k for _fk in _fields for k in lbls
                     if (fn(_fk, k) is not None)]

        nfields = len(_keys)

        # actual column names, used to access the data and the units
        fields = list(map( self.resolve_alias, _keys ))
        columns = [ self[k] for k in fields ]

        def _format_rows(indices):
            """ string representation of the selected rows """
            return [ [ str(col[rk]) for col in columns ] for rk in indices ]

        if idx is None:
            if (self.nrows < 10) or all:
                rows = _format_rows(range(self.nrows))
            else:
                # head, ellipsis line, tail
                rows = _format_rows(range(5))
                rows += [ ['...'] * nfields ]
                rows += _format_rows(range(-5, 0))
        elif isinstance(idx, slice):
            # slice.indices handles missing (None) bounds and step
            rows = _format_rows(range(*idx.indices(self.nrows)))
        else:
            rows = _format_rows(idx)

        if len(self._units) == 0:
            units = None
        else:
            units = [ '(' + str( self._units.get(k, None) or '') + ')' for k in fields ]

        out = __indent__(rows, header=_keys, units=units, delim=delim,
                         headerChar=headerChar, endline=endline)
        if ret is True:
            return out
        else:
            print(out)

    def write(self, fname, **kwargs):
        """ Export the table into a file

        The output format is given by the `extension` keyword or, when it is
        missing, by the text following the last '.' of `fname`. Handled
        values are 'csv', 'txt', 'dat', 'fits', 'hdf', 'hdf5' and 'hd5'.

        Parameters
        ----------
        fname: str
            filename to export the table into

        extension: str, optional
            output format, overrides the one deduced from `fname`

        comments, delimiter, commentedHeader, header: optional
            ascii formats only: options used to generate the header (or the
            header itself) and the delimiter of the columns

        header, append: optional
            fits format only: extra header entries and whether the table is
            appended to the file instead of written as a new one

        .. note::
            additional keywords are forwarded to the corresponding libraries
            :func:`pyfits.writeto` or :func:`pyfits.append`
            :func:`np.savetxt`

        Raises
        ------
        Exception
            if the format is not handled
        """
        extension = kwargs.pop('extension', None)
        if extension is None:
            extension = fname.split('.')[-1]

        is_csv = (extension == 'csv')
        if is_csv or (extension in ['txt', 'dat']):
            # both ascii flavours only differ by their default options
            if is_csv:
                default_delimiter, default_commented = ',', False
            else:
                default_delimiter, default_commented = ' ', True
            comments = kwargs.pop('comments', '#')
            delimiter = kwargs.pop('delimiter', default_delimiter)
            commentedHeader = kwargs.pop('commentedHeader', default_commented)
            generated = _ascii_generate_header(self, comments=comments,
                                               delimiter=delimiter,
                                               commentedHeader=commentedHeader)
            header = kwargs.pop('header', generated)
            # the header is passed with comments='' to savetxt, which
            # therefore adds no comment prefix of its own
            np.savetxt(fname, self.data, delimiter=delimiter, header=header,
                       comments='', **kwargs)
            return

        if extension == 'fits':
            extra_header = kwargs.pop('header', None)
            append = kwargs.pop('append', False)
            hdr = _fits_generate_header(self)
            if extra_header is not None:
                hdr.update(**extra_header)
            # patched versions to correctly include the header
            writer = _fits_append if append else _fits_writeto
            writer(fname, self.data, hdr, **kwargs)
            return

        if extension in ('hdf', 'hdf5', 'hd5'):
            _hdf5_write_data(fname, self.data, header=self.header,
                             units=self._units, comments=self._desc,
                             aliases=self._aliases, **kwargs)
            return

        raise Exception('Format {0:s} not handled'.format(extension))

    def to_records(self, **kwargs):
        """ Construct a numpy record array from this dataframe """
        return self.data

    def to_pandas(self, **kwargs):
        """ Export the table as a pandas dataframe

        The columns are first exported with :func:`to_dict` and the result
        is given to `pandas.DataFrame.from_dict`.

        Parameters
        ----------
        keys: sequence, optional
            ordered subset of columns to export

        **kwargs: dict, optional
            any other keyword is forwarded to `pandas.DataFrame.from_dict`

        Returns
        -------
        df : DataFrame

        Raises
        ------
        ImportError
            if pandas cannot be imported (a message is printed first)
        """
        try:
            import pandas
            subset = kwargs.pop('keys', None)
            columns = self.to_dict(keys=subset)
            return pandas.DataFrame.from_dict(columns, **kwargs)
        except ImportError as error:
            print("Pandas import error")
            raise error

    def to_dict(self, keys=None, contiguous=False):
        """ Export the table columns as a dictionary

        Parameters
        ----------
        keys: sequence, optional
            ordered subset of columns to export (default: all the columns)

        contiguous: boolean
            if set, each value is passed through `np.ascontiguousarray`

        Returns
        -------
        data: dict
            mapping between the column names and their values
        """
        names = self.keys() if keys is None else keys
        if not contiguous:
            return dict((name, self[name]) for name in names)
        return dict((name, np.ascontiguousarray(self[name])) for name in names)

    def to_xarray(self, **kwargs):
        """ Export the table as an xarray dataset

        The table is first converted with :func:`to_pandas` (which receives
        all the keyword arguments) and the resulting dataframe is given to
        `xarray.Dataset.from_dataframe`.

        Returns
        -------
        ds: xarray.Dataset
            dataset built from the dataframe

        Raises
        ------
        ImportError
            if xarray cannot be imported (a message is printed first)
        """
        try:
            from xarray import Dataset as _Dataset
            frame = self.to_pandas(**kwargs)
            return _Dataset.from_dataframe(frame)
        except ImportError as error:
            print("xray import error")
            raise error

    def to_vaex(self, **kwargs):
        """
        Create an in memory Vaex dataset

        The columns are exported with :func:`to_dict` as contiguous arrays
        and given as keyword arguments to `vaex.from_arrays`.

        Parameters
        ----------
        keys: sequence, optional
            ordered subset of columns to export

        **kwargs: dict, optional
            all keywords (including `keys`) are forwarded to :func:`to_dict`

        Returns
        -------
        df: vaex dataset
            result of `vaex.from_arrays`

        Raises
        ------
        ImportError
            if vaex cannot be imported (a message is printed first)
        """
        try:
            import vaex
            arrays = self.to_dict(contiguous=True, **kwargs)
            return vaex.from_arrays(**arrays)
        except ImportError as error:
            print("Vaex import error")
            raise error

    def to_dask(self, **kwargs):
        """ Export the table as a Dask DataFrame

        The table is first converted with :func:`to_pandas` and the
        resulting dataframe is given to `dask.dataframe.from_pandas`.

        Parameters
        ----------
        keys: sequence, optional
            ordered subset of columns to export

        **kwargs: dict, optional
            any other keyword (e.g., `npartitions`, `chunksize`, `sort`,
            `name`) is forwarded to `dask.dataframe.from_pandas`

        Returns
        -------
        dask.DataFrame or dask.Series
            result of `dask.dataframe.from_pandas`

        Raises
        ------
        ImportError
            if dask cannot be imported (a message is printed first)
        """
        try:
            from dask import dataframe as _dask_dataframe
            subset = kwargs.pop('keys', None)
            frame = self.to_pandas(keys=subset)
            return _dask_dataframe.from_pandas(frame, **kwargs)
        except ImportError as error:
            print("Dask import error")
            raise error

    def to_astropy_table(self, **kwargs):
        """
        Export the table as an `astropy.table.Table`

        Parameters
        ----------
        keys: sequence, optional
            ordered subset of columns to export (default: all the columns)
        masked : bool, optional
            Specify whether the table is masked.
        names : list, optional
            Specify column names
        dtype : list, optional
            Specify column data types
        meta : dict, optional
            Metadata associated with the table.
        copy : bool, optional
            Copy the input data (default=True).
        rows : numpy ndarray, list of lists, optional
            Row-oriented data for table instead of ``data`` argument
        copy_indices : bool, optional
            Copy any indices in the input data (default=True)
        **kwargs : dict, optional
            Additional keyword args when converting table-like object

        All keywords but `keys` are forwarded to `astropy.table.Table`.

        Returns
        -------
        df: astropy.table.Table
            dataframe

        Raises
        ------
        ImportError
            if astropy cannot be imported (a message is printed first)
        """
        try:
            from astropy.table import Table
        except ImportError as e:
            print("Astropy import error")
            raise e

        keys = kwargs.pop('keys', None)
        if keys is None:
            return Table(self.to_records(), **kwargs)

        # `to_records` always returns the whole table: export the requested
        # columns only, `names` making sure their order is preserved.
        keys = list(keys)
        kwargs.setdefault('names', keys)
        return Table(self.to_dict(keys=keys), **kwargs)

    def _repr_html_(self):
        """ HTML representation of the first rows, as rendered by pandas """
        preview = self.to_pandas().head()
        return preview._repr_html_()

    def set_alias(self, alias, colname):
        """
        Register a new alias for an existing column

        Parameters
        ----------
        alias: str
            The new alias of the column

        colname: str
            The column being aliased (must be one of the table keys)

        Raises
        ------
        KeyError
            if `colname` is not a column of the table
        """
        exists = colname in self.keys()
        if not exists:
            raise KeyError("Column {0:s} does not exist".format(colname))
        self._aliases[alias] = colname

    def reverse_alias(self, colname):
        """
        Return aliases of a given column.

        Given a colname, return a sequence of aliases associated to this column
        Aliases are defined by using :func:`set_alias`

        Parameters
        ----------
        colname: str
            column name or one of its aliases

        Returns
        -------
        aliases: tuple
            aliases registered for the column (empty if there is none)

        Raises
        ------
        KeyError
            if the column does not exist
        """
        _colname = self.resolve_alias(colname)
        if (_colname not in self.keys()):
            raise KeyError("Column {0:s} does not exist".format(colname))

        # dict.items() exists in both Python 2 and 3 (iteritems() does not)
        return tuple( k for (k, v) in self._aliases.items() if (v == _colname) )

    def resolve_alias(self, colname):
        """
        Return the name of an aliased column.

        Given an alias, return the column name it aliases. This
        function is a no-op if the alias is a column name itself.

        Aliases are defined by using :func:`set_alias`

        Parameters
        ----------
        colname: str or sequence
            alias or column name, or a sequence of them

        Returns
        -------
        name: str or list
            resolved name; a list of resolved names if a sequence was given.
            Unknown names are returned unchanged.
        """
        # isinstance (rather than an exact type match) so that subclasses of
        # the string types, e.g. numpy string scalars, are resolved as one
        # name instead of character by character.
        if hasattr(colname, '__iter__') and not isinstance(colname, basestring):
            return [ self.resolve_alias(k) for k in colname ]

        if self.caseless is True:
            # lower-case lookup table; actual column names are inserted last
            # and therefore take precedence over aliases
            maps = dict( (k.lower(), v) for k, v in self._aliases.items() )
            maps.update( (k.lower(), k) for k in self.keys() )
            return maps.get(colname.lower(), colname)

        return self._aliases.get(colname, colname)

    def set_unit(self, colname, unit):
        """ Attach a unit to one column, or units to several columns

        Parameters
        ----------
        colname: str or sequence of str
            column name(s) or registered alias(es)

        unit: str or sequence of str
            unit description(s); when either argument is not a string both
            are iterated in lockstep
        """
        one_column = isinstance(unit, basestring) and isinstance(colname, basestring)
        if one_column:
            pairs = [(colname, unit)]
        else:
            pairs = zip(colname, unit)

        for key, value in pairs:
            # units are stored under the real column name, as text
            self._units[self.resolve_alias(key)] = str(value)

    def set_comment(self, colname, comment):
        """ Attach a description to one column, or descriptions to several

        Parameters
        ----------
        colname: str or sequence of str
            column name(s) or registered alias(es)

        comment: str or sequence of str
            column description(s); when either argument is not a string both
            are iterated in lockstep
        """
        one_column = isinstance(comment, basestring) and isinstance(colname, basestring)
        if one_column:
            pairs = [(colname, comment)]
        else:
            pairs = zip(colname, comment)

        for key, value in pairs:
            # descriptions are stored under the real column name, as text
            self._desc[self.resolve_alias(key)] = str(value)

    def keys(self, regexp=None, full_match=False):
        """
        Return the data column names or a subset of it

        Parameters
        ----------
        regexp: str or sequence of str
            pattern(s) to filter the keys with. A string may hold several
            patterns separated by commas or, failing that, by whitespace.
            ``None`` and ``'*'`` select every column.

        full_match: bool
            if set, use :func:`re.fullmatch` instead of :func:`re.match`

        By default each pattern is applied at the start of every column name
        and alias (:func:`re.match`).

        returns
        -------
        seq: sequence
            sequence of keys; the column-name tuple itself when no pattern
            is given, a list of matching column names and aliases otherwise

        Raises
        ------
        ValueError
            if `regexp` is neither a string nor an iterable
        """
        if (regexp is None) or (regexp == '*'):
            return self.colnames
        elif isinstance(regexp, basestring):
            fn = re.fullmatch if full_match is True else re.match

            if regexp.count(',') > 0:
                _re = regexp.split(',')
            elif regexp.count(' ') > 0:
                _re = regexp.split()
            else:
                _re = [regexp]
            # 'a, b' must select the same keys as 'a,b'; an empty pattern
            # (e.g. from a trailing comma) would otherwise match everything
            _re = [k.strip() for k in _re if k.strip()]

            lbls = self.colnames + tuple(self._aliases.keys())
            _keys = []
            for _rk in _re:
                _keys += [k for k in lbls if (fn(_rk, k) is not None)]

            return _keys
        elif hasattr(regexp, '__iter__'):
            _keys = []
            for k in regexp:
                # propagate the matching mode to every nested pattern
                _keys += self.keys(k, full_match=full_match)
            return _keys
        else:
            raise ValueError('Unexpected type {0} for regexp'.format(type(regexp)))

    @property
    def name(self):
        """ Table name: the header's 'NAME' entry, or None when absent """
        header = self.header
        return header.get('NAME')

    @property
    def colnames(self):
        """ Field names of the underlying data dtype """
        record_type = self.data.dtype
        return record_type.names

    @property
    def ncols(self):
        """ Number of entries in ``colnames`` """
        columns = self.colnames
        return len(columns)

    @property
    def nrows(self):
        """ Length of the underlying data array """
        rows = self.data
        return len(rows)

    @property
    def nbytes(self):
        """ Memory footprint of the instance attributes, in bytes

        Attributes exposing ``nbytes`` report it themselves; any other
        attribute is measured with :func:`sys.getsizeof`.
        """
        total = 0
        for attribute in self.__dict__.values():
            if hasattr(attribute, 'nbytes'):
                total += attribute.nbytes
            else:
                total += sys.getsizeof(attribute)
        return total

    def __len__(self):
        """ Number of rows, same value as ``nrows`` """
        count = self.nrows
        return count

    @property
    def shape(self):
        """ Shape of the underlying data array """
        array = self.data
        return array.shape

    @property
    def dtype(self):
        """ dtype of the underlying data array """
        array = self.data
        return array.dtype

    @property
    def Plotter(self):
        """ Plotter instance bound to this table and labelled with its name.

        Raises
        ------
        AttributeError
            if the plotter add-on could not be imported
        """
        # inside the method, `Plotter` is the module-level name (None when
        # the optional import failed), not this property
        if Plotter is not None:
            return Plotter(self, label=self.name)
        raise AttributeError('the add-on was not found, this property is not available')

    def __getitem__(self, v):
        """ Index the data with `v` after alias resolution, as a plain ndarray """
        key = self.resolve_alias(v)
        return np.asarray(self.data[key])

    def take(self, indices, axis=None, out=None, mode='raise'):
        """
        Take rows of the data array, delegating to :func:`numpy.ndarray.take`.

        Parameters
        ----------
        indices : array_like
            The indices of the values to extract.
            Also allow scalars for indices.

        axis : int, optional
            The axis over which to select values. By default, the flattened
            input array is used.

        out : ndarray, optional
            If provided, the result will be placed in this array. It should
            be of the appropriate shape and dtype.

        mode : {'raise', 'wrap', 'clip'}, optional
            Specifies how out-of-bounds indices will behave.

            * 'raise' -- raise an error (default)
            * 'wrap' -- wrap around
            * 'clip' -- clip to the range

        Returns
        -------
        subarray : ndarray
            whatever ``self.data.take`` returns for these arguments
        """
        records = self.data
        return records.take(indices, axis=axis, out=out, mode=mode)

    def compress(self, condition, axis=None, out=None):
        """
        Return the selected slices of the data array, delegating to
        :func:`numpy.ndarray.compress`.

        Parameters
        ----------
        condition : 1-D array of bools
            Array that selects which entries to return. If len(condition)
            is less than the size of the data along the given axis, then
            output is truncated to the length of the condition array.

        axis : int, optional
            Axis along which to take slices. If None (default), work on the
            flattened array.

        out : ndarray, optional
            Output array.  Its type is preserved and it must be of the right
            shape to hold the output.

        Returns
        -------
        compressed_array : ndarray
            whatever ``self.data.compress`` returns for these arguments
        """
        records = self.data
        return records.compress(condition, axis=axis, out=out)

    def get(self, v, full_match=False):
        """ returns a table from columns given as v

        this function is equivalent to :func:`__getitem__` but preserve the
        Table format and associated properties (units, description, header)

        Parameters
        ----------
        v: str
            pattern to filter the keys with

        full_match: bool
            if set, use :func:`re.fullmatch` instead of :func:`re.match`

        Returns
        -------
        t: Table
            new table of the same class holding the selected columns, a copy
            of the header entries, and the aliases, units and descriptions
            that refer to the selected columns
        """
        new_keys = self.keys(v, full_match=full_match)
        t = self.__class__(self[new_keys])
        t.header.update(**self.header)

        # new_keys may contain aliases; metadata is keyed by real column name
        cols = set(self.resolve_alias(k) for k in new_keys)
        # aliases map alias -> column: filter on the value
        t._aliases.update((a, c) for (a, c) in self._aliases.items() if c in cols)
        # units and descriptions map column -> text: filter on the key
        t._units.update((c, u) for (c, u) in self._units.items() if c in cols)
        t._desc.update((c, d) for (c, d) in self._desc.items() if c in cols)
        return t

    def __setitem__(self, k, v):
        """ Assign `v` to column `k`

        An existing column (or alias) is overwritten in place; an unknown
        key creates a new column through :func:`add_column`.
        """
        if k in self:
            return self.data.__setitem__(self.resolve_alias(k), v)
        # `object` defines no __setitem__, so delegating to it could only
        # raise an unrelated AttributeError
        self.add_column(k, v)

    def __getattr__(self, k):
        try:
            return self.data.__getitem__(self.resolve_alias(k))
        except:
            return object.__getattribute__(self, k)

    def __iter__(self):
        """ Iterate over the rows, each presented as a one-row table

        A single one-row table is built once and its row is overwritten at
        every step, so the same object is yielded each time: use the row
        before advancing the iterator.
        """
        row_table = self.select('*', [0])
        for record in self.data:
            row_table.data[0] = record
            yield row_table

    def iterkeys(self):
        """ Generator over the column names, in ``colnames`` order """
        for column in self.colnames:
            yield column

    def itervalues(self):
        """ Generator over the rows of the data array """
        for record in self.data:
            yield record

    def items(self):
        """ Generator over (column name, column values) pairs """
        for column in self.colnames:
            values = self[column]
            yield column, values

    def info(self):
        """ prints information on the table

        The report holds the table name, its dimensions and memory size, the
        header entries, one line per column (name, unit, description) and,
        when defined, the list of aliases.
        """
        title = "\nTable: {name:s}\n       nrows={s.nrows:d}, ncols={s.ncols:d}, mem={size:s}"
        parts = [title.format(name=self.header.get('NAME', 'Noname'), s=self,
                              size=pretty_size_print(self.nbytes))]

        parts.append('\n\nHeader:\n')
        vals = list(self.header.items())
        if vals:
            # an empty header would make max() raise ValueError
            length = max(len(k) for k, _ in vals)
            fmt = '\t{{0:{0:d}s}} {{1}}\n'.format(length)
            parts.extend(fmt.format(k, v) for k, v in vals)

        parts.append('\nColumns:\n')
        vals = [(k, self._units.get(k, ''), self._desc.get(k, ''))
                for k in self.colnames]
        if vals:
            # one width per printed field: name, unit, description
            lengths = [max(len(row[i]) for row in vals) for i in range(3)]
            fmt = '\t{{0:{0:d}s}} {{1:{1:d}s}} {{2:{2:d}s}}\n'.format(*(k + 1 for k in lengths))
            parts.extend(fmt.format(k, u, c) for k, u, c in vals)

        print(''.join(parts))

        if len(self._aliases) > 0:
            print("\nTable contains alias(es):")
            for k, v in self._aliases.items():
                print('\t{0:s} --> {1:s}'.format(k, v))

    def __repr__(self):
        """ Default object repr followed by the table name, size and memory """
        template = object.__repr__(self)
        template += "\nTable: {name:s}\n       nrows={s.nrows:d}, ncols={s.ncols:d}, mem={size:s}"
        fields = dict(name=self.header.get('NAME', 'Noname'),
                      s=self,
                      size=pretty_size_print(self.nbytes))
        return template.format(**fields)

    def __getslice__(self, i, j):
        """ Rows ``i`` to ``j`` (excluded) of the data array

        Only Python 2 calls this hook for ``table[i:j]``. Regular slicing
        syntax is used because arrays do not provide ``__getslice__`` on
        Python 3.
        """
        return self.data[i:j]

    def __contains__(self, k):
        """ True if `k` is a column name or an alias

        Keys exposing ``decode`` (byte strings) are decoded as UTF-8 first.
        """
        label = k.decode('utf8') if hasattr(k, 'decode') else k
        if label in self.colnames:
            return True
        return label in self._aliases

    def __array__(self, dtype=None, copy=None):
        """ NumPy conversion hook, returns the underlying data array

        Parameters
        ----------
        dtype: dtype, optional
            requested dtype; the data is converted when it differs

        copy: bool, optional
            True forces a copy, False forbids one

        Raises
        ------
        ValueError
            if `copy` is False but a dtype conversion is needed
        """
        data = self.data
        if (dtype is not None) and (np.dtype(dtype) != data.dtype):
            if copy is False:
                raise ValueError('Unable to avoid copy while converting the '
                                 'table data to the requested dtype')
            return data.astype(dtype)
        if copy:
            return data.copy()
        return data

    def __call__(self, *args, **kwargs):
        """ ``table(expr, ...)`` evaluates an expression, ``table()`` prints info

        Any argument is forwarded to :func:`evalexpr`; without arguments the
        result of :func:`info` is returned.
        """
        if args or kwargs:
            return self.evalexpr(*args, **kwargs)
        return self.info()

    def sort(self, keys, copy=False):
        """
        Sort the table according to one or more keys, in place by default.

        Parameters
        ----------

        keys: str or seq(str)
            The key(s) to order by

        copy: bool
            if set returns a sorted copy instead of working inplace

        Returns
        -------
        tab: Table or None
            the sorted copy when `copy` is set, nothing otherwise
        """
        order = keys if hasattr(keys, '__iter__') else [keys]

        if copy is not False:
            duplicate = self.__class__(self, copy=True)
            duplicate.sort(order, copy=False)
            return duplicate

        self.data.sort(order=order)

    def match(self, r2, key):
        """ Returns the indices at which the tables match

        Every value of column `key` in this table is compared for equality
        with every value of the same column in `r2`.

        Parameters
        ----------
        r2:  Table
            second table to use

        key: str
            fields used for comparison.

        Returns
        -------
        indexes: tuple
            tuple of both indices list where the two columns match.
        """
        left = self[key]
        right = r2[key]
        # all-pairs comparison: allocates len(left) x len(right) booleans
        pairwise = np.equal.outer(left, right)
        return np.where(pairwise)

    def stack(self, r, *args, **kwargs):
        """
        Superposes arrays fields by fields

        t.stack(t1, t2, t3, defaults=None, inplace=False)

        Parameters
        ----------
        r: Table or ndarray
            table (or bare record array) to append to this one

        *args: Table or ndarray
            any further table to append

        defaults: dict, optional (keyword only)
            forwarded to :func:`numpy.lib.recfunctions.stack_arrays`

        inplace: bool, optional (keyword only)
            if set, replace the data of this table and return nothing;
            default is False

        Returns
        -------
        t: Table or None
            the stacked table unless `inplace` is set

        Raises
        ------
        AttributeError
            if `r` (or an extra argument) is neither an array nor an object
            with a `data` attribute
        """
        def _records(obj):
            # Every ndarray has a `.data` attribute (its memory buffer), so
            # arrays must be recognised before the Table duck-typing check.
            if isinstance(obj, np.ndarray):
                return obj
            if not hasattr(obj, 'data'):
                raise AttributeError('r should be a Table object')
            return obj.data

        defaults = kwargs.get('defaults', None)
        inplace = kwargs.get('inplace', False)

        data = [self.data] + [_records(k) for k in (r,) + args]
        sdata = recfunctions.stack_arrays(data, defaults, usemask=False,
                                          asrecarray=True)

        if inplace:
            self.data = sdata
        else:
            t = self.__class__(self)
            t.data = sdata
            return t

    def join_by(self, r2, key, jointype='inner', r1postfix='1', r2postfix='2',
                defaults=None, asrecarray=False, asTable=True):
        """
        Join this table (`r1`) and `r2` on key `key`.

        The work is delegated to :func:`numpy.lib.recfunctions.join_by`.
        The key should be either a string or a sequence of string corresponding
        to the fields used to join the array.
        Neither table should have any duplicates along `key`: the presence
        of duplicates will make the output quite unreliable. Note that duplicates
        are not looked for by the algorithm.

        Parameters
        ----------
        r2: Table
            Table to join with

        key: str or seq(str)
            corresponding to the fields used for comparison.

        jointype: str in {'inner', 'outer', 'leftouter'}
            * 'inner'     : returns the elements common to both r1 and r2.
            * 'outer'     : returns the common elements as well as the elements of r1 not in r2 and the elements of r2 not in r1.
            * 'leftouter' : returns the common elements and the elements of r1 not in r2.

        r1postfix: str
            String appended to the names of the fields of r1 that are present in r2

        r2postfix:  str
            String appended to the names of the fields of r2 that are present in r1

        defaults:   dict
            Dictionary mapping field names to the corresponding default values.

        asrecarray, asTable: bool
            accepted but currently not used: the result is always returned
            as a SimpleTable

        Returns
        -------
        tab: SimpleTable
            joined table

        .. note::

            * The output is sorted along the key.

            * Matching is not preserved if there are some duplicates...
        """
        options = dict(jointype=jointype,
                       r1postfix=r1postfix,
                       r2postfix=r2postfix,
                       defaults=defaults,
                       usemask=False,
                       asrecarray=True)
        joined = recfunctions.join_by(key, self.data, r2.data, **options)
        return SimpleTable(joined)

    @property
    def empty_row(self):
        """ New one-row record array with the dtype of the table data

        The content of the row is whatever ``np.rec.recarray`` allocates;
        it is not filled here.
        """
        row_dtype = self.data.dtype
        return np.rec.recarray((1,), dtype=row_dtype)

    def add_column(self, name, data, dtype=None, unit=None, description=None):
        """
        Add one or multiple columns to the table

        Parameters
        ----------
        name: str or sequence(str)
           The name(s) of the column(s) to add

        data: ndarray, or sequence of ndarray
            The column data, or sequence of columns

        dtype: dtype
            numpy dtype for the data to add

        unit: str
            The unit of the values in the column

        description: str
            A description of the content of the column
        """
        column = np.array(data, dtype=dtype)

        # values numpy could only store as objects are converted to
        # fixed-width byte strings sized on the longest item
        if (column.dtype.type == np.object_) and (len(data) != 0):
            width = len(max(data, key=len))
            column = np.asarray(data, dtype='|%iS' % width)

        column_dtype = column.dtype

        table_has_fields = len(self.data.dtype) > 0
        if table_has_fields:
            # append to the existing fields
            if type(name) in basestring:
                field_names = str(name)
            else:
                field_names = [str(k) for k in name]

            self.data = recfunctions.append_fields(self.data, field_names,
                                                   column, dtypes=column_dtype,
                                                   usemask=False,
                                                   asrecarray=True)
        else:
            # no field yet: the new column becomes the whole table
            if column.ndim > 1:
                field = (str(name), column.dtype, (column.shape[1],))
            else:
                field = (str(name), column.dtype)
            self.data = np.array(column, dtype=[field])

        if unit is not None:
            self.set_unit(name, unit)

        if description is not None:
            self.set_comment(name, description)

    def append_row(self, iterable):
        """
        Append one row in this table, in place.

        see also: :func:`stack`

        Parameters
        ----------
        iterable: iterable
            line to add, one value per column in ``colnames`` order

        Raises
        ------
        AttributeError
            if the number of values differs from the number of columns
        """
        if (len(iterable) != self.ncols):
            raise AttributeError('Expecting as many items as columns')
        r = self.empty_row
        for colname, value in zip(r.dtype.names, iterable):
            r[colname][0] = value
        # The row is stacked here and the result stored back: stack() returns
        # a new table unless asked otherwise, so going through it with its
        # defaults left this table unchanged.
        self.data = recfunctions.stack_arrays([self.data, r], usemask=False,
                                              asrecarray=True)

    def remove_columns(self, names):
        """
        Remove several columns from the table

        Same as :func:`pop_columns`, except that the removed data are not
        returned.

        Parameters
        ----------
        names: sequence
            A list containing the names of the columns to remove
        """
        self.pop_columns(names)
        return None

    def pop_columns(self, names):
        """
        Pop several columns from the table

        The columns are removed from the data together with their units,
        descriptions and every alias pointing to them.

        Parameters
        ----------

        names: str or sequence
            name (or alias), or list of names, of the columns to remove

        Returns
        -------

        values: list
            list of columns, in the order of `names`
        """
        if isinstance(names, basestring) or not hasattr(names, '__iter__'):
            names = [names]
        else:
            names = list(names)

        p = [self[k] for k in names]

        _names = set(self.resolve_alias(k) for k in names)
        # drop_fields returns a masked array unless told otherwise; keep the
        # record-array type used by the other recfunctions calls in this class
        self.data = recfunctions.drop_fields(self.data, list(_names),
                                             usemask=False, asrecarray=True)

        # forget the aliases given by the caller and any alias that would be
        # left pointing to a removed column
        stale = [a for (a, c) in self._aliases.items()
                 if (c in _names) or (a in names)]
        for a in stale:
            self._aliases.pop(a, None)

        # units and descriptions are stored under the real column name
        for c in _names:
            self._units.pop(c, None)
            self._desc.pop(c, None)

        return p

    def find_duplicate(self, index_only=False, values_only=False):
        """ Find rows that occur again further down the table

        Each row is compared to all the rows that follow it, so the cost
        grows quadratically with the number of rows. A duplicated row is
        reported once, at its first position.

        Parameters
        ----------
        index_only: bool
            return only the list of first positions

        values_only: bool
            return only the list of duplicated rows (ignored when
            `index_only` is set)

        Returns
        -------
        dup: list or zip
            positions, rows, or by default ``zip(positions, rows)``
        """
        rows = self.data
        duplicated = []
        positions = []
        for position, row in enumerate(rows):
            appears_later = row in rows[position + 1:]
            if appears_later and (row not in duplicated):
                duplicated.append(row)
                positions.append(position)

        if index_only:
            return positions
        if values_only:
            return duplicated
        return zip(positions, duplicated)

    def evalexpr(self, expr, exprvars=None, dtype=float):
        """ evaluate expression based on the data and external variables
            all np function can be used (log, exp, pi...)

        Parameters
        ----------
        expr: str
            expression to evaluate on the table
            includes mathematical operations and attribute names

        exprvars: dictionary, optional
            extra variables made available to the expression

        dtype: dtype definition
            dtype of the output array

        Returns
        -------
        out : NumPy array
            array of the result, one value per row

        Raises
        ------
        AttributeError
            if `exprvars` is not dictionary-like

        .. note::

            The expression goes through :func:`eval`: never feed it text
            coming from an untrusted source. The numpy namespace is given as
            the local scope, so a numpy name wins over a column or variable
            of the same name.
        """
        labels = list(self.colnames) + list(self._aliases.keys())
        # plain substring test: a column is loaded as soon as its name
        # appears anywhere in the expression
        namespace = dict((label, self[label]) for label in labels if label in expr)

        if exprvars is not None:
            dict_like = hasattr(exprvars, 'keys') and hasattr(exprvars, '__getitem__')
            if not dict_like:
                raise AttributeError("Expecting a dictionary-like as condvars")
            for key, value in exprvars.items():
                namespace[key] = value

        # the assignment broadcasts (and casts) the result onto every row
        result = np.empty(self.nrows, dtype=dtype)
        result[:] = eval(expr, namespace, np.__dict__)

        return result

    def where(self, condition, condvars=None, *args, **kwargs):
        """ Locate the rows fulfilling the given `condition`.

        Parameters
        ----------
        condition: str
            expression to evaluate on the table
            includes mathematical operations and attribute names

        condvars: dictionary, optional
            extra variables made available to the expression

        *args, **kwargs:
            forwarded to :func:`np.where` after the boolean mask

        Returns
        -------
        out: ndarray/ tuple of ndarrays
        result equivalent to :func:`np.where`

        """
        mask = self.evalexpr(condition, condvars, dtype=bool)
        return np.where(mask, *args, **kwargs)

    def select(self, fields, indices=None, **kwargs):
        """
        Select only a few fields in the table

        Parameters
        ----------
        fields: str or sequence
            fields to keep in the resulting table; ``'*'`` keeps all of them

        indices: sequence or slice
            extract only on these indices

        returns
        -------
        tab: SimpleTable instance
            resulting table. With ``'*'`` and no indices the table itself is
            returned, not a copy; otherwise every attribute but the data is
            deep-copied onto the new table.
        """
        _fields = self.keys(fields)

        def _carry_over(target):
            # replicate header, aliases, units... everything but the data
            for attr in self.__dict__.keys():
                if attr != 'data':
                    setattr(target, attr, deepcopy(self.__dict__[attr]))
            return target

        if fields == '*':
            if indices is None:
                return self
            return _carry_over(self.__class__(self[indices]))

        columns = {}
        for label in _fields:
            values = self[self.resolve_alias(label)]
            columns[label] = values if indices is None else values[indices]
        columns['header'] = deepcopy(self.header)
        return _carry_over(self.__class__(columns))

    def selectWhere(self, fields, condition, condvars=None, **kwargs):
        """ Build a table from the rows fulfilling the given `condition`.

        Parameters
        ----------
        fields: str or sequence
            fields to keep in the resulting table

        condition: str
            expression to evaluate on the table
            includes mathematical operations and attribute names;
            ``True``, ``'True'`` or ``None`` keep every row

        condvars: dictionary, optional
            extra variables made available to the expression

        **kwargs:
            forwarded to :func:`where`

        Returns
        -------
        tab: SimpleTable instance
            resulting table
        """
        keep_everything = condition in [True, 'True', None]
        if keep_everything:
            rows = None
        else:
            rows = self.where(condition, condvars, **kwargs)[0]

        return self.select(fields, indices=rows)

    def groupby(self, *key):
        """
        Create an iterator which returns (key, sub-table) grouped by each value
        of key(value)

        Like :func:`itertools.groupby`, only *consecutive* rows sharing the
        same key value are grouped together: sort the table on the key(s)
        first to obtain a single group per value.

        Parameters
        ----------
        key: str
            expression or pattern to filter the keys with

        Returns
        -------
        key: scalar or tuple
            group key: the value of the key column, or a tuple of values
            when several columns are used

        tab: SimpleTable instance
           sub-table of the group
           header, aliases and column metadata are preserved (linked to the
           master table).
        """
        # local import: `operator` is not among this module's visible imports
        import operator

        _key = self.keys(key)
        getter = operator.itemgetter(*_key)
        rows = self.data

        # Group row positions rather than row objects: the sub-table is then
        # a regular fancy-indexed selection with the original dtype and a
        # 1-D shape (np.dstack does not accept an iterator of records).
        positions = range(len(rows))
        for k, grp in itertools.groupby(positions, lambda i: getter(rows[i])):
            t = self.__class__(rows[list(grp)])
            t.header = self.header
            t._aliases = self._aliases
            t._units = self._units
            t._desc = self._desc
            yield (k, t)

    def stats(self, fn=None, fields=None, fill=None):
        """ Make statistics on columns of a table

        Parameters
        ----------
        fn: callable or sequence of callables
            functions to apply to each column
            default: (stats.mean, stats.std, stats.min, stats.max,
            stats.has_nan)

        fields: str or sequence
            any key or key expression to subselect columns
            default is all columns

        fill: value
            value when not applicable (i.e. when a function raises on a
            column)
            default np.nan

        returns
        -------
        tab: Table instance
            collection of statistics: a 'FIELD' column with the column names,
            then one column per function in fn (named after the function),
            and one line per column of the table
        """
        from collections import OrderedDict

        if fn is None:
            fn = (stats.mean, stats.std,
                  stats.min, stats.max,
                  stats.has_nan)
        elif callable(fn):
            # a single function is documented as valid input
            fn = (fn, )

        d = OrderedDict()
        d.setdefault('FIELD', [])
        for k in fn:
            d.setdefault(k.__name__, [])

        if fields is None:
            fields = self.colnames
        else:
            fields = self.keys(fields)

        if fill is None:
            fill = np.nan

        for k in fields:
            d['FIELD'].append(k)
            for fnk in fn:
                try:
                    val = fnk(self[k])
                except Exception:
                    # statistic not applicable to this column (e.g. text);
                    # KeyboardInterrupt/SystemExit are left to propagate
                    val = fill
                d[fnk.__name__].append(val)

        return self.__class__(d, dtype=dict)

    # method aliases
    # singular spelling, bound to the same function object as remove_columns
    remove_column = remove_columns

    # deprecated methods
    # camelCase spellings, each bound to the same function object as the
    # snake_case method on the right-hand side
    addCol = add_column
    addLine = append_row
    setComment = set_comment
    setUnit = set_unit
    delCol = remove_columns


class AstroTable(SimpleTable):
    """
    Derived from the Table, this class adds implementations of common astro
    tools, especially cone and zone searches.

    The names of the columns holding the RA and DEC coordinates are kept in
    the ``_ra_name`` and ``_dec_name`` attributes (None when unknown).
    """
    def __init__(self, *args, **kwargs):
        """ Build the table and work out which columns hold RA and DEC

        All arguments are forwarded to the parent constructor. The coordinate
        column names are, by increasing priority: guessed by
        :func:`__autoRADEC__`, copied from the first positional argument when
        it is an AstroTable, given by the `ra_name` / `dec_name` keywords.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as AstroTable itself is subclassed.
        super(AstroTable, self).__init__(*args, **kwargs)
        self._ra_name, self._dec_name = self.__autoRADEC__()
        if (len(args) > 0):
            if isinstance(args[0], AstroTable):
                self._ra_name = args[0]._ra_name
                self._dec_name = args[0]._dec_name
        self._ra_name = kwargs.get('ra_name', self._ra_name)
        self._dec_name = kwargs.get('dec_name', self._dec_name)

    def __autoRADEC__(self):
        """ Tries to identify the columns containing RA and DEC coordinates

        Returns
        -------
        ra_name, dec_name: str or None
            the first of 'ra', 'RA' (resp. 'dec', 'DEC') known to the table,
            None when neither is
        """
        def _first_known(candidates):
            for candidate in candidates:
                if candidate in self:
                    return candidate
            return None

        return _first_known(('ra', 'RA')), _first_known(('dec', 'DEC'))

    def set_RA(self, val):
        """ Set the column that defines RA coordinates """
        assert(val in self), 'column name {} not found in the table'.format(val)
        self._ra_name = val

    def set_DEC(self, val):
        """ Set the column that defines DEC coordinates """
        assert(val in self), 'column name {} not found in the table'.format(val)
        self._dec_name = val

    def _coordinate(self, colname, degree, converter, label):
        """ Shared implementation of :func:`get_RA` and :func:`get_DEC`

        Parameters
        ----------
        colname: str or None
            coordinate column
        degree: bool
            convert byte-string columns with `converter`
        converter: callable
            called as ``converter(values, delim=...)``
        label: str
            coordinate name used in the error message
        """
        if colname is None:
            return None
        if (not degree) or (self.dtype[colname].kind != 'S'):
            return self[colname]

        # the delimiter is guessed from the first row only
        first = str(self[0][colname])
        for delim in (':', ' '):
            if len(first.split(delim)) == 3:
                return np.asarray(converter(self[colname], delim=delim))
        raise Exception('{0} Format not understood'.format(label))

    def get_RA(self, degree=True):
        """ Returns RA, converted from hexa/sexa into degrees

        Parameters
        ----------
        degree: bool
            if set, a byte-string column formatted as three fields separated
            by ':' or ' ' is converted with ``AstroHelpers.hms2deg``; any
            other column is returned as is

        Returns
        -------
        ra: ndarray or None
            None when the RA column is not defined
        """
        return self._coordinate(self._ra_name, degree, AstroHelpers.hms2deg, 'RA')

    def get_DEC(self, degree=True):
        """ Returns DEC, converted from hexa/sexa into degrees

        Parameters
        ----------
        degree: bool
            if set, a byte-string column formatted as three fields separated
            by ':' or ' ' is converted with ``AstroHelpers.dms2deg``; any
            other column is returned as is

        Returns
        -------
        dec: ndarray or None
            None when the DEC column is not defined
        """
        return self._coordinate(self._dec_name, degree, AstroHelpers.dms2deg, 'DEC')

    def info(self):
        """ prints information on the table, including the coordinate columns """
        title = "\nTable: {name:s}\n       nrows={s.nrows:d}, ncols={s.ncols:d}, mem={size:s}"
        parts = [title.format(name=self.header.get('NAME', 'Noname'), s=self,
                              size=pretty_size_print(self.nbytes))]

        parts.append('\n\nHeader:\n')
        vals = list(self.header.items())
        if vals:
            # an empty header would make max() raise ValueError
            length = max(len(k) for k, _ in vals)
            fmt = '\t{{0:{0:d}s}} {{1}}\n'.format(length)
            parts.extend(fmt.format(k, v) for k, v in vals)

        if (self._ra_name is not None) and (self._dec_name is not None):
            parts.append("\nPosition coordinate columns: {0}, {1}\n".format(self._ra_name,
                                                                            self._dec_name))

        parts.append('\nColumns:\n')
        vals = [(k, self._units.get(k, ''), self._desc.get(k, ''))
                for k in self.colnames]
        if vals:
            # one width per printed field: name, unit, description
            lengths = [max(len(row[i]) for row in vals) for i in range(3)]
            fmt = '\t{{0:{0:d}s}} {{1:{1:d}s}} {{2:{2:d}s}}\n'.format(*(k + 1 for k in lengths))
            parts.extend(fmt.format(k, u, c) for k, u, c in vals)

        print(''.join(parts))

        if len(self._aliases) > 0:
            print("\nTable contains alias(es):")
            for k, v in self._aliases.items():
                print('\t{0:s} --> {1:s}'.format(k, v))

    def coneSearch(self, ra, dec, r, outtype=0):
        """ Perform a cone search on a table

        The table coordinates come from :func:`get_RA` and :func:`get_DEC`;
        the search itself is delegated to ``AstroHelpers.conesearch``.

        Parameters
        ----------
        ra: float
            ra to look for (in degree)

        dec: float
            dec to look for (in degree)

        r: float
            distance in degrees

        outtype: int
            type of outputs
                0 -- minimal, indices of matching coordinates
                1 -- indices and distances of matching coordinates
                2 -- full, boolean filter and distances

        Returns
        -------
        t: tuple
            if outtype is 0:
                only return indices of the matching rows
            elif outtype is 1:
                return indices of the matching rows and distances
            elif outtype is 2:
                return conditional vector and distance to all rows

        Raises
        ------
        AttributeError
            if the coordinate columns are not set
        """
        if (self._ra_name is None) or (self._dec_name is None):
            raise AttributeError('Coordinate columns not set.')

        ra0  = self.get_RA()
        dec0 = self.get_DEC()
        return AstroHelpers.conesearch(ra0, dec0, ra, dec, r, outtype=outtype)

    def zoneSearch(self, ramin, ramax, decmin, decmax, outtype=0):
        """ Perform a zone search on a table, i.e., a rectangular selection

        Parameters
        ----------
        ramin: float
            minimal value of RA

        ramax: float
            maximal value of RA

        decmin: float
            minimal value of DEC

        decmax: float
            maximal value of DEC

        outtype: int
            type of outputs
                0, 1 or 2 -- boolean filter, one value per row
                above 2   -- indices, as returned by :func:`np.where`

        Returns
        -------
        r: sequence
            conditional sequence or indices of matching values

        .. note::

            NOTE(review): earlier documentation announced indices for
            outtype 0 and 1; the implementation has always returned the
            boolean filter for these values and is left unchanged.
        """

        assert( (self._ra_name is not None) & (self._dec_name is not None) ), 'Coordinate columns not set.'

        ra0  = self.get_RA()
        dec0 = self.get_DEC()
        ind = (ra0 >= ramin) & (ra0 <= ramax) & (dec0 >= decmin) & (dec0 <= decmax)
        if outtype <= 2:
            return ind
        else:
            return np.where(ind)

    def where(self, condition=None, condvars=None, cone=None, zone=None, **kwargs):
        """ Locate the rows fulfilling every given criterion.

        The expression, the cone and the zone are combined with a logical
        AND; a criterion left to None is ignored, and with no criterion at
        all every row is selected.

        Parameters
        ----------
        condition: str
            expression to evaluate on the table
            includes mathematical operations and attribute names

        condvars: dictionary, optional
            extra variables made available to the expression

        cone: tuple, optional
            cone search given as (ra, dec, r), in degrees

        zone: tuple, optional
            zone search given as (ramin, ramax, decmin, decmax)

        **kwargs:
            accepted for compatibility, not used

        Returns
        -------
        ind: ndarray
            indices of the selected rows

        blobs: list
            empty, or holding one array with the distance of each selected
            row to the cone centre when `cone` is given

        Raises
        ------
        ValueError
            if `cone` or `zone` does not have the expected length
        """
        if cone is not None:
            if len(cone) != 3:
                raise ValueError('Expecting cone keywords as a triplet (ra, dec, r)')
        if zone is not None:
            if len(zone) != 4:
                raise ValueError('Expecting zone keywords as a tuple of 4 elements (ramin, ramax, decmin, decmax)')

        # Everything is combined as boolean masks. The parent `where` returns
        # index arrays, which cannot be AND-ed with the search masks, so the
        # expression is evaluated directly (with its variables).
        if condition is not None:
            ind = self.evalexpr(condition, condvars, dtype=bool)
        else:
            ind = np.ones(self.nrows, dtype=bool)

        distances = None
        if cone is not None:
            ra, dec, r = cone
            _ind, distances = self.coneSearch(ra, dec, r, outtype=2)
            ind = ind & np.asarray(_ind).astype(bool)

        if zone is not None:
            _ind = self.zoneSearch(zone[0], zone[1], zone[2], zone[3], outtype=2)
            ind = ind & _ind

        blobs = []
        if distances is not None:
            # distances of the rows surviving *all* the criteria
            blobs.append(np.asarray(distances)[ind])

        ind = np.where(ind)[0]

        return ind, blobs

    def selectWhere(self, fields, condition=None, condvars=None, cone=None, zone=None, **kwargs):
        """ Build a table from the rows fulfilling the given `condition`.
            Only the rows fulfilling the `condition` are included in the result.
            conesearch is also possible through the keyword cone formatted as (ra, dec, r)
            zonesearch is also possible through the keyword zone formatted as (ramin, ramax, decmin, decmax)

            Combination of multiple selections is also available.

            With a cone search, a 'separation' column (in degree) holding the
            distance to the cone centre is added to the result.
        """
        ind, blobs = self.where(condition, condvars, cone, zone, **kwargs)
        tab = self.select(fields, indices=ind)

        if cone is not None:
            # blobs[0] is already one value per selected row; squeezing it
            # would collapse a single match into a 0-d array
            tab.add_column('separation', np.asarray(blobs[0]), unit='degree')

        if self._ra_name in tab:
            tab.set_RA(self._ra_name)

        if self._dec_name in tab:
            tab.set_DEC(self._dec_name)

        return tab


class stats(object):
    """ NaN-aware summary statistics exposed as class methods.

    Every method takes the data `v` as its only argument and can be called
    directly on the class, e.g. ``stats.mean(v)``.
    """

    @classmethod
    def has_nan(cls, v):
        """ Return True if at least one value of `v` is NaN """
        # np.any also handles scalar input, for which a membership test on
        # the result of np.isnan raises a TypeError
        return bool(np.any(np.isnan(v)))

    @classmethod
    def mean(cls, v):
        """ Mean of `v`, ignoring NaN values """
        return np.nanmean(v)

    @classmethod
    def max(cls, v):
        """ Maximum of `v`, ignoring NaN values """
        return np.nanmax(v)

    @classmethod
    def min(cls, v):
        """ Minimum of `v`, ignoring NaN values """
        return np.nanmin(v)

    @classmethod
    def std(cls, v):
        """ Standard deviation of `v`, ignoring NaN values """
        return np.nanstd(v)

    @classmethod
    def var(cls, v):
        """ Variance of `v`, ignoring NaN values

        Uses the NaN-aware estimator like every other statistic of this
        class, so that ``var(v) == std(v) ** 2`` also holds with NaNs.
        """
        return np.nanvar(v)

    @classmethod
    def _percentile(cls, v, q):
        """ q-th percentile of `v`, NaN-aware whenever numpy allows it """
        try:
            return np.nanpercentile(v, q)
        except AttributeError:
            # np.nanpercentile is not available in this numpy:
            # fall back on the percentile that does not ignore NaN values
            return np.percentile(v, q)

    @classmethod
    def p16(cls, v):
        """ 16th percentile of `v` """
        return cls._percentile(v, 16)

    @classmethod
    def p84(cls, v):
        """ 84th percentile of `v` """
        return cls._percentile(v, 84)

    @classmethod
    def p50(cls, v):
        """ Median of `v` """
        try:
            return np.nanmedian(v)
        except AttributeError:
            # np.nanmedian is not available in this numpy:
            # fall back on the percentile that does not ignore NaN values
            return np.percentile(v, 50)


'''
# =============================================================================
# Adding some plotting functions
# =============================================================================

try:
    import pylab as plt

    def plot_function(tab, fn, *args, **kwargs):
        """ Generate a plotting method of tab from a given function

        Parameters
        ----------
        tab: SimpleTable instance
            table instance

        fn: str or callable
            if str, will try a function in matplotlib
            if callable, calls the function directly

        xname: str
            expecting a column name from the table

        yname: str, optional
            if provided, another column to use for the plot

        onlywhere: sequence or str, optional
            if provided, selects only data with this condition
            the condition can be a ndarray slice or a string.
            When a string is given, the evaluation calls :func:`SimpleTable.where`

        ax: matplotlib.Axes instance
            if provided make sure it uses the axis to do the plots if a mpl
            function is used.

        Returns
        -------
        r: object
            anything returned by the called function
        """
        if not hasattr(fn, '__call__'):
            ax = kwargs.pop('ax', None)
            if ax is None:
                ax = plt.gca()
            _fn = getattr(ax, fn, None)
            if _fn is None:
                raise AttributeError('function neither callable or found in matplotlib')
        else:
            _fn = fn

        onlywhere = kwargs.pop('onlywhere', None)
        if type(onlywhere) in basestring:
            select = tab.where(onlywhere)
        else:
            select = onlywhere

        _args = ()
        for a in args:
            if (hasattr(a, '__iter__')):
                try:
                    b = tab[a]
                    if select is not None:
                        b = b.compress(select)
                    if (len(b.dtype) > 1):
                        b = list((b[k] for k in b.dtype.names))
                    _args += (b, )
                except Exception as e:
                    print(e)
                    _args += (a, )
            else:
                _args += (a, )

        return _fn(*_args, **kwargs)

    def attached_function(fn, doc=None, errorlevel=0):
        """ Declare a function as a method of the table class"""

        def _fn(self, *args, **kwargs):
            try:
                return plot_function(self, fn, *args, **kwargs)
            except Exception as e:
                if errorlevel < 1:
                    pass
                else:
                    raise e

        if doc is not None:
            _fn.__doc__ = doc

        return _fn

    SimpleTable.plot_function = plot_function
    SimpleTable.plot = attached_function('plot', plt.plot.__doc__)
    SimpleTable.hist = attached_function('hist', plt.hist.__doc__)
    SimpleTable.hist2d = attached_function('hist2d', plt.hist2d.__doc__)
    SimpleTable.hexbin = attached_function('hexbin', plt.hexbin.__doc__)
    SimpleTable.scatter = attached_function('scatter', plt.scatter.__doc__)

    # newer version of matplotlib
    if hasattr(plt, 'violinplot'):
        SimpleTable.violinplot = attached_function('violinplot', plt.violinplot.__doc__)
    if hasattr(plt, 'boxplot'):
        SimpleTable.boxplot = attached_function('boxplot', plt.boxplot.__doc__)

except Exception as e:
    print(e)
'''