python source code of mapper

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import json

import six
import tableschema
import sqlalchemy as sa
from sqlalchemy import CheckConstraint as Check
from sqlalchemy.dialects.postgresql import ARRAY, JSON, JSONB, UUID


# Module API

class Mapper(object):

    # Public

    def __init__(self, prefix, dialect='sqlite'):
        """Mapper to convert/restore FD entities to/from SQL entities
        """
        self.__prefix = prefix
        self.__dialect = dialect

    def convert_bucket(self, bucket):
        """Convert bucket to SQL
        """
        return self.__prefix + bucket

    def convert_descriptor(self, bucket, descriptor, index_fields=[], autoincrement=None):
        """Convert descriptor to SQL
        """

        # Prepare
        columns = []
        indexes = []
        fallbacks = []
        constraints = []
        column_mapping = {}
        table_name = self.convert_bucket(bucket)
        comment = _get_comment(descriptor.get('title', ''), descriptor.get('description', ''))
        schema = tableschema.Schema(descriptor)

        # Autoincrement
        if autoincrement is not None:
            columns.append(sa.Column(
                autoincrement, sa.Integer, autoincrement=True, nullable=False))

        # Fields
        for field in schema.fields:
            column_type = self.convert_type(field.type)
            if not column_type:
                column_type = sa.Text
                fallbacks.append(field.name)
            nullable = not field.required
            comment = _get_field_comment(field)
            unique = field.constraints.get('unique', False)
            checks = []
            for name, value in field.constraints.items():
                if name == 'minLength':
                    checks.append(Check('LENGTH("%s") >= %s' % (field.name, value)))
                elif name == 'maxLength':
                    checks.append(Check('LENGTH("%s") <= %s' % (field.name, value)))
                elif name == 'minimum':
                    checks.append(Check('"%s" >= %s' % (field.name, value)))
                elif name == 'maximum':
                    checks.append(Check('"%s" <= %s' % (field.name, value)))
                elif name == 'pattern':
                    if self.__dialect in ['postgresql']:
                        checks.append(Check('"%s" ~ \'%s\'' % (field.name, value)))
                    else:
                        checks.append(Check('"%s" REGEXP \'%s\'' % (field.name, value)))
                elif name == 'enum':
                    column_type = sa.Enum(*value, name='%s_%s_enum' % (table_name, field.name))
            column = sa.Column(*([field.name, column_type] + checks),
                nullable=nullable, comment=comment, unique=unique)
            columns.append(column)
            column_mapping[field.name] = column

        # Primary key
        pk = descriptor.get('primaryKey', None)
        if pk is not None:
            if isinstance(pk, six.string_types):
                pk = [pk]
        if autoincrement is not None:
            if pk is not None:
                pk = [autoincrement] + pk
            else:
                pk = [autoincrement]
        if pk is not None:
            constraint = sa.PrimaryKeyConstraint(*pk)
            constraints.append(constraint)

        # Foreign keys
        if self.__dialect in ['postgresql', 'sqlite']:
            fks = descriptor.get('foreignKeys', [])
            for fk in fks:
                fields = fk['fields']
                resource = fk['reference']['resource']
                foreign_fields = fk['reference']['fields']
                if isinstance(fields, six.string_types):
                    fields = [fields]
                if resource != '':
                    table_name = self.convert_bucket(resource)
                if isinstance(foreign_fields, six.string_types):
                    foreign_fields = [foreign_fields]
                composer = lambda field: '.'.join([table_name, field])
                foreign_fields = list(map(composer, foreign_fields))
                constraint = sa.ForeignKeyConstraint(fields, foreign_fields)
                constraints.append(constraint)

        # Indexes
        if self.__dialect == 'postgresql':
            for index, index_definition in enumerate(index_fields):
                name = table_name + '_ix%03d' % index
                index_columns = [column_mapping[field] for field in index_definition]
                indexes.append(sa.Index(name, *index_columns))

        return columns, constraints, indexes, fallbacks, comment

    def convert_row(self, keyed_row, schema, fallbacks):
        """Convert row to SQL
        """
        for key, value in list(keyed_row.items()):
            field = schema.get_field(key)
            if not field:
                del keyed_row[key]
            if key in fallbacks:
                value = _uncast_value(value, field=field)
            else:
                value = field.cast_value(value)
            keyed_row[key] = value
        return keyed_row

    def convert_type(self, type):
        """Convert type to SQL
        """

        # Default dialect
        mapping = {
            'any': sa.Text,
            'array': None,
            'boolean': sa.Boolean,
            'date': sa.Date,
            'datetime': sa.DateTime,
            'duration': None,
            'geojson': None,
            'geopoint': None,
            'integer': sa.Integer,
            'number': sa.Float,
            'object': None,
            'string': sa.Text,
            'time': sa.Time,
            'year': sa.Integer,
            'yearmonth': None,
        }

        # Postgresql dialect
        if self.__dialect == 'postgresql':
            mapping.update({
                'array': JSONB,
                'geojson': JSONB,
                'number': sa.Numeric,
                'object': JSONB,
            })

        # Not supported type
        if type not in mapping:
            message = 'Field type "%s" is not supported'
            raise tableschema.exceptions.StorageError(message % type)

        return mapping[type]

    def restore_bucket(self, table_name):
        """Restore bucket from SQL
        """
        if table_name.startswith(self.__prefix):
            return table_name.replace(self.__prefix, '', 1)
        return None

    def restore_descriptor(self, table_name, columns, constraints, autoincrement=None):
        """Restore descriptor from SQL
        """

        # Fields
        fields = []
        for column in columns:
            if column.name == autoincrement:
                continue
            field_type = self.restore_type(column.type)
            field = {'name': column.name, 'type': field_type}
            if not column.nullable:
                field['constraints'] = {'required': True}
            fields.append(field)

        # Primary key
        pk = []
        for constraint in constraints:
            if isinstance(constraint, sa.PrimaryKeyConstraint):
                for column in constraint.columns:
                    if column.name == autoincrement:
                        continue
                    pk.append(column.name)

        # Foreign keys
        fks = []
        if self.__dialect in ['postgresql', 'sqlite']:
            for constraint in constraints:
                if isinstance(constraint, sa.ForeignKeyConstraint):
                    resource = ''
                    own_fields = []
                    foreign_fields = []
                    for element in constraint.elements:
                        own_fields.append(element.parent.name)
                        if element.column.table.name != table_name:
                            resource = self.restore_bucket(element.column.table.name)
                        foreign_fields.append(element.column.name)
                    if len(own_fields) == len(foreign_fields) == 1:
                        own_fields = own_fields.pop()
                        foreign_fields = foreign_fields.pop()
                    fks.append({
                        'fields': own_fields,
                        'reference': {'resource': resource, 'fields': foreign_fields},
                    })

        # Desscriptor
        descriptor = {}
        descriptor['fields'] = fields
        if len(pk) > 0:
            if len(pk) == 1:
                pk = pk.pop()
            descriptor['primaryKey'] = pk
        if len(fks) > 0:
            descriptor['foreignKeys'] = fks

        return descriptor

    def restore_row(self, row, schema, autoincrement):
        """Restore row from SQL
        """
        row = list(row)
        for index, field in enumerate(schema.fields, start=1 if autoincrement else 0):
            if self.__dialect == 'postgresql':
                if field.type in ['array', 'object']:
                    continue
            row[index] = field.cast_value(row[index])
        return row

    def restore_type(self, type):
        """Restore type from SQL
        """

        # All dialects
        mapping = {
            ARRAY: 'array',
            sa.Boolean: 'boolean',
            sa.Date: 'date',
            sa.DateTime: 'datetime',
            sa.Float: 'number',
            sa.Integer: 'integer',
            JSONB: 'object',
            JSON: 'object',
            sa.Numeric: 'number',
            sa.Text: 'string',
            sa.Time: 'time',
            sa.VARCHAR: 'string',
            UUID: 'string',
        }

        # Get field type
        field_type = None
        for key, value in mapping.items():
            if isinstance(type, key):
                field_type = value

        # Not supported
        if field_type is None:
            message = 'Type "%s" is not supported'
            raise tableschema.exceptions.StorageError(message % type)

        return field_type


# Internal

def _uncast_value(value, field):
    # Eventially should be moved to:
    # https://github.com/frictionlessdata/tableschema-py/issues/161
    if isinstance(value, (list, dict)):
        value = json.dumps(value)
    else:
        value = str(value)
    return value


def _get_field_comment(field, separator=' - '):
    """
    Create SQL comment from field's title and description

    :param field: tableschema-py Field, with optional 'title' and 'description' values
    :param separator:
    :return:

    >>> _get_field_comment(tableschema.Field({'title': 'my_title', 'description': 'my_desc'}))
    'my_title - my_desc'
    >>> _get_field_comment(tableschema.Field({'title': 'my_title', 'description': None}))
    'my_title'
    >>> _get_field_comment(tableschema.Field({'title': '', 'description': 'my_description'}))
    'my_description'
    >>> _get_field_comment(tableschema.Field({}))
    ''
    """
    title = field.descriptor.get('title') or ''
    description = field.descriptor.get('description') or ''
    return _get_comment(description, title, separator)


def _get_comment(description, title, separator=' - '):
    if title == '':
        return description
    if description == '':
        return title
    return title + separator + description