python source code of base

"""
Redshift database backend for Django based upon django PostgreSQL backend.

Requires psycopg 2: http://initd.org/projects/psycopg2
"""
from __future__ import absolute_import

from copy import deepcopy
import re
import uuid
import logging

from django.conf import settings
from django.core.exceptions import FieldDoesNotExist
from django.db.backends.base.validation import BaseDatabaseValidation
from django.db.backends.postgresql_psycopg2.base import (
    DatabaseFeatures as BasePGDatabaseFeatures,
    DatabaseWrapper as BasePGDatabaseWrapper,
    DatabaseOperations as BasePGDatabaseOperations,
    DatabaseSchemaEditor as BasePGDatabaseSchemaEditor,
    DatabaseClient,
    DatabaseCreation as BasePGDatabaseCreation,
    DatabaseIntrospection,
)
from django.db.utils import NotSupportedError

from django_redshift_backend.distkey import DistKey


logger = logging.getLogger('django.db.backends')


class DatabaseFeatures(BasePGDatabaseFeatures):
    can_return_id_from_insert = False
    can_return_ids_from_bulk_insert = False
    has_select_for_update = False
    supports_column_check_constraints = False
    can_distinct_on_fields = False
    allows_group_by_selected_pks = False
    has_native_uuid_field = False


class DatabaseOperations(BasePGDatabaseOperations):

    def last_insert_id(self, cursor, table_name, pk_name):
        """
        Amazon Redshift doesn't support RETURNING, so this method
        retrieve MAX(pk) after insertion as a workaround.

        refs:
        * http://stackoverflow.com/q/19428860
        * http://stackoverflow.com/q/25638539

        How about ``return cursor.lastrowid`` that is implemented in
        django.db.backends.base.operations? Unfortunately, it doesn't
        work too.

        NOTE: in some case, MAX(pk) workaround does not work correctly.
        Bulk insertion makes non-contiguous IDs like: 1, 4, 7, 10, ...
        and single insertion after such bulk insertion generates strange
        id value like 2.
        """
        cursor.execute('SELECT MAX({pk}) from {table}'.format(
            pk=pk_name, table=self.quote_name(table_name)))
        return cursor.fetchone()[0]

    def for_update_sql(self, nowait=False):
        raise NotSupportedError(
            'SELECT FOR UPDATE is not implemented for this database backend')

    def deferrable_sql(self):
        # unused
        return ""

    def sequence_reset_sql(self, style, model_list):
        # impossible with Redshift to reset a sequence
        return []

    def get_db_converters(self, expression):
        converters = super(DatabaseOperations, self).get_db_converters(expression)
        internal_type = expression.output_field.get_internal_type()
        if internal_type == 'UUIDField':
            converters.append(self.convert_uuidfield_value)
        return converters

    def convert_uuidfield_value(self, value, expression, connection, context):
        if value is not None:
            value = uuid.UUID(value)
        return value

    def distinct_sql(self, fields, *args):
        if fields:
            # https://github.com/jazzband/django-redshift-backend/issues/14
            # Redshift doesn't support DISTINCT ON
            raise NotSupportedError('DISTINCT ON fields is not supported '
                                      'by this database backend')
        return super(DatabaseOperations, self).distinct_sql(fields, *args)


def _related_non_m2m_objects(old_field, new_field):
    # Filters out m2m objects from reverse relations.
    # Returns (old_relation, new_relation) tuples.
    return zip(
        (obj
         for obj in old_field.model._meta.related_objects
         if not obj.field.many_to_many),
        (obj
         for obj in new_field.model._meta.related_objects
         if not obj.field.many_to_many)
    )


class DatabaseSchemaEditor(BasePGDatabaseSchemaEditor):

    sql_create_table = "CREATE TABLE %(table)s (%(definition)s) %(options)s"

    @property
    def multiply_varchar_length(self):
        return int(getattr(settings, "REDSHIFT_VARCHAR_LENGTH_MULTIPLIER", 1))

    def _model_indexes_sql(self, model):
        # Redshift doesn't support INDEX.
        return []

    def _create_like_index_sql(self, model, field):
        # Redshift doesn't support INDEX.
        return None

    def alter_index_together(self, model, old_index_together, new_index_together):
        # Redshift doesn't support INDEX.
        return

    def _create_index_sql(self, model, fields, suffix="", sql=None):
        raise NotSupportedError("Redshift doesn't support INDEX")

    def create_model(self, model):
        """
        Takes a model and creates a table for it in the database.
        Will also create any accompanying indexes or unique constraints.
        """
        # Create column SQL, add FK deferreds if needed
        column_sqls = []
        params = []
        for field in model._meta.local_fields:
            # SQL
            definition, extra_params = self.column_sql(model, field)
            if definition is None:
                continue

            # ## if 'definition' contains 'varchar', length must be 3 times
            # ## because Redshift requires bytes length for utf-8 chars.
            m = re.match('varchar\((\d+?)\)', definition)
            if m:
                definition = re.sub(
                    'varchar\((\d+?)\)',
                    "varchar({0})".format(
                        str(int(m.group(1)) * self.multiply_varchar_length)),
                    definition)

            field.db_parameters(connection=self.connection)
            # Autoincrement SQL (for backends with inline variant)
            col_type_suffix = field.db_type_suffix(connection=self.connection)
            if col_type_suffix:
                definition += " %s" % col_type_suffix
            params.extend(extra_params)
            # FK
            if field.remote_field and field.db_constraint:
                to_table = field.remote_field.related_model._meta.db_table
                to_column = field.remote_field.related_model._meta.get_field(
                    field.remote_field.field_name).column
                if self.connection.features.supports_foreign_keys:
                    self.deferred_sql.append(self._create_fk_sql(
                        model, field, "_fk_%(to_table)s_%(to_column)s"))
                elif self.sql_create_inline_fk:
                    definition += " " + self.sql_create_inline_fk % {
                        "to_table": self.quote_name(to_table),
                        "to_column": self.quote_name(to_column),
                    }
            # Add the SQL to our big list
            column_sqls.append("%s %s" % (
                self.quote_name(field.column),
                definition,
            ))
            # Autoincrement SQL (for backends with post table definition variant)
            if field.get_internal_type() in ("AutoField", "BigAutoField"):
                autoinc_sql = self.connection.ops.autoinc_sql(
                    model._meta.db_table, field.column)
                if autoinc_sql:
                    self.deferred_sql.extend(autoinc_sql)

        # Add any unique_togethers (always deferred, as some fields might
        # be created afterwards, like geometry fields with some backends)
        for fields in model._meta.unique_together:
            columns = [model._meta.get_field(field).column for field in fields]
            self.deferred_sql.append(self._create_unique_sql(model, columns))

        # Make the table
        sql = self.sql_create_table % {
            "table": self.quote_name(model._meta.db_table),
            "definition": ", ".join(column_sqls),
            "options": self._get_create_options(model)
        }
        if model._meta.db_tablespace:
            tablespace_sql = self.connection.ops.tablespace_sql(
                model._meta.db_tablespace)
            if tablespace_sql:
                sql += ' ' + tablespace_sql
        # Prevent using [] as params, in the case a literal '%' is used in the definition
        self.execute(sql, params or None)

        # Add any field index and index_together's
        # (deferred as SQLite3 _remake_table needs it)
        self.deferred_sql.extend(self._model_indexes_sql(model))

        # Make M2M tables
        for field in model._meta.local_many_to_many:
            if field.remote_field.through._meta.auto_created:
                self.create_model(field.remote_field.through)

    def add_field(self, model, field):
        """
        Creates a field on a model.
        Usually involves adding a column, but may involve adding a
        table instead (for M2M fields)
        """
        # Special-case implicit M2M tables
        if field.many_to_many and field.remote_field.through._meta.auto_created:
            return self.create_model(field.remote_field.through)

        # Get the column's definition
        # ## To add column to existent table on Redshift, field.null must be allowed
        field.null = True
        definition, params = self.column_sql(model, field, include_default=True)
        # It might not actually have a column behind it
        if definition is None:
            return

        # ## original BasePGDatabaseSchemaEditor.add_field has check constraints here.
        # ## Redshift doesn't support it.

        # Build the SQL and run it
        sql = self.sql_create_column % {
            "table": self.quote_name(model._meta.db_table),
            "column": self.quote_name(field.column),
            "definition": definition,
        }
        self.execute(sql, params)

        # ## original BasePGDatabaseSchemaEditor.add_field drop default here
        # ## Redshift doesn't support DROP DEFAULT.

        # ## original BasePGDatabaseSchemaEditor.add_field has CREATE INDEX.
        # ## Redshift doesn't support INDEX.

        # Add any FK constraints later
        if (field.remote_field and
                self.connection.features.supports_foreign_keys and
                field.db_constraint):
            self.deferred_sql.append(
                self._create_fk_sql(model, field, "_fk_%(to_table)s_%(to_column)s"))
        # Reset connection if required
        if self.connection.features.connection_persists_old_columns:
            self.connection.close()

    def _alter_field(self, model, old_field, new_field, old_type, new_type,
                     old_db_params, new_db_params, strict=False):
        """Actually perform a "physical" (non-ManyToMany) field update."""

        # Drop any FK constraints, we'll remake them later
        fks_dropped = set()
        if old_field.remote_field and old_field.db_constraint:
            fk_names = self._constraint_names(model, [old_field.column], foreign_key=True)
            if strict and len(fk_names) != 1:
                raise ValueError(
                    "Found wrong number (%s) of foreign key constraints for %s.%s" %
                    (len(fk_names), model._meta.db_table, old_field.column))
            for fk_name in fk_names:
                fks_dropped.add((old_field.column,))
                self.execute(self._delete_constraint_sql(
                    self.sql_delete_fk, model, fk_name))
        # Has unique been removed?
        if (old_field.unique and
                (not new_field.unique or
                 (not old_field.primary_key and new_field.primary_key))):
            # Find the unique constraint for this field
            constraint_names = self._constraint_names(
                model, [old_field.column], unique=True)
            if strict and len(constraint_names) != 1:
                raise ValueError(
                    "Found wrong number (%s) of unique constraints for %s.%s" %
                    (len(constraint_names), model._meta.db_table, old_field.column))
            for constraint_name in constraint_names:
                self.execute(self._delete_constraint_sql(
                    self.sql_delete_unique, model, constraint_name))
        # Drop incoming FK constraints if we're a primary key and things are going
        # to change.
        if old_field.primary_key and new_field.primary_key and old_type != new_type:
            # '_meta.related_field' also contains M2M reverse fields, these
            # will be filtered out
            for _old_rel, new_rel in _related_non_m2m_objects(old_field, new_field):
                rel_fk_names = self._constraint_names(
                    new_rel.related_model, [new_rel.field.column], foreign_key=True
                )
                for fk_name in rel_fk_names:
                    self.execute(self._delete_constraint_sql(
                        self.sql_delete_fk, new_rel.related_model, fk_name))
        # Removed an index? (no strict check, as multiple indexes are possible)
        if (old_field.db_index and
                not new_field.db_index and
                not old_field.unique and
                not (not new_field.unique and old_field.unique)):
            # Find the index for this field
            index_names = self._constraint_names(model, [old_field.column], index=True)
            for index_name in index_names:
                self.execute(self._delete_constraint_sql(
                    self.sql_delete_index, model, index_name))
        # Change check constraints?
        if old_db_params['check'] != new_db_params['check'] and old_db_params['check']:
            constraint_names = self._constraint_names(
                model, [old_field.column], check=True)
            if strict and len(constraint_names) != 1:
                raise ValueError(
                    "Found wrong number (%s) of check constraints for %s.%s" %
                    (len(constraint_names), model._meta.db_table, old_field.column))
            for constraint_name in constraint_names:
                self.execute(self._delete_constraint_sql(
                    self.sql_delete_check, model, constraint_name))
        # Have they renamed the column?
        if old_field.column != new_field.column:
            self.execute(self._rename_field_sql(
                model._meta.db_table, old_field, new_field, new_type))
        # Next, start accumulating actions to do
        actions = []
        post_actions = []

        # When changing a column NULL constraint to NOT NULL with a given
        # default value, we need to perform 4 steps:
        #  1. Add a default for new incoming writes
        #  2. Update existing NULL rows with new default
        #  3. Replace NULL constraint with NOT NULL
        #  4. Drop the default again.
        # Default change?
        old_default = self.effective_default(old_field)
        new_default = self.effective_default(new_field)
        needs_database_default = (
            old_default != new_default and
            new_default is not None and
            not self.skip_default(new_field)
        )
        # Type or default is changed?
        if (old_type != new_type) or needs_database_default:
            # ## To change column type or default, We need this migration sequence:
            # ##
            # ## 1. Add new column with temporary name
            # ## 2. Migrate values from original column to temprary column
            # ## 3. Drop old column
            # ## 4. Rename temporary column name to original column name

            # ## ALTER TABLE <table> ADD COLUMN 'tmp' <type> DEFAULT <value>
            definition, params = self.column_sql(model, new_field, include_default=True)
            new_defaults = [new_default] if new_default is not None else []
            actions.append((
                self.sql_create_column % {
                    "table": self.quote_name(model._meta.db_table),
                    "column": self.quote_name(new_field.column + "_tmp"),
                    "definition": definition,
                },
                new_defaults
            ))
            # ## UPDATE <table> SET 'tmp' = <orig column>
            actions.append((
                "UPDATE %(table)s SET %(new_column)s = %(old_column)s" % {
                    "table": model._meta.db_table,
                    "new_column": self.quote_name(new_field.column + "_tmp"),
                    "old_column": self.quote_name(new_field.column),
                }, []
            ))
            # ## ALTER TABLE <table> DROP COLUMN <orig column>
            actions.append((
                self.sql_delete_column % {
                    "table": model._meta.db_table,
                    "column": self.quote_name(new_field.column),
                }, [],
            ))
            # ## ALTER TABLE <table> RENAME COLUMN 'tmp' <orig column>
            actions.append((
                self.sql_rename_column % {
                    "table": model._meta.db_table,
                    "old_column": self.quote_name(new_field.column + "_tmp"),
                    "new_column": self.quote_name(new_field.column),
                }, []
            ))

        # Apply those actions
        for sql, params in actions:
            self.execute(sql, params)
        if post_actions:
            for sql, params in post_actions:
                self.execute(sql, params)
        # Added a unique?
        if ((not old_field.unique and new_field.unique) or
           (old_field.primary_key and not new_field.primary_key and new_field.unique)):
            self.execute(self._create_unique_sql(model, [new_field.column]))

        # ## original BasePGDatabaseSchemaEditor._alter_field has CREATE INDEX
        # ## Redshift doesn't support it.

        # Type alteration on primary key? Then we need to alter the column
        # referring to us.
        rels_to_update = []
        if old_field.primary_key and new_field.primary_key and old_type != new_type:
            rels_to_update.extend(_related_non_m2m_objects(old_field, new_field))
        # Changed to become primary key?
        # Note that we don't detect unsetting of a PK, as we assume another field
        # will always come along and replace it.
        if not old_field.primary_key and new_field.primary_key:
            # First, drop the old PK
            constraint_names = self._constraint_names(model, primary_key=True)
            if strict and len(constraint_names) != 1:
                raise ValueError("Found wrong number (%s) of PK constraints for %s" % (
                    len(constraint_names),
                    model._meta.db_table,
                ))
            for constraint_name in constraint_names:
                self.execute(self._delete_constraint_sql(
                    self.sql_delete_pk, model, constraint_name))
            # Make the new one
            self.execute(
                self.sql_create_pk % {
                    "table": self.quote_name(model._meta.db_table),
                    "name": self.quote_name(self._create_index_name(
                        model, [new_field.column], suffix="_pk")),
                    "columns": self.quote_name(new_field.column),
                }
            )
            # Update all referencing columns
            rels_to_update.extend(_related_non_m2m_objects(old_field, new_field))
        # Handle our type alters on the other end of rels from the PK stuff above
        for old_rel, new_rel in rels_to_update:
            rel_db_params = new_rel.field.db_parameters(connection=self.connection)
            rel_type = rel_db_params['type']
            fragment, other_actions = self._alter_column_type_sql(
                new_rel.related_model._meta.db_table, old_rel.field, new_rel.field,
                rel_type
            )
            self.execute(
                self.sql_alter_column % {
                    "table": self.quote_name(new_rel.related_model._meta.db_table),
                    "changes": fragment[0],
                },
                fragment[1],
                )
            for sql, params in other_actions:
                self.execute(sql, params)
        # Does it have a foreign key?
        if (new_field.remote_field and
                (fks_dropped or
                    not old_field.remote_field or
                    not old_field.db_constraint) and
                new_field.db_constraint):
            self.execute(self._create_fk_sql(
                model, new_field, "_fk_%(to_table)s_%(to_column)s"))
        # Rebuild FKs that pointed to us if we previously had to drop them
        if old_field.primary_key and new_field.primary_key and old_type != new_type:
            for rel in new_field.model._meta.related_objects:
                if not rel.many_to_many:
                    self.execute(self._create_fk_sql(
                        rel.related_model, rel.field, "_fk"))
        # Does it have check constraints we need to add?
        if old_db_params['check'] != new_db_params['check'] and new_db_params['check']:
            self.execute(
                self.sql_create_check % {
                    "table": self.quote_name(model._meta.db_table),
                    "name": self.quote_name(self._create_index_name(
                        model, [new_field.column], suffix="_check")),
                    "column": self.quote_name(new_field.column),
                    "check": new_db_params['check'],
                }
            )

        # ## original BasePGDatabaseSchemaEditor._alter_field drop default here
        # ## Redshift doesn't support DROP DEFAULT.

        # Reset connection if required
        if self.connection.features.connection_persists_old_columns:
            self.connection.close()

    def _get_create_options(self, model):
        """
        Provide options to create the table. Supports:
            - distkey
            - sortkey

        N.B.: no validation is made on these options, we'll let the Database
              do the validation for us.
        """
        def quoted_column_name(field_name):
            # We strip the '-' that may precede the field name in an `ordering`
            # specification.
            try:
                colname = model._meta.get_field(
                    field_name.strip('-')).get_attname_column()[1]
            except FieldDoesNotExist:
                # Out of an abundance of caution - e.g., so that you get a more
                # appropriate error message higher up the stack.
                colname = field_name
            return self.connection.ops.quote_name(colname)

        create_options = []

        distkey = None
        for idx in model._meta.indexes:
            if isinstance(idx, DistKey):
                if distkey:
                    raise ValueError("Model {} has more than one DistKey.".format(
                        model.__name__))
                distkey = idx
        if distkey:
            # It would be nicer to enforce this by having DistKey's ctor accept exactly
            # one field. However overriding the superclass Index ctor causes problems
            # with migrations, so we validate here instead.
            if len(distkey.fields) != 1:
                raise ValueError('DistKey on model {} must have exactly '
                                 'one field.'.format(model.__name__))
            normalized_field = quoted_column_name(distkey.fields[0])
            create_options.append("DISTKEY({})".format(normalized_field))
            # TODO: Support DISTSTYLE ALL.

        if model._meta.ordering:
            normalized_fields = [
                quoted_column_name(field)
                for field in model._meta.ordering
            ]
            create_options.append("SORTKEY({fields})".format(
                fields=', '.join(normalized_fields)))

        return " ".join(create_options)


redshift_data_types = {
    "AutoField": "integer identity(1, 1)",
    "BigAutoField": "bigint identity(1, 1)",
    "TextField": "varchar(max)",  # text must be varchar(max)
    "UUIDField": "varchar(32)",  # redshift doesn't support uuid fields
}


class DatabaseCreation(BasePGDatabaseCreation):
    pass


class DatabaseWrapper(BasePGDatabaseWrapper):
    vendor = 'redshift'

    SchemaEditorClass = DatabaseSchemaEditor

    data_types = deepcopy(BasePGDatabaseWrapper.data_types)
    data_types.update(redshift_data_types)

    def __init__(self, *args, **kwargs):
        super(DatabaseWrapper, self).__init__(*args, **kwargs)

        self.features = DatabaseFeatures(self)
        self.ops = DatabaseOperations(self)
        self.client = DatabaseClient(self)
        self.creation = DatabaseCreation(self)
        self.introspection = DatabaseIntrospection(self)
        self.validation = BaseDatabaseValidation(self)

    def check_constraints(self, table_names=None):
        """
        No constraints to check in Redshift.
        """
        pass