python source code of debugmode

"""
Provides `DebugMode`, an evaluation mode for debugging theano internals.

TODO: add support for IfElse Op, LazyLinker, PureOp, etc.

"""
from __future__ import absolute_import, print_function, division

import copy
import sys
import gc
import logging
import six.moves.copyreg as copyreg
from itertools import chain, product as itertools_product
from theano.compat import izip

import numpy

import theano
from theano import gof, config
from theano.compat import get_unbound_function
from six import iteritems, itervalues
from six.moves import StringIO, xrange
from theano.gof import (graph, utils, link, ops_with_inner_function)
from theano.gof.link import raise_with_op
from theano.compile.function_module import (
    FunctionMaker, Function, infer_reuse_pattern,
    SymbolicInputKit, SymbolicOutput, Supervisor, std_fgraph)
from theano.compile.mode import Mode, register_mode
from theano.compile.ops import OutputGuard

__docformat__ = "restructuredtext en"
_logger = logging.getLogger("theano.compile.debugmode")


# Filter to avoid duplicating optimization warnings
class NoDuplicateOptWarningFilter(logging.Filter):
    prev_msgs = set([])

    def filter(self, record):
        msg = record.getMessage()
        if msg.startswith('Optimization Warning: '):
            if msg in self.prev_msgs:
                return False
            else:
                self.prev_msgs.add(msg)
                return True
        return True

_logger.addFilter(NoDuplicateOptWarningFilter())


########################
#
# Exceptions
#
########################
class DebugModeError(Exception):
    """
    Generic Exception raised to indicate an internal theano problem.

    """

    pass


class BadThunkOutput(DebugModeError):
    """
    Exception: Calling the same Op twice gives inconsistent outputs.

    It can be raised, for instance, if an Op's c_code and perform method
    do not agree, or if one of these methods do not give the same result
    when called twice with the same inputs (but different memory layouts
    for the output).

    """

    r = None
    """
    The `Variable` instance for which conflicting values were computed.

    """

    thunk1 = ''
    val1 = None
    """
    The value computed by `thunk1`.

    """

    thunk2 = ''
    val2 = None
    """
    The value computed by `thunk2`.

    """

    def __init__(self, r, thunk1, val1, thunk2, val2, inputs_val=()):
        super(BadThunkOutput, self).__init__()
        self.r = r
        self.thunk1 = thunk1
        self.val1 = val1
        self.thunk2 = thunk2
        self.val2 = val2
        self.inputs_val = inputs_val

    def offending_op(self):
        """
        Return the Op class whose c_code and perform implementations
        didn't match.

        """
        return type(self.r.owner.op)

    def __str__(self):
        return self.str_diagnostic()

    def str_diagnostic(self):
        """
        Return a pretty multiline string representing the cause of
        the exception.

        """
        sio = StringIO()
        print("BadThunkOutput", file=sio)
        print("  Apply   :", self.r.owner, file=sio)
        print("  op      :", self.offending_op(), file=sio)
        print("  Outputs Type:", self.r.type, file=sio)
        print("  Outputs Shape:", getattr(self.val1, 'shape', None), file=sio)
        print("  Outputs Strides:", getattr(self.val1, 'strides', None),
              file=sio)
        print("  Inputs Type :", [i.type for i in self.r.owner.inputs],
              file=sio)
        print("  Inputs Shape:", [getattr(val, 'shape', None)
                                  for val in self.inputs_val], file=sio)
        print("  Inputs Strides:", [getattr(val, 'strides', None)
                                    for val in self.inputs_val], file=sio)
        scalar_values = []
        for ipt in self.inputs_val:
            if getattr(ipt, "size", -1) <= 10:
                scalar_values.append(ipt)
            else:
                scalar_values.append("not shown")
        print("  Inputs values: %s" % scalar_values, file=sio)
        print("  Bad Variable:", self.r, file=sio)
        print("  thunk1  :", self.thunk1, file=sio)
        print("  thunk2  :", self.thunk2, file=sio)

        # Don't import it at the top of the file to prevent circular import.
        utt = theano.tests.unittest_tools
        print(utt.str_diagnostic(self.val1, self.val2, None, None), file=sio)
        ret = sio.getvalue()
        return ret


class BadOptimization(DebugModeError):
    """
    Exception: some variable and its substitute take different runtime values.

    """

    new_r = None
    """
    A `Variable` instance that took a different value from `old_r`,
    but which replaced `old_r`.

    """

    old_r = None
    """
    A `Variable` instance that was replaced by `new_r`.

    """

    old_r_val = None
    """
    The value computed for `old_r`.

    """

    new_r_val = None
    """
    The value computed for `new_r`.

    """

    reason = None
    """
    An object that indicates why old_r was turned into new_r.

    Convention is that this is the name of the optimization that
    requested the replacement.

    """

    old_graph = ""
    """
    A multiline string representation of the graph leading to
    old_r, at the time of the replacement.

    """

    new_graph = ""
    """
    A multiline string representation of the graph leading to
    new_r, at the time of the replacement.

    """

    def __init__(self, old_r, new_r, old_r_val, new_r_val, reason,
                 old_graph, new_graph):
        super(BadOptimization, self).__init__()
        self.old_r = old_r
        self.new_r = new_r
        self.old_r_val = old_r_val
        self.new_r_val = new_r_val
        self.reason = reason
        self.old_graph = old_graph
        self.new_graph = new_graph

    def __str__(self):
        return self.str_diagnostic()

    def str_diagnostic(self):
        """
        Return a pretty multiline string representating the cause
        of the exception.

        """
        sio = StringIO()
        val_str_len_limit = 800
        print("BadOptimization Error", super(BadOptimization,
                                             self).__str__(), file=sio)
        print("  Variable: id", id(self.new_r), self.new_r, file=sio)
        print("  Op", self.new_r.owner, file=sio)
        print("  Value Type:", type(self.new_r_val), file=sio)
        try:
            ssio = StringIO()
            print("  Old Value shape, dtype, strides:", end=' ', file=ssio)
            print(self.old_r_val.shape, end=' ', file=ssio)
            print(self.old_r_val.dtype, end=' ', file=ssio)
            print(self.old_r_val.strides, file=ssio)
            # only if all succeeds to we add anything to sio
            print(ssio.getvalue(), file=sio)
        except Exception:
            pass

        str_old_r_val = str(self.old_r_val)
        if len(str_old_r_val) > val_str_len_limit:
            print("  Old Value: ", str(self.old_r_val)[
                :val_str_len_limit], '...', file=sio)
        else:
            print("  Old Value: ", str(self.old_r_val), file=sio)

        try:
            ssio = StringIO()
            print("  New Value shape, dtype, strides:", end=' ', file=ssio)
            print(self.new_r_val.shape, end=' ', file=ssio)
            print(self.new_r_val.dtype, end=' ', file=ssio)
            print(self.new_r_val.strides, file=ssio)
            # only if all succeeds to we add anything to sio
            print(ssio.getvalue(), file=sio)
        except Exception:
            pass
        str_new_r_val = str(self.new_r_val)
        if len(str_new_r_val) > val_str_len_limit:
            print("  New Value: ", str(self.new_r_val)[
                :val_str_len_limit], '...', file=sio)
        else:
            print("  New Value: ", str(self.new_r_val), file=sio)

        try:
            ov = numpy.asarray(self.old_r_val)
            nv = numpy.asarray(self.new_r_val)
            ssio = StringIO()
            abs_diff = numpy.absolute(nv - ov)
            print("  Max Abs Diff: ", numpy.max(abs_diff), file=ssio)
            print("  Mean Abs Diff: ", numpy.mean(abs_diff), file=ssio)
            print("  Median Abs Diff: ", numpy.median(abs_diff), file=ssio)
            print("  Std Abs Diff: ", numpy.std(abs_diff), file=ssio)
            arg_max_val = numpy.argmax(abs_diff)
            values_at_max = (nv.flatten()[arg_max_val],
                             ov.flatten()[arg_max_val])
            print("  Value at Max Diff: ", values_at_max, file=ssio)

            # N.B. the maximum(..., 1e-8) protects against div by 0 when
            #      nv == ov == 0
            reldiff = (abs_diff /
                       numpy.maaximum(numpy.absolute(nv) + numpy.absolute(ov),
                                      1e-8))
            print("  Max Rel Diff: ", numpy.max(reldiff), file=ssio)
            print("  Mean Rel Diff: ", numpy.mean(reldiff), file=ssio)
            print("  Median Rel Diff: ", numpy.median(reldiff), file=ssio)
            print("  Std Rel Diff: ", numpy.std(reldiff), file=ssio)
            arg_max_val = numpy.argmax(reldiff)
            values_at_max = (nv.flatten()[arg_max_val],
                             ov.flatten()[arg_max_val])
            print("  Value at Max Diff: ", values_at_max, file=ssio)
            # only if all succeeds to we add anything to sio
            print(ssio.getvalue(), file=sio)
        except Exception:
            pass

        print("  Reason: ", str(self.reason), file=sio)
        print("  Old Graph:", file=sio)
        print(self.old_graph, file=sio)
        print("  New Graph:", file=sio)
        print(self.new_graph, file=sio)
        print("", file=sio)
        print("Hint: relax the tolerance by setting tensor.cmp_sloppy=1",
              file=sio)
        print("  or even tensor.cmp_sloppy=2 for less-strict comparison",
              file=sio)
        return sio.getvalue()


class BadDestroyMap(DebugModeError):
    """
    Exception: Some perform() or c_code() modified an input that
    wasn't in the destroy_map.

    """
    def __init__(self, node, idx, old_val, new_val, perform):
        super(BadDestroyMap, self).__init__()
        self.node = node
        self.idx = idx
        self.old_val = old_val
        self.new_val = new_val
        self.perform = perform

    def __str__(self):
        sio = StringIO()
        print("  node:", self.node, file=sio)
        print("  perform:", self.perform, file=sio)
        print("  node.inputs:", [(str(i), id(i))
                                 for i in self.node.inputs], file=sio)
        print("  destroy_map:", getattr(self.node.op,
                                        'destroy_map', {}), file=sio)
        print("  changed input idx:", self.idx, file=sio)
        print("  changed input type:", self.node.inputs[self.idx].type,
              file=sio)
        print("  repr (old val):", repr(self.old_val), file=sio)
        print("  repr (new val):", repr(self.new_val), file=sio)
        try:
            npy_old_val = numpy.asarray(self.old_val)
            npy_new_val = numpy.asarray(self.new_val)
            print("  value dtype (new <space> old):", npy_new_val.dtype,
                  npy_old_val.dtype, file=sio)
            print("  value shape (new <space> old):", npy_new_val.shape,
                  npy_old_val.shape, file=sio)
            print("  value min (new <space> old):", npy_new_val.min(),
                  npy_old_val.min(), file=sio)
            print("  value max (new <space> old):", npy_new_val.max(),
                  npy_old_val.max(), file=sio)
            delta = npy_new_val - npy_old_val
            print("  value min (new-old):", delta.min(), file=sio)
            print("  value max (new-old):", delta.max(), file=sio)
            print("  value argmin (new-old):",
                  numpy.unravel_index(delta.argmin(), npy_new_val.shape),
                  file=sio)
            print("  value argmax (new-old):",
                  numpy.unravel_index(delta.argmax(), npy_new_val.shape),
                  file=sio)
            print("  location of first 10 mismatches:",
                  numpy.transpose(numpy.nonzero(delta))[:10], file=sio)
            print("", file=sio)
        except Exception as e:
            print("(Numpy-hints failed with: %s)" % str(e), file=sio)
        print("  Hint: this can also be caused by a deficient "
              "values_eq_approx() or __eq__() implementation "
              "[which compared input values]", file=sio)
        return sio.getvalue()


class BadViewMap(DebugModeError):
    """
    Exception: Some perform() or c_code() created a memory alias
    that wasn't in the view_map.

    """

    def __init__(self, node, output_idx, out_storage,
                 in_alias_idx=None, out_alias_idx=None):
        super(BadViewMap, self).__init__()
        self.node = node
        self.output_idx = output_idx
        self.out_storage = out_storage
        self.in_alias_idx = in_alias_idx
        self.out_alias_idx = out_alias_idx

    def __str__(self):
        sio = StringIO()
        print("  node:", self.node, file=sio)
        print("  node.inputs:", [(str(i), id(i))
                                 for i in self.node.inputs], file=sio)
        print("  node.outputs:", [(str(i), id(i))
                                  for i in self.node.outputs], file=sio)
        print("  view_map:", getattr(self.node.op, 'view_map', {}), file=sio)
        print("  destroy_map:", getattr(self.node.op,
                                        'destroy_map', {}), file=sio)
        print("  aliased output:", self.output_idx, file=sio)
        print("  aliased output storage:", self.out_storage, file=sio)
        if self.in_alias_idx:
            print("  aliased to inputs:", self.in_alias_idx, file=sio)
        if self.out_alias_idx:
            print("  aliased to outputs:", self.out_alias_idx, file=sio)
        return sio.getvalue()


class StochasticOrder(DebugModeError):
    """
    Exception: Repeated Optimizations of the same graph do not give
    identical results.

    The most common cause is that an Optimization iterates over some
    objects in a memory-address-dependent order (such as id() or
    object.hash()).  If you see this error and you think it is related
    to optimizations within Theano, email theano-dev with the message
    attached to this exception.

    """
    pass


class InvalidValueError(DebugModeError):
    """
    Exception: some Op an output value that is inconsistent with
    the Type of that output.

    """

    def __init__(self, r, v, client_node=None, hint='none',
                 specific_hint='none'):
        super(InvalidValueError, self).__init__()
        self.r = r
        self.v = v
        self.client_node = client_node
        self.hint = hint
        self.specific_hint = specific_hint

    def __str__(self):
        r, v = self.r, self.v
        type_r = r.type
        type_v = type(v)
        v_val = str(v)[0:100]
        v_dtype = 'N/A'
        v_shape = 'N/A'
        v_min = 'N/A'
        v_max = 'N/A'
        v_isfinite = 'N/A'
        try:
            v_shape = v.shape
            v_dtype = v.dtype
            v_min = v.min()
            v_max = v.max()
            v_isfinite = numpy.all(numpy.isfinite(v))
        except Exception:
            pass
        client_node = self.client_node
        hint = self.hint
        specific_hint = self.specific_hint
        context = debugprint(r, prefix='  ', depth=12,
                             file=StringIO()).getvalue()
        return """InvalidValueError
        type(variable) = %(type_r)s
        variable       = %(r)s
        type(value)    = %(type_v)s
        dtype(value)   = %(v_dtype)s
        shape(value)   = %(v_shape)s
        value          = %(v_val)s
        min(value)     = %(v_min)s
        max(value)     = %(v_max)s
        isfinite       = %(v_isfinite)s
        client_node    = %(client_node)s
        hint           = %(hint)s
        specific_hint  = %(specific_hint)s
        context        = ...\n%(context)s
        """ % locals()

########################
#
# Private Functions
#
########################


def char_from_number(number):
    """
    Converts number to string by rendering it in base 26 using
    capital letters as digits.

    """

    base = 26

    rval = ""

    if number == 0:
        rval = 'A'

    while number != 0:
        remainder = number % base
        new_char = chr(ord('A') + remainder)
        rval = new_char + rval
        number //= base

    return rval


def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
               file=sys.stdout, print_destroy_map=False,
               print_view_map=False, order=None, ids='CHAR',
               stop_on_name=False, prefix_child=None,
               scan_ops=None, profile=None,
               scan_inner_to_outer_inputs=None, smap=None):
    """
    Print the graph leading to `r` to given depth.

    Parameters
    ----------
    r
        Variable instance.
    prefix
        Prefix to each line (typically some number of spaces).
    depth
        Maximum recursion depth (Default -1 for unlimited).
    done
        dict of Apply instances that have already been printed and their
        associated printed ids.
    print_type
        Whether to print the Variable type after the other infos.
    file
        File-like object to which to print.
    print_destroy_map
        Whether to print the op destroy_map after other info.
    print_view_map
        Whether to print the op view_map after other info.
    order
        If not empty will print the index in the toposort.
    ids
        How do we print the identifier of the variable :
        id - print the python id value,
        int - print integer character,
        CHAR - print capital character,
        "" - don't print an identifier.
    stop_on_name
        When True, if a node in the graph has a name, we don't print anything
        below it.
    scan_ops
        Scan ops in the graph will be added inside this list for later printing
        purposes.
    scan_inner_to_outer_inputs
        A dictionary mapping a scan ops inner function inputs to the scan op
        inputs (outer inputs) for printing purposes.
    smap
        None or the storage_map when printing an Theano function.
    """
    if depth == 0:
        return

    if order is None:
        order = []

    if done is None:
        done = dict()

    if scan_ops is None:
        scan_ops = []

    if print_type:
        type_str = ' <%s>' % r.type
    else:
        type_str = ''

    if prefix_child is None:
        prefix_child = prefix

    def get_id_str(obj):
        if obj in done:
            id_str = done[obj]
        elif ids == "id":
            id_str = "[id %s]" % str(id(r))
        elif ids == "int":
            id_str = "[id %s]" % str(len(done))
        elif ids == "CHAR":
            id_str = "[id %s]" % char_from_number(len(done))
        elif ids == "":
            id_str = ""
        done[obj] = id_str

        return id_str

    if hasattr(r.owner, 'op'):
        # this variable is the output of computation,
        # so just print out the apply
        a = r.owner

        r_name = getattr(r, 'name', '')
        # normally if the name isn't set, it'll be None, so
        # r_name is None here
        if r_name is None:
            r_name = ''

        if print_destroy_map:
            destroy_map_str = str(getattr(r.owner.op, 'destroy_map', ''))
        else:
            destroy_map_str = ''

        if print_view_map:
            view_map_str = str(getattr(r.owner.op, 'view_map', ''))
        else:
            view_map_str = ''
        if destroy_map_str and destroy_map_str != '{}':
            destroy_map_str = 'd=' + destroy_map_str
        if view_map_str and view_map_str != '{}':
            view_map_str = 'v=' + view_map_str

        o = ''
        if order:
            o = str(order.index(r.owner))

        already_printed = a in done  # get_id_str put it in the dict
        id_str = get_id_str(a)

        if len(a.outputs) == 1:
            idx = ""
        else:
            idx = ".%i" % a.outputs.index(r)
        data = ""
        if smap:
            data = " " + str(smap.get(a.outputs[0], ''))
        if profile is None or a not in profile.apply_time:
            print('%s%s%s %s%s \'%s\' %s %s %s%s' % (prefix, a.op,
                                                     idx,
                                                     id_str, type_str,
                                                     r_name,
                                                     destroy_map_str,
                                                     view_map_str,
                                                     o, data), file=file)
        else:
            op_time = profile.apply_time[a]
            op_time_percent = (op_time / profile.fct_call_time) * 100
            tot_time_dict = profile.compute_total_times()
            tot_time = tot_time_dict[a]
            tot_time_percent = (tot_time_dict[a] / profile.fct_call_time) * 100

            if len(a.outputs) == 1:
                idx = ""
            else:
                idx = ".%i" % a.outputs.index(r)
            print("%s%s%s %s%s '%s' %s %s %s%s --> "
                  "%8.2es %4.1f%% %8.2es %4.1f%%"
                  % (prefix, a.op,
                     idx,
                     id_str, type_str,
                     r_name,
                     destroy_map_str,
                     view_map_str,
                     o, data,
                     op_time,
                     op_time_percent,
                     tot_time,
                     tot_time_percent), file=file)

        if not already_printed:
            if (not stop_on_name or
                    not (hasattr(r, 'name') and r.name is not None)):
                new_prefix = prefix_child + ' |'
                new_prefix_child = prefix_child + ' |'

                for idx, i in enumerate(a.inputs):
                    if idx == len(a.inputs) - 1:
                        new_prefix_child = prefix_child + '  '

                    if hasattr(i, 'owner') and hasattr(i.owner, 'op'):
                        if isinstance(i.owner.op,
                                      theano.scan_module.scan_op.Scan):
                            scan_ops.append(i)

                    debugprint(
                        i, new_prefix, depth=depth - 1, done=done,
                        print_type=print_type, file=file, order=order,
                        ids=ids, stop_on_name=stop_on_name,
                        prefix_child=new_prefix_child, scan_ops=scan_ops,
                        profile=profile,
                        scan_inner_to_outer_inputs=scan_inner_to_outer_inputs,
                        smap=smap)
    else:
        if scan_inner_to_outer_inputs is not None and\
           r in scan_inner_to_outer_inputs:

            id_str = get_id_str(r)
            outer_r = scan_inner_to_outer_inputs[r]

            if hasattr(outer_r.owner, 'op'):
                outer_id_str = get_id_str(outer_r.owner)
            else:
                outer_id_str = get_id_str(outer_r)
            print('%s%s %s%s -> %s' % (prefix, r, id_str, type_str,
                                       outer_id_str), file=file)
        else:
            # this is an input variable
            data = ""
            if smap:
                data = " " + str(smap.get(r, ''))
            id_str = get_id_str(r)
            print('%s%s %s%s%s' % (prefix, r, id_str,
                                   type_str, data),
                  file=file)

    return file


def _optcheck_fgraph(input_specs, output_specs, accept_inplace=False):
    """
    Create a FunctionGraph for debugging.

    Parameters
    ----------
    input_specs: WRITEME
        fgraph inputs.
    output_specs: WRITEME
        fgraph outputs.
    accept_inplace : bool
        Are inplace ops permitted in the original graph?

    Returns
    -------
    FunctionGraph
        A new FunctionGraph with a cloned graph, with debugging `Feature`
        instances already installed.

    """
    orig_inputs = [spec.variable for spec in input_specs]
    updates = [spec.update for spec in input_specs if spec.update]
    orig_outputs = [spec.variable for spec in output_specs] + updates

    equivalence_tracker = _VariableEquivalenceTracker()
    fgraph = gof.fg.FunctionGraph(orig_inputs, orig_outputs,
                                  features=[equivalence_tracker])
    # DestroyHandler may not be needed yet, as there is usually no
    # inplace operation in the graph at this stage. DestroyHandler
    # will be installed by an optimization after canonicalization,
    # before the inplace operations are applied. This results in a big
    # speed gain.
    #
    # If inplace operations are accepted and present, however,
    # DestroyHandler will be inserted in the loop below.

    if not accept_inplace:
        for node in fgraph.apply_nodes:
            if getattr(node.op, 'destroy_map', None):
                raise TypeError("Graph must not contain inplace operations",
                                node)
    else:
        # However, if some inplace ops are already in the graph,
        # DestroyHandler is needed for the Supervisor below to work correctly.
        for node in fgraph.apply_nodes:
            if getattr(node.op, 'destroy_map', None):
                fgraph.attach_feature(gof.DestroyHandler())
                break

    # We need to protect all immutable inputs from inplace operations.
    fgraph.attach_feature(Supervisor(
        input for spec, input in zip(input_specs, fgraph.inputs)
        if not (spec.mutable or (hasattr(fgraph, 'destroyers') and
                                 fgraph.destroyers(input)))))

    for feature in std_fgraph.features:
        fgraph.attach_feature(feature())

    return fgraph, list(map(SymbolicOutput, updates)), equivalence_tracker


class DataDestroyed():
    # this is a singleton class We put it in the storage_map when the
    # variable value was destroyed to prevent reusing bad value for
    # it.
    pass

data_destroyed = DataDestroyed()


def check_eq(var, val1, val2):
    if hasattr(var.tag, 'values_eq_approx'):
        return var.tag.values_eq_approx(val1, val2)
    else:
        return var.type.values_eq_approx(val1, val2)


def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
                  clobber_dr_vals=True,
                  perform=None, warn_input_not_reused=True):
    """
    Raise BadDestroyMap if necessary, update dr_vals.

    Returns a list of output variables that actually worked inplace
    (their value is aliased to the value of at least one input).

    It modify the storage_map to remove node.inputs variable that have
    been destroyed.

    """
    destroyed_idx_list = []
    destroy_map = getattr(node.op, 'destroy_map', {})
    for o_pos, i_pos_list in iteritems(destroy_map):
        destroyed_idx_list.extend(i_pos_list)
    destroyed_res_list = [node.inputs[i] for i in destroyed_idx_list]

    actually_inplace_outputs = []
    dmap = getattr(node.op, 'destroy_map', {})
    for oo, ii in iteritems(dmap):
        var = node.outputs[oo]
        out_var = storage_map[var][0]
        in_var = storage_map[node.inputs[ii[0]]][0]
        if (hasattr(var.type, 'may_share_memory') and
                var.type.may_share_memory(out_var, in_var)):
            actually_inplace_outputs.append(node.outputs[oo])

        if warn_input_not_reused and destroyed_res_list:
            if isinstance(node.op, OutputGuard):
                # The point of OutputGuard is to be declared as destructive
                # while not destroying anything
                continue
            if out_var is not in_var:
                _logger.warning("Optimization Warning: input idx %d marked "
                                "as destroyed was not changed for node '%s'",
                                ii[0], str(node))

    vmap = getattr(node.op, 'view_map', {})
    for oo, ii in iteritems(vmap):
        var = node.outputs[oo]
        out_var = storage_map[var][0]
        in_var = storage_map[node.inputs[ii[0]]][0]
        may_share = (hasattr(var.type, 'may_share_memory') and
                     var.type.may_share_memory(out_var, in_var))
        if may_share:
            actually_inplace_outputs.append(node.outputs[oo])

        if warn_input_not_reused:
            # We don't try to optimize simple scalar and empty ndarray,
            # as this is not worth our time. This happen at least in
            # Subtensor when the output is a scalar But this depend on
            # the version of numpy!
            if getattr(out_var, 'size', 2) <= 1:
                continue
            if isinstance(node.op, OutputGuard):
                # This class is not in the final graph.
                continue
            if not may_share:
                _logger.warning("Optimization Warning: input idx %d marked "
                                "as viewed but new memory allocated by node "
                                "'%s'", ii[0], str(node))

    for r_idx, r in enumerate(node.inputs):
        if not r.type.values_eq(r_vals[r], storage_map[r][0]):
            # some input node 'r' got changed by running the node
            # this may or may not be ok...
            if r in destroyed_res_list:
                # ok, we expected r to be destroyed
                if node in active_nodes:
                    if dr_vals.get(r, (0, node))[1] is not node:
                        # bad: there should only be one active node
                        # that destroys any variable
                        raise Exception('failure in topological ordering')
                    if clobber_dr_vals:
                        # no copy, this is the last use of this variable
                        dr_vals[r] = (storage_map[r][0], node)
                    # make sure that dr_vals[r] doens't get used again
                    storage_map[r][0] = data_destroyed
            else:
                raise BadDestroyMap(node, r_idx, r_vals[r],
                                    storage_map[r][0], perform)

    return actually_inplace_outputs


def _check_viewmap(node, storage_map):
    """
    This functions raises a BadViewMap exception when it detects the
    following:
    - Output node storages aliased to input storage, with no declaration
      in view_map.
    - If not aliased to an input, check if two outputs are aliased together
      and used subsequently in the graph.

    """

    for oi, onode in enumerate(node.outputs):

        good_alias, bad_alias = {}, {}
        outstorage = storage_map[onode][0]

        # first find out which input it aliases
        view_map = getattr(node.op, 'view_map', {})
        destroy_map = getattr(node.op, 'destroy_map', {})

        # In theory, theano's view_map only allows for 1 output to
        # alias 1 input. Checking for multiple aliases just in
        # case...

        for ii, inode in enumerate(node.inputs):
            in_storage = storage_map[inode][0]
            if in_storage is data_destroyed:
                # If the input have been destroyed, it can't be a
                # view. So no need to check. Also, we don't have the
                # original value, we we wouldn't be able to do this
                # useless check.
                continue
            if (hasattr(inode.type, 'may_share_memory') and
                    inode.type.may_share_memory(outstorage, in_storage)):

                nodeid = id(inode)
                bad_alias[nodeid] = ii

                # check that the aliasing was declared in [view|destroy]_map
                if ([ii] == view_map.get(oi, None) or
                        [ii] == destroy_map.get(oi, None)):

                    good_alias[nodeid] = bad_alias.pop(nodeid)

        # TODO: make sure this is correct
        # According to OB, duplicate inputs are rejected on build graph time
        # if they cause problems. So if they are here it should be ok.
        for key, val in iteritems(good_alias):
            bad_alias.pop(key, None)
        if bad_alias:
            raise BadViewMap(node, oi, outstorage, list(bad_alias.values()))

        # if its not aliased to input, check output->output aliasing
        if not good_alias and _is_used_in_graph(onode):
            for other_oi, other_onode in enumerate(node.outputs):
                if other_oi == oi:
                    continue

                other_storage = storage_map[other_onode][0]
                # check to see if we share memory with this other output
                # this is not a problem if the node is not actually used
                if (_is_used_in_graph(other_onode) and
                    hasattr(other_onode.type, 'may_share_memory') and
                    other_onode.type.may_share_memory(outstorage,
                                                      other_storage)):
                    raise BadViewMap(node, oi, outstorage,
                                     out_alias_idx=other_oi)


def _is_used_in_graph(var):
    """

    Returns
    -------
    bool
        True if `var` is used by another node in the graph.

    """
    return not(var.clients == [('output', 1)] or var.clients == [])


def _check_strides_match(a, b, warn_err, op):
    """

    Parameters
    ----------
    warn_err
        If 0, no warning, if 1 warning, if 2 error.

    """
    if warn_err == 0:
        return

    try:
        strides_eq = a.strides == b.strides
    except Exception:
        return  # no strides

    if not strides_eq:
        e = TypeError('Stride mismatch', (a.shape, b.shape, a.strides,
                                          b.strides, str(op)))
        if warn_err == 2:
            raise e
        else:
            print('WARNING:', e, file=sys.stderr)


def _lessbroken_deepcopy(a):
    """

    Parameters
    ----------
    a
        Any object

    Returns
    -------
    object
        A copy of `a` that shares no internal storage with the original
        (a deep copy). This function handles numpy arrays specially, because
        copy.deepcopy() called on a 0-d array will return a numpy scalar,
        not an array.

    """
    # this exists because copy.deepcopy on numpy arrays is broken
    # This logic is also in link.py
    from theano.gof.type import CDataType
    if type(a) in (numpy.ndarray, numpy.memmap):
        rval = a.copy()
    elif type(a) is CDataType._cdata_type:
        # This is not copyable (and should be used for constant data).
        rval = a
    else:
        rval = copy.deepcopy(a)

    assert type(rval) == type(a), (type(rval), type(a))
    if isinstance(rval, numpy.ndarray):
        assert rval.dtype == a.dtype
    return rval


def _find_bad_optimizations0(order, reasons, r_vals):
    """
    Use a simple algorithm to find broken optimizations.

    This algorithm is simple to understand, but sometimes when there's
    a problem it identifies the wrong optimization as the culprit.
    The problem stems from the fact that results are not evaluated in
    chronological order (looking at when they were introduced to the
    graph).

    """
    # iterate over variables looking for values that don't match the
    # values of the variables they replaced.  This is the sign of a
    # broken optimization.
    for i, node in enumerate(order):
        for new_r in node.outputs:
            for reason, r, old_graph_str, new_graph_str in reasons[new_r]:
                # check if the value for new_r doesn't match the value for r
                new_r_val = r_vals[new_r]
                r_val = r_vals[r]
                assert r.type == new_r.type

                if hasattr(new_r.tag, 'values_eq_approx'):
                    check = new_r.tag.values_eq_approx(r_val, new_r_val)
                elif hasattr(new_r, 'values_eq_approx'):
                    # This way will be deprecated later, but not right now
                    check = new_r.values_eq_approx(r_val, new_r_val)
                else:
                    check = r.type.values_eq_approx(r_val, new_r_val)
                if not check:
                    raise BadOptimization(old_r=r,
                                          new_r=new_r,
                                          old_r_val=r_val,
                                          new_r_val=new_r_val,
                                          reason=reason,
                                          old_graph=old_graph_str,
                                          new_graph=new_graph_str)


def _find_bad_optimizations1(order, reasons, r_vals):
    # iterate over variables looking for values that don't match the
    # values of the variables they replaced.  This is the sign of a
    # broken optimization.

    # identify sets of variables that are supposed to be equivalent
    equivalence_sets = {}
    program_position = {}  # node -> order idx

    for i, node in enumerate(order):
        program_position[node] = i
        for new_r in node.outputs:
            equivalence_sets.setdefault(new_r, set([new_r]))
            for reason, r, old_graph_str, new_graph_str in reasons[new_r]:
                equivalence_sets[new_r].update(equivalence_sets.setdefault(
                    r, set([r])))
                for er in equivalence_sets[r]:
                    equivalence_sets[er] = equivalence_sets[new_r]

    # identify equivalence sets that are broken
    equivalence_sets_broken = {}  # id(set) -> Bool
    there_is_a_problem = False
    for r, r_equiv in iteritems(equivalence_sets):
        if id(r_equiv) not in equivalence_sets_broken:
            equivalence_sets_broken[id(r_equiv)] = False
            # loop over the variables in the set comparing them to be
            # equal enough
            re0 = None
            for re in r_equiv:
                if re0:
                    new_r_val = r_vals[re]
                    r_val = r_vals[re0]
                    assert re.type == re0.type
                    if not re.type.values_eq_approx(r_val, new_r_val):
                        equivalence_sets_broken[id(r_equiv)] = True
                        there_is_a_problem = True
                re0 = re

    if there_is_a_problem:
        # which broken equivalence set has the earliest-occurring element?
        first_broken_set = None
        for i, node in enumerate(order):
            for r in node.outputs:
                r_equiv = equivalence_sets[r]
                if equivalence_sets_broken[id(r_equiv)]:
                    first_broken_set = r_equiv
        # TODO finish this to produce good diagnostic information
        print(first_broken_set)
        raise Exception('broken')


def _find_bad_optimizations2(order, reasons, r_vals):
    """
    Use a simple algorithm to find broken optimizations.

    This algorithm is simple to understand, but sometimes when there's
    a problem it identifies the wrong optimization as the culprit.
    The problem stems from the fact that results are not evaluated in
    chronological order (looking at when they were introduced to the
    graph).

    """

    checked_variables = set()

    def check_variable_norec(new_r):
        """
        Verify that `r` has the same value as the results it replaces.

        """
        for reason, r, old_graph_str, new_graph_str in reasons[new_r]:
            new_r_val = r_vals[new_r]
            r_val = r_vals[r]

            if (r.type != new_r.type) or (not r.type.values_eq_approx(
                    r_val, new_r_val)):
                raise BadOptimization(old_r=r,
                                      new_r=new_r,
                                      old_r_val=r_val,
                                      new_r_val=new_r_val,
                                      reason=reason,
                                      old_graph=old_graph_str,
                                      new_graph=new_graph_str)

    def check_variable(r):
        if r in checked_variables:
            return
        checked_variables.add(r)

        # (recursively) first check all the variables that could make
        # r look bad:
        list_of_vars = [old_r for (reason, old_r, olds, news) in reasons[r]]
        if (None is not r.owner):
            list_of_vars += r.owner.inputs

        for var_that_could_make_r_look_bad in list_of_vars:
            check_variable(var_that_could_make_r_look_bad)

        check_variable_norec(r)

    # iterate over variables looking for values that don't match the
    # values of the variables they replaced.  This is the sign of a
    # broken optimization.
    for i, node in enumerate(order):
        for new_r in node.outputs:
            check_variable(new_r)

_find_bad_optimizations = _find_bad_optimizations0


def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
                           storage_map, r_vals, dr_vals, perform,
                           active_order_set, inplace_outs, init_outputs):
    """
    Preallocate outputs in different memory layouts.

    """

    # To avoid circular imports
    from theano.tensor import TensorType
    from theano.sandbox.cuda import cuda_available, CudaNdarrayType
    if cuda_available:
        from theano.sandbox.cuda import CudaNdarray
        from theano.sandbox.cuda import dimshuffle as cuda_dimshuffle

    # TODO: Sparse? Scalar does not really make sense.

    # Do not preallocate memory for outputs that actually work inplace
    considered_outputs = []
    for r in node.outputs:
        if r not in inplace_outs:
            considered_outputs.append(r)

    # Output storage that was initially present in the storage_map
    if 'initial' in prealloc_modes or 'ALL' in prealloc_modes:
        initial_outputs = {}
        for r in considered_outputs:
            if r in init_outputs:
                initial_outputs[r] = init_outputs[r]

        if initial_outputs:
            yield ('initial', initial_outputs)

    # reuse_output: use a copy of the same storage returned the first time
    # TODO: optimization warning if the storage in reuse_outputs
    # is not reused
    if 'previous' in prealloc_modes or 'ALL' in prealloc_modes:
        reuse_outputs = {}
        for r in considered_outputs:
            # We want to reuse the exact same memory buffer,
            # so we keep the copy in r_vals
            new_r = _lessbroken_deepcopy(r_vals[r])
            reuse_outputs[r] = r_vals[r]
            r_vals[r] = new_r
            # Sometimes, outputs can be aliased together.
            # I'm not sure why it is legitimate, but there are tests about it.
            # So, we cannot fill r_vals[r] with def_val yet, we have to wait
            # until all output values are deepcopied.

        for r in considered_outputs:
            # There is no risk to overwrite inputs, since r does not work
            # inplace.
            if isinstance(r.type, (TensorType, CudaNdarrayType)):
                reuse_outputs[r][...] = numpy.asarray(
                    def_val).astype(r.type.dtype)

        if reuse_outputs:
            yield ('previous', reuse_outputs)
        # clear memory that is not needed any more
        del reuse_outputs

    # c_cont_output: use a c-continuous array
    # (for TensorType and CudaNdarray, else None)
    if 'c_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
        c_cont_outputs = {}
        for r in considered_outputs:
            if isinstance(r.type, (TensorType, CudaNdarrayType)):
                # Build a C-contiguous buffer
                new_buf = r.type.value_zeros(r_vals[r].shape)
                # CudaNdarray don't have flags field
                # assert new_buf.flags["C_CONTIGUOUS"]
                new_buf[...] = numpy.asarray(def_val).astype(r.type.dtype)

                c_cont_outputs[r] = new_buf

        if len(c_cont_outputs):
            yield ('c_contiguous', c_cont_outputs)
            del c_cont_outputs

    # f_cont_output: use a fortran-continuous ndarray
    # (for TensorType, only)
    if 'f_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
        f_cont_outputs = {}
        for r in considered_outputs:
            if isinstance(r.type, (TensorType, CudaNdarrayType)):
                new_buf = numpy.zeros(
                    shape=r_vals[r].shape,
                    dtype=r_vals[r].dtype,
                    order='F')
                new_buf[...] = def_val
                if isinstance(r.type, CudaNdarrayType):
                    # When the CudaNdarray is built, the underlying memory
                    # is c-contiguous, so we transpose it before and after.
                    new_buf = CudaNdarray(new_buf.T)
                    new_buf = cuda_dimshuffle(
                        new_buf, reversed(list(range(new_buf.ndim))))

                f_cont_outputs[r] = new_buf

        if len(f_cont_outputs):
            yield ('f_contiguous', f_cont_outputs)
            del f_cont_outputs

    # We assume that the different outputs of a same Op will behave
    # independently, and there is no need to test over all combinations
    # of outputs (the time taken is prohibitive).
    # When all outputs on a certain dimension are broadcastable, the Op
    # can assume that the shape is 1 on that dimension, and stride testing
    # is less relevant.
    # Dimensions should be align by the innermost index, so we iterate
    # from the end of shapes.
    if ('strided' in prealloc_modes or
            'wrong_size' in prealloc_modes or
            'ALL' in prealloc_modes):
        max_ndim = 0
        rev_out_broadcastable = []
        for r in considered_outputs:
            if isinstance(r.type, (TensorType, CudaNdarrayType)):
                if max_ndim < r.ndim:
                    rev_out_broadcastable += [True] * (r.ndim - max_ndim)
                    max_ndim = r.ndim
                assert len(rev_out_broadcastable) == max_ndim

                for i, b in enumerate(r.broadcastable[::-1]):
                    rev_out_broadcastable[i] = rev_out_broadcastable[i] and b
        out_broadcastable = rev_out_broadcastable[::-1]

    if 'strided' in prealloc_modes or 'ALL' in prealloc_modes:
        check_ndim = config.DebugMode.check_preallocated_output_ndim
        # Initial allocation
        init_strided = {}
        for r in considered_outputs:
            if isinstance(r.type, (TensorType, CudaNdarrayType)):
                # Create a buffer twice as large in every dimension,
                # except if broadcastable, or for dimensions above
                # config.DebugMode.check_preallocated_output_ndim
                buf_shape = []
                for s, b in zip(r_vals[r].shape, r.broadcastable):
                    if b or ((r.ndim - len(buf_shape)) > check_ndim):
                        buf_shape.append(s)
                    else:
                        buf_shape.append(s * 2)
                new_buf = r.type.value_zeros(buf_shape)
                new_buf[...] = numpy.asarray(def_val).astype(r.type.dtype)
                init_strided[r] = new_buf

        # The number of combinations is exponential in the number of
        # dimensions, and some ops can have tens of outputs. To prevent
        # tests from lasting days, we use the same strides for all
        # dimensions but the last check_ndim ones.
        # Moreover, to avoid memory problems, we do not test with strides
        # 2 and -2 on those dimensions.
        step_signs_list = []
        for b in out_broadcastable[-check_ndim:]:
            if b:
                step_signs_list.append((1,))
            else:
                step_signs_list.append((-1, 1))

        # Use the same step on all dimensions before the last check_ndim.
        if all(out_broadcastable[:-check_ndim]):
            step_signs_list = [(1,)] + step_signs_list
        else:
            step_signs_list = [(-1, 1)] + step_signs_list

        for step_signs in itertools_product(*step_signs_list):
            for step_size in (1, 2):
                strided = {}

                # First, the dimensions above check_ndim, then the other ones
                # Do not test with 2 or -2 for dimensions above check_ndim
                steps = [step_signs[0]] * len(out_broadcastable[:-check_ndim])
                steps += [s * step_size for s in step_signs[1:]]

                name = 'strided%s' % str(tuple(steps))
                for r in considered_outputs:
                    if r in init_strided:
                        strides = []
                        shapes = []
                        for i, size in enumerate(r_vals[r].shape):
                            shapes.append(slice(None, size, None))
                            strides.append(slice(None, None, steps[i]))

                        r_buf = init_strided[r]

                        if r_buf.ndim > 0:
                            r_buf = r_buf[tuple(strides)][tuple(shapes)]
                        assert r_buf.shape == r_vals[r].shape

                        r_buf[...] = numpy.asarray(def_val).astype(r_buf.dtype)
                        strided[r] = r_buf

                if strided:
                    yield (name, strided)
                del strided

    if 'wrong_size' in prealloc_modes or 'ALL' in prealloc_modes:
        # For each dimension, try size-1, size, size+1
        for dim, b in enumerate(out_broadcastable):
            if b:
                # The shape has to be 1
                continue

            shape_diff = [0] * max_ndim
            for diff in (-1, 1):
                shape_diff[dim] = diff

                wrong_size = {}
                name = 'wrong_size%s' % str(tuple(shape_diff))

                for r in considered_outputs:
                    if isinstance(r.type, (TensorType, CudaNdarrayType)):
                        r_shape_diff = shape_diff[:r.ndim]
                        out_shape = [max((s + sd), 0)
                                     for s, sd in zip(r_vals[r].shape,
                                                      r_shape_diff)]
                        new_buf = r.type.value_zeros(out_shape)
                        new_buf[...] = numpy.asarray(
                            def_val).astype(r.type.dtype)
                        wrong_size[r] = new_buf

                if wrong_size:
                    yield (name, wrong_size)
                del wrong_size


def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
                               storage_map, r_vals, dr_vals, perform,
                               active_order_set, inplace_outs, init_outputs):
    """
    Try to apply thunk() on different output storages.

    """

    # If node has an inner compiled Theano function with mode DebugMode,
    # disable memory checks in that mode, since they were already run.
    try:
        changed_inner_mode = False
        if type(getattr(node, 'op', None)) in ops_with_inner_function:
            fn_attr_name = ops_with_inner_function[type(node.op)]
            fn = getattr(node.op, fn_attr_name, None)
            if (not fn or
                    not hasattr(fn, 'maker') or
                    not hasattr(fn.maker, 'mode')):
                _logger.warn('Expected theano function not found in %s.%s',
                             node.op, fn_attr_name)
            else:
                if isinstance(fn.maker.mode, DebugMode):
                    backup_mode = fn.maker.mode
                    new_mode = copy.copy(backup_mode)
                    # Disactivate as many checks as possible
                    new_mode.check_py_code = False
                    new_mode.check_isfinite = False
                    new_mode.require_matching_strides = 0
                    new_mode.check_preallocated_output = []
                    new_mode.stability_patience = 1
                    fn.maker.mode = new_mode
                    changed_inner_mode = True
                    _logger.info('changing inner mode')

        # Set of inputs that are marked as destroyed or viewed
        aliased_inputs = set()
        dmap = getattr(node.op, 'destroy_map', {})
        vmap = getattr(node.op, 'view_map', {})
        for i, r in enumerate(node.inputs):
            if any(i in v for v in chain(itervalues(dmap), itervalues(vmap))):
                aliased_inputs.add(r)

        _logger.debug('starting preallocated output checking')
        for (name, out_map) in _get_preallocated_maps(
                node, thunk, prealloc_modes, def_val, storage_map, r_vals,
                dr_vals, perform, active_order_set, inplace_outs,
                init_outputs):
            _logger.debug('  name = %s', name)

            thunk_name = '%s with %s output' % (perform, name)

            if not out_map:
                # Map is empty, there is no need to execute thunk() again
                _logger.warn('%s: out_map is empty', name)
                continue

            # Copy the inputs over, if they were marked as destroyed or viewed
            # (we will destroy the output at some point so it can destroy
            # the input)
            for r in aliased_inputs:
                storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])

            # Get the appropriate output storages
            # (no copy)
            for r in node.outputs:
                storage_map[r][0] = out_map.get(r, None)

            thunk()

            # Check outputs
            for r in node.outputs:
                if not r.type.is_valid_value(storage_map[r][0]):
                    raise InvalidValueError(
                        r, storage_map[r][0],
                        hint=thunk_name,
                        specific_hint=r.type.value_validity_msg(
                            storage_map[r][0]))

            _check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
                          clobber_dr_vals=False,
                          perform=thunk_name,
                          warn_input_not_reused=False)

            _check_viewmap(node, storage_map)

            for r in node.outputs:
                if not check_eq(r, r_vals[r], storage_map[r][0]):
                    # TODO: indicate it is not a C/Py problem
                    inputs_val = [storage_map[inp][0] for inp in
                                  r.owner.inputs]
                    raise BadThunkOutput(r,
                                         thunk1='Reference value',
                                         val1=r_vals[r],
                                         thunk2=thunk_name,
                                         val2=storage_map[r][0],
                                         inputs_val=inputs_val)

            # Clear storage_map
            for r in node.outputs:
                storage_map[r][0] = None

        _logger.debug('finished preallocated output checking')
    finally:
        if changed_inner_mode:
            _logger.info('changing mode back')
            fn.maker.mode = backup_mode


class _FunctionGraphEvent(object):
    """
    A record of an event in the life of an FunctionGraph.

    The __eq__ function is important here, as it is the basis for
    comparing optimization runs.

    """

    kind = ""
    """
    One of 'import', 'change', 'prune'.

    """

    node = None
    """
    Either 'output' or an Apply instance.

    """

    op = None
    """Either 'output' or an Op instance"""

    idx = None
    """
    Change events involve an position index of the input variable.

    """

    reason = None
    """
    Change events sometimes have a reason.

    """

    def __init__(self, kind, node, idx=None, reason=None):
        self.kind = kind
        if node == 'output':
            self.node = 'output'
            self.op = 'output'
        else:
            self.node = node
            self.op = node.op
        self.idx = idx
        self.reason = reason

    def __str__(self):
        if self.kind == 'change':
            if (self.op != 'output'):
                msg = str(len(self.node.inputs))
            else:
                msg = ''

            return ' '.join(['change',
                             self.reason,
                             str(self.op),
                             str(self.idx),
                             msg])
        else:
            return str(self.__dict__)

    def __eq__(self, other):
        rval = type(self) == type(other)
        if rval:
            # nodes are not compared because this comparison is
            # supposed to be true for corresponding events that happen
            # in different FunctionGraph instances (different graphs)
            for attr in ['kind', 'op', 'idx', 'reason']:
                rval = rval and getattr(self, attr) == getattr(other, attr)
        return rval

    def __ne__(self, other):
        return not (self == other)


class _VariableEquivalenceTracker(object):
    """
    A FunctionGraph Feature that keeps tabs on an FunctionGraph and
    tries to detect problems.

    """

    fgraph = None
    """WRITEME"""

    equiv = None
    """WRITEME"""

    active_nodes = None
    """WRITEME"""

    inactive_nodes = None
    """WRITEME"""

    all_variables_ever = None
    """WRITEME"""

    reasons = None
    """WRITEME"""

    replaced_by = None
    """WRITEME"""

    event_list = None
    """WRITEME"""

    def __init__(self):
        self.fgraph = None

    def on_attach(self, fgraph):
        assert self.fgraph is None
        self.equiv = {}
        self.active_nodes = set()
        self.inactive_nodes = set()
        self.fgraph = fgraph
        self.all_variables_ever = []
        self.reasons = {}
        self.replaced_by = {}
        self.event_list = []
        for node in fgraph.toposort():
            self.on_import(fgraph, node, "on_attach")

    def on_detach(self, fgraph):
        assert fgraph is self.fgraph
        self.fgraph = None

    def on_prune(self, fgraph, node, reason):
        self.event_list.append(_FunctionGraphEvent('prune', node,
                                                   reason=reason))
        assert node in self.active_nodes
        assert node not in self.inactive_nodes
        self.active_nodes.remove(node)
        self.inactive_nodes.add(node)

    def on_import(self, fgraph, node, reason):
        self.event_list.append(_FunctionGraphEvent('import', node,
                                                   reason=reason))

        assert node not in self.active_nodes
        self.active_nodes.add(node)

        if node in self.inactive_nodes:
            self.inactive_nodes.remove(node)
            for r in node.outputs:
                assert r in self.equiv
        else:
            for r in node.outputs:
                assert r not in self.equiv
                self.equiv[r] = set([r])
                self.all_variables_ever.append(r)
                self.reasons.setdefault(r, [])
                self.replaced_by.setdefault(r, [])
            for r in node.inputs:
                self.reasons.setdefault(r, [])
                self.replaced_by.setdefault(r, [])

    def on_change_input(self, fgraph, node, i, r, new_r, reason=None):
        self.event_list.append(_FunctionGraphEvent('change', node,
                                                   reason=str(reason), idx=i))

        self.reasons.setdefault(new_r, [])
        self.replaced_by.setdefault(new_r, [])

        append_reason = True
        for tup in self.reasons[new_r]:
            if tup[0] == reason and tup[1] is r:
                append_reason = False

        if append_reason:
            # N.B. compute the debugprint now, because future
            # optimizations will change the graph
            done = dict()
            self.reasons[new_r].append(
                (reason,
                 r,
                 debugprint(r, prefix='  ', depth=6,
                            file=StringIO(), done=done).getvalue(),
                 debugprint(new_r, prefix='  ', depth=6,
                            file=StringIO(), done=done).getvalue()))
            self.replaced_by[r].append((reason, new_r))

        if r in self.equiv:
            r_set = self.equiv[r]
        else:
            r_set = self.equiv.setdefault(r, set([r]))
            self.all_variables_ever.append(r)

        if new_r in self.equiv:
            new_r_set = self.equiv[new_r]
        else:
            new_r_set = self.equiv.setdefault(new_r, set([new_r]))
            self.all_variables_ever.append(new_r)

        assert new_r in new_r_set
        assert r in r_set

        # update one equivalence set to contain the other
        # transfer all the elements of the old one to the new one
        r_set.update(new_r_set)
        for like_new_r in new_r_set:
            self.equiv[like_new_r] = r_set
            assert like_new_r in r_set

        assert self.equiv[r] is r_set
        assert self.equiv[new_r] is r_set

    def printstuff(self):
        for key in self.equiv:
            print(key)
            for e in self.equiv[key]:
                print('  ', e)


# List of default version of make thunk.
# This is needed to know if the user overrided it.
# The GpuOp will be added here when theano.sandbox.cuda is imported.
default_make_thunk = [get_unbound_function(theano.gof.Op.make_thunk)]


# Debug mode cheats and initializes the linker in a different way in
# its maker so we can cheat some more by having a linker to satisfy
# the external requirements of the .linker attribute of a mode
# 1) it's a class instance
# 2) it a has a .clone() method
class _DummyLinker(object):
    # This is not a real linker anyway
    def clone(self, allow_gc=None):
        return self


class _Linker(gof.link.LocalLinker):
    """
    Special debugging linker.

    """

    def __init__(self, maker, schedule=None):
        super(gof.LocalLinker, self).__init__()
        self.fgraph = None
        self.maker = maker
        if schedule:
            self.schedule = schedule

    def accept(self, fgraph, no_recycling=None):
        if no_recycling is None:
            no_recycling = []
        if self.fgraph is not None and self.fgraph is not fgraph:
            assert type(self) is _Linker
            return type(self)(maker=self.maker).accept(fgraph, no_recycling)
        self.fgraph = fgraph
        self.no_recycling = no_recycling
        return self

    def make_all(self, profiler=None, input_storage=None,
                 output_storage=None, storage_map=None):
        # can't import at toplevel because of circular import TODO:
        # don't do this ugly hacky way of setting the
        # filter_checks_isfinite
        from theano.tensor import TensorType  # to set filter_check_isfinite

        fgraph = self.fgraph
        input_storage_ = input_storage
        output_storage_ = output_storage

        # Compute a topological ordering that IGNORES the destroy_map
        # of destructive Ops.  This will be OK, because every thunk is
        # evaluated on a copy of its input.
        fgraph_equiv = fgraph.equivalence_tracker
        order_outputs = copy.copy(fgraph_equiv.all_variables_ever)
        del fgraph_equiv
        order_outputs.reverse()
        order = graph.io_toposort(fgraph.inputs, order_outputs)

        # an ordering of just the active nodes
        active_order = self.schedule(fgraph)
        active_order_set = set(active_order)

        # Disable no_recycling, in order to be able to use
        # check_preallocated_output even on the output of the function.
        # no_recycling in individual thunks does not really matter, since
        # the function's outputs will always be freshly allocated.
        no_recycling = []

        input_storage, output_storage, storage_map = link.map_storage(
            fgraph, order, input_storage_, output_storage_, storage_map)

        thunks_py = []  # python thunks
        thunks_c = []  # c thunks

        for node in order:
            compute_map = {}
            for k in node.inputs:
                compute_map[k] = [True]
            for k in node.outputs:
                compute_map[k] = [False]

            # Some Ops define a make_thunk with the expectation that
            # it will be called before the C code is compiled, because
            # the compilation of some dependency is triggered there.
            thunk_other = None

            if (get_unbound_function(node.op.make_thunk) not in
                    default_make_thunk):
                thunk = node.op.make_thunk(node,
                                           storage_map,
                                           compute_map,
                                           no_recycling)
                thunk.inputs = [storage_map[v] for v in node.inputs]
                thunk.outputs = [storage_map[v] for v in node.outputs]
                thunk_other = thunk
            else:
                new_node = node.op.prepare_node(node, storage_map, compute_map)
                if new_node is not None:
                    node = new_node

            debug = hasattr(node.op, 'debug_perform')

            try:
                if not self.maker.mode.check_c_code or debug:
                    raise utils.MethodNotDefined()
                # Ops that do not inherit from gof.op.Op don't have certain
                # methods defined that the CLinker expects (Scan is an
                # example, ifelse is another of such classes that inherit
                # directly from PureOp)
                if not isinstance(node.op, gof.op.Op):
                    raise utils.MethodNotDefined()

                thunk = node.op.make_c_thunk(node, storage_map, compute_map,
                                             no_recycling)
                thunks_c.append(thunk)
            except (NotImplementedError, utils.MethodNotDefined):
                thunks_c.append(None)

            # Pure ops don't really have a perform ( or their perform just
            # raises an not implemented exception), so in those cases we
            # consider that we don't have a python implementation
            if (((self.maker.mode.check_py_code or thunks_c[-1] is None) and
                 node.op.perform.__code__ != gof.op.PureOp.perform.__code__) or
                    debug):
                thunk = node.op.make_py_thunk(node, storage_map, compute_map,
                                              no_recycling, debug=debug)
                thunks_py.append(thunk)
            else:
                thunks_py.append(None)

            if not self.maker.mode.check_c_code and thunks_py[-1] is None:
                _logger.warn("Op %s doesn't have a perform, "
                             "forcing check of the C code" % node.op)
                thunk = node.op.make_c_thunk(node, storage_map, compute_map,
                                             no_recycling)
                thunks_c[-1] = thunk

            # If the op defined its own make_thunk, use the generated thunk
            if thunk_other is not None:
                if thunks_py[-1] is None:
                    thunks_py[-1] = thunk_other
                elif thunks_c[-1] is None:
                    thunks_c[-1] = thunk_other
                else:
                    _logger.warn("We won't check the perform function "
                                 "of node '%s' but we will check its "
                                 "make_thunk function" % node)
                    thunks_py[-1] = thunk_other

        # Use self.no_recycling (that was passed in accept()) to always
        # use new memory storage when it is needed, in particular for the
        # function's outputs. no_recycling_map will be used in f() below.
        if self.no_recycling is True:
            no_recycling_map = list(storage_map.values())
            no_recycling_map = utils.difference(no_recycling_map,
                                                input_storage)
        else:
            no_recycling_map = [storage_map[r] for r in self.no_recycling
                                if r not in fgraph.inputs]

        # Precompute some things for storage pre-allocation
        def_val = int(config.unittests.rseed)

        #####
        # This is the function that runs when you evaluate the graph
        #####
        def f():
            ####
            # Note: `f` ignores the compute_map and evaluates the nodes in
            # topological order. In some sense, this is ok, and can be used
            # for now.
            #####
            _logger.debug("starting a DebugMode call")
            _logger.debug("self.maker.mode.check_preallocated_output: %s",
                          self.maker.mode.check_preallocated_output)
            for x in no_recycling_map:
                x[0] = None

            # nest all this in try-finally to put storage *back* into
            # storage_map when an exception is raised
            original_storage_map_keys = [r for r in storage_map
                                         if r.owner is None]

            try:
                # r_vals are the true values associated with each
                # variable in the graph they should not change during
                # the evaluation of this function, even when the graph
                # has destructive ops in it
                #
                # This dictionary is used to populate the storage_map
                # as necessary
                r_vals = {}

                # dr_vals are the values taken by variables after
                # being destroyed
                dr_vals = {}
                assert len(thunks_py) == len(order)

                # transfer the initial values from the storage_map to
                # the r_vals
                _logger.debug("DEBUGMODE: transfer initial values")
                # r_vals_initialized keeps track of the values that have
                # actually been transferred from storage_map to r_vals
                r_vals_initialized = []
                for r in storage_map:
                    if (r.owner is None):
                        if not r.type.is_valid_value(storage_map[r][0]):
                            # None may be a valid input value (for instance,
                            # for a Generic object). We only want to raise
                            # an error if it is not valid.
                            if (storage_map[r][0] is None):
                                raise InvalidValueError(
                                    r, storage_map[r][0],
                                    hint=("Graph Input '%s' is missing" %
                                          str(r)))
                            raise InvalidValueError(
                                r, storage_map[r][0],
                                hint=("Graph Input '%s' has invalid value "
                                      "%s" % (r, storage_map[r][0])))
                        r_vals[r] = storage_map[r][0]
                        storage_map[r][0] = None
                        r_vals_initialized.append(r)

                # store preallocated outputs in another map, and test
                # the thunks on them as output storages.
                init_outputs = {}
                for r in storage_map:
                    if r in fgraph.outputs:
                        if storage_map[r][0] is not None:
                            init_outputs[r] = storage_map[r][0]
                            storage_map[r][0] = None

                #####
                #  Precondition: the storage map is empty, transferred
                #  completely to r_vals
                #####
                for r, s in iteritems(storage_map):
                    if s[0] is not None:
                        print(r, s)
                    assert s[0] is None

                # try:
                # compute the value of all variables
                for i, (thunk_py, thunk_c, node) in enumerate(zip(thunks_py,
                                                                  thunks_c,
                                                                  order)):
                    _logger.debug("%i - starting node %i %s", i, i, node)

                    # put a copy of each input into the storage_map
                    # also, check that inputs have valid values
                    for r in node.inputs:
                        assert isinstance(r, gof.Variable)
                        assert r in r_vals
                        storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
                        if not r.type.is_valid_value(storage_map[r][0]):
                            raise InvalidValueError(r, storage_map[r][0],
                                                    client_node=node)

                    # On the first call to thunk_py(), its output
                    # storage will be None
                    if thunk_py:
                        _logger.debug("%i - running thunk_py with None as "
                                      "output storage", i)
                        try:
                            thunk_py()
                        except (utils.MethodNotDefined, NotImplementedError):
                            # shouldn't have put it into the list in
                            # the first place
                            thunk_py = None
                            thunks_py[i] = None
                        except Exception as e:
                            # I think that only 1 optimization can
                            # insert a given apply node. If that is not True,
                            # we would need to loop over all node outputs,
                            # But this make the output uglier.
                            reason = fgraph.equivalence_tracker.reasons[
                                node.outputs[0]]
                            if not reason:
                                raise
                            opt = str(reason[0][0])
                            msg = (
                                "An optimization (probably %s) inserted an "
                                "apply node that raise an error." % opt +
                                "\nThe information we have about this "
                                "optimizations is:" + str(reason[0][1]) +
                                "\n" + reason[0][2] +
                                "\n\nThe original exception: \n" + str(e))
                            new_e = e.__class__(msg)
                            exc_type, exc_value, exc_trace = sys.exc_info()
                            exc_value = new_e
                            raise_with_op(node, thunk_c,
                                          (exc_type, exc_value, exc_trace))

                    if thunk_py:
                        # check output values for type-correctness
                        for r in node.outputs:
                            if not r.type.is_valid_value(storage_map[r][0]):
                                hint2 = r.type.value_validity_msg(
                                    storage_map[r][0])
                                raise InvalidValueError(r, storage_map[r][0],
                                                        hint='perform output',
                                                        specific_hint=hint2)
                        warn_inp = config.DebugMode.warn_input_not_reused
                        py_inplace_outs = _check_inputs(
                            node, storage_map, r_vals, dr_vals,
                            active_order_set,
                            clobber_dr_vals=True, perform='py',
                            warn_input_not_reused=warn_inp)
                        _check_viewmap(node, storage_map)

                        # Retrieve each output from the storage_map.
                        # The return values of this first run will be
                        # the reference ones
                        for r in node.outputs:
                            assert r not in r_vals
                            r_vals[r] = storage_map[r][0]
                            # clear the storage_map of outputs for the thunk_c
                            storage_map[r][0] = None

                        if self.maker.mode.check_preallocated_output:
                            prealloc_modes = \
                                self.maker.mode.check_preallocated_output
                            _logger.debug(
                                '%i - calling _check_preallocated_output '
                                'with thunk_py', i)
                            _check_preallocated_output(
                                node=node,
                                thunk=thunk_py,
                                prealloc_modes=prealloc_modes,
                                def_val=def_val,
                                storage_map=storage_map,
                                r_vals=r_vals,
                                dr_vals=dr_vals,
                                perform='py',
                                active_order_set=active_order_set,
                                inplace_outs=py_inplace_outs,
                                init_outputs=init_outputs)

                        sys.stdout.flush()

                    if thunk_c:

                        clobber = True
                        if thunk_py:
                            dmap = getattr(node.op, 'destroy_map', {})
                            vmap = getattr(node.op, 'view_map', {})
                            for i, r in enumerate(node.inputs):
                                # if thunk_py ran, and we still got
                                # this far, it means that the
                                # destroy_map of the Op (and view_map)
                                # are accurate so we can assume that
                                # inputs not marked as destroyed have
                                # in fact not been destroyed.
                                # Therefore... we only need to
                                # overwrite inputs that *have* been
                                # marked as destroyed.  Inputs marked
                                # as viewd are unsafe too, because the
                                # corresponding output can be
                                # destroyed.
                                if any(i in v for v in chain(dmap.values(),
                                                             vmap.values())):
                                    storage_map[r][0] = _lessbroken_deepcopy(
                                        r_vals[r])

                            clobber = False

                        _logger.debug("%i - running thunk_c", i)
                        # First time, with None in output_storage
                        try:
                            thunk_c()
                        except Exception as e:
                            # I think that only 1 optimization can
                            # insert a given apply node. If that is not True,
                            # we would need to loop over all node outputs,
                            # But this make the output uglier.
                            reason = fgraph.equivalence_tracker.reasons[
                                node.outputs[0]]
                            if not reason:
                                raise
                            opt = str(reason[0][0])
                            msg = (
                                "An optimization (probably %s) inserted "
                                "an apply node that raise an error." % opt +
                                "\nThe information we have about this "
                                "optimizations is:" + str(reason[0][1]) +
                                "\n" + reason[0][2] +
                                "\n\nThe original exception: \n" + str(e))
                            new_e = e.__class__(msg)
                            exc_type, exc_value, exc_trace = sys.exc_info()
                            exc_value = new_e
                            raise_with_op(node, thunk_c,
                                          (exc_type, exc_value, exc_trace))

                        for r in node.outputs:
                            # check output values for type-correctness
                            if not r.type.is_valid_value(storage_map[r][0]):
                                raise InvalidValueError(r, storage_map[r][0],
                                                        hint='c output')

                            if thunk_py:
                                # because we put it in during the
                                # thunk_py branch
                                assert r in r_vals
                                # check for stride correctness (may
                                # raise exception)
                                _check_strides_match(
                                    r_vals[r], storage_map[r][0],
                                    self.maker.mode.require_matching_strides,
                                    node.op)

                        warn_inp = config.DebugMode.warn_input_not_reused
                        c_inplace_outs = _check_inputs(
                            node, storage_map, r_vals,
                            dr_vals, active_order_set,
                            clobber_dr_vals=clobber, perform='c',
                            warn_input_not_reused=warn_inp)

                        _check_viewmap(node, storage_map)

                        # Check with Python result
                        for r in node.outputs:
                            if r in r_vals:
                                # compares the version from thunk_py
                                # (in r_vals) to the version produced
                                # by thunk_c (in storage_map)
                                if not check_eq(r, r_vals[r],
                                                storage_map[r][0]):
                                    inputs_val = [storage_map[inp][0]
                                                  for inp in r.owner.inputs]
                                    raise BadThunkOutput(
                                        r, thunk1='perform', val1=r_vals[r],
                                        thunk2='c_code',
                                        val2=storage_map[r][0],
                                        inputs_val=inputs_val)
                            else:
                                # retrieve each output from the storage_map
                                r_vals[r] = storage_map[r][0]
                            # clear the storage_map for the thunk_c
                            storage_map[r][0] = None

                        if self.maker.mode.check_preallocated_output:
                            prealloc_modes = \
                                self.maker.mode.check_preallocated_output

                            def thunk():
                                try:
                                    thunk_c()
                                except Exception:
                                    raise_with_op(node, thunk_c)
                            _logger.debug(
                                '%i - calling _check_preallocated_output '
                                'with thunk_c', i)
                            _check_preallocated_output(
                                node=node,
                                thunk=thunk,
                                prealloc_modes=prealloc_modes,
                                def_val=def_val,
                                storage_map=storage_map,
                                r_vals=r_vals,
                                dr_vals=dr_vals,
                                perform='c code',
                                active_order_set=active_order_set,
                                inplace_outs=c_inplace_outs,
                                init_outputs=init_outputs)

                        sys.stdout.flush()

                    # we're done with this thunk
                    # clear everything out of the storage_map
                    for r in node.inputs:
                        storage_map[r][0] = None
                    _logger.debug("%i - done with node", i)
                    for r in node.outputs:
                        if r not in r_vals:
                            idx = order.index(node)
                            assert thunks_py[idx] is None, node
                            assert thunks_c[idx] is None, node
                            raise Exception("No code run for %s" % node)

                if False:
                    # This could be useful to help finding refcount problem.
                    # But it is very slow and it is not sure it will help.
                    gc.collect()

                _find_bad_optimizations(order,
                                        fgraph.equivalence_tracker.reasons,
                                        r_vals)

                #####
                #  Postcondition: the input and output variables are
                #  in the storage map, nothing more
                #####

                # Nothing should be in storage map after evaluating
                # each the thunk (specifically the last one)
                for r, s in iteritems(storage_map):
                    assert type(s) is list
                    assert s[0] is None

                # store our output variables to their respective storage lists
                for output, storage in zip(fgraph.outputs, output_storage):
                    storage[0] = r_vals[output]

                # transfer all inputs back to their respective storage lists
                for r in r_vals:
                    if r.owner is None:
                        if r in fgraph.inputs:
                            assert (storage_map[r] is
                                    input_storage[fgraph.inputs.index(r)])
                        storage_map[r][0] = r_vals[r]

                # if an input was destroyed, the destroyed value
                # should be returned
                for r in dr_vals:
                    assert dr_vals[r][0] is not None
                    if r.owner is None:
                        assert r in fgraph.inputs
                        # HACK TO LOOK LIKE A REAL DESTRUCTIVE ACTION
                        # TOOK PLACE
                        if ((type(dr_vals[r][0]) in
                             (numpy.ndarray, numpy.memmap)) and
                            (dr_vals[r][0].dtype ==
                             storage_map[r][0].dtype) and
                            (dr_vals[r][0].shape ==
                             storage_map[r][0].shape)):
                            if len(dr_vals[r][0].shape):
                                storage_map[r][0][:] = dr_vals[r][0]
                            else:
                                storage_map[r][0].itemset(dr_vals[r][0])
                        else:
                            storage_map[r][0] = dr_vals[r][0]
            except Exception:
                # Restore the initial state of storage_map
                for r in storage_map:
                    if r in original_storage_map_keys:
                        # If r was transferred to r_vals, put it back
                        if r in r_vals_initialized:
                            storage_map[r][0] = r_vals[r]
                    else:
                        # clear out any partially-computed stuff
                        storage_map[r][0] = None
                raise

            for r in storage_map:
                if (r.owner is None):
                    if not r.type.is_valid_value(None):
                        assert storage_map[r][0] is not None

            ###############
            # Done debugmode function call 'f'
            ##############

        def run_with_tensortype_filter_check(f):
            def deco():
                # WARNING: this is a global mechanism...
                # so it will screw up if we are trying to use
                # multiple modes at once.
                old_filter_checks_isfinite = TensorType.filter_checks_isfinite
                TensorType.filter_checks_isfinite = \
                    self.maker.mode.check_isfinite
                try:
                    return f()
                finally:
                    # put back the filter_checks_isfinite
                    TensorType.filter_checks_isfinite = \
                        old_filter_checks_isfinite
            return deco

        f = run_with_tensortype_filter_check(f)
        f.storage_map = storage_map
        f.allow_gc = True
        assert len(fgraph.inputs) == len(input_storage)
        assert len(fgraph.outputs) == len(output_storage)
        return (f,
                [link.Container(input, storage, readonly=False)
                 for input, storage in zip(fgraph.inputs, input_storage)],
                [link.Container(output, storage, readonly=True)
                 for output, storage in zip(fgraph.outputs, output_storage)],
                thunks_py, order)


_NODEFAULT = ['NODEFAULT']


class _Maker(FunctionMaker):  # inheritance buys a few helper functions
    """
    Special debugging FunctionMaker.

    Parameters
    ----------
    inputs : list of SymbolicInput instances
    outputs : list of SymbolicOutput instances
        Outputs may also be a single Variable (not a list), in which case
        the functions produced by FunctionMaker will return their output
        value directly.
    accept_inplace
        True iff it is acceptable to have inplace operations in the graph from
        the inputs to the outputs.
    on_unused_input
        What to do if a variable in the 'inputs' list is not used in the
        graph. Possible values are 'raise', 'warn' and 'ignore'.
    output_keys
        If the outputs argument for theano.function was a list, then
        output_keys is None. If the outputs argument was a dict, then
        output_keys is a sorted list of the keys from that dict.

    Notes
    -----
    The constructor sets TensorType.filter_checks_isfinite when
    `mode.check_isfinite` is True.

    """

    verbose = 0
    """
    Verbosity level of compile-time and run-time checks. (Default 0: silent).

    """

    def __init__(self, inputs, outputs, mode,
                 accept_inplace=False,
                 function_builder=Function,
                 profile=None,
                 on_unused_input=None,
                 fgraph=None,  # If present the optimized graph. we ignore it.
                 output_keys=None):
        self.profile = profile
        optimizer = mode.optimizer
        # Handle the case where inputs and/or outputs is a single
        # Variable (not in a list)
        unpack_single = False
        return_none = False
        if outputs is None:
            return_none = True
            outputs = []
        if not isinstance(outputs, (list, tuple)):
            unpack_single = True
            outputs = [outputs]
        if not isinstance(inputs, (list, tuple)):
            inputs = [inputs]

        # Wrap them in In or Out instances if needed.
        inputs = [self.wrap_in(i) for i in inputs]
        outputs = [self.wrap_out(o) for o in outputs]

        _inputs = gof.graph.inputs([o.variable for o in outputs] +
                                   [i.update for i in inputs
                                    if getattr(i, 'update', False)])

        # Check if some input variables are unused
        self._check_unused_inputs(inputs, outputs, on_unused_input)

        # Make a list of (SymbolicInput|SymblicInputKits, indices,
        # [SymbolicInput,...]), one tuple for each input. (See
        # Function.indices for more details)
        indices = [[input] + self.expand_in(input, _inputs)
                   for input in inputs]

        # make the fgraph
        for i in xrange(mode.stability_patience):
            fgraph, additional_outputs, equivalence_tracker = _optcheck_fgraph(
                inputs, outputs, accept_inplace)
            fgraph.equivalence_tracker = equivalence_tracker

            # optimize the fgraph
            compute_test_value_orig = theano.config.compute_test_value
            try:
                theano.config.compute_test_value = \
                    theano.config.compute_test_value_opt
                optimizer(fgraph)

                theano.compile.function_module.insert_deepcopy(
                    fgraph, inputs, list(chain(outputs, additional_outputs)))
            finally:
                theano.config.compute_test_value = compute_test_value_orig

            if i == 0:
                fgraph0 = fgraph
            else:
                li = fgraph.equivalence_tracker.event_list
                l0 = fgraph0.equivalence_tracker.event_list
                if li != l0:
                    infolog = StringIO()
                    print("WARNING: Optimization process is unstable...",
                          file=infolog)
                    print("  (HINT: Ops that the nodes point to must compare "
                          "equal)", file=infolog)
                    print("(event index)  (one event trace)  (other event "
                          "trace)", file=infolog)
                    print("-------------------------------------------------"
                          "----", file=infolog)
                    for j in xrange(max(len(li), len(l0))):
                        if j >= len(li):
                            print('trailing event in optimization 0 :', j,
                                  file=infolog)
                            print('   ', str(l0[j]), file=infolog)
                        elif j >= len(l0):
                            print('trailing event in optimization', i, ':',
                                  j, file=infolog)
                            print('   ', str(li[j]), file=infolog)
                        elif li[j] != l0[j]:
                            print('non-equal optimization events', i, ':',
                                  j, file=infolog)
                            print('   ', str(l0[j]), file=infolog)
                            print('   ', str(li[j]), file=infolog)
                        else:
                            pass
                    raise StochasticOrder(infolog.getvalue())
                else:
                    if self.verbose:
                        print("OPTCHECK: optimization", i,
                              "of", len(li), "events was stable.",
                              file=sys.stderr)
        self.fgraph = fgraph

        linker = _Linker(self)

        # the 'no_borrow' outputs are the ones for which that we can't return
        # the internal storage pointer.

        no_borrow = [
            output
            for output, spec in izip(fgraph.outputs,
                                     outputs + additional_outputs)
            if not spec.borrow]
        if no_borrow:
            self.linker = linker.accept(
                fgraph,
                no_recycling=infer_reuse_pattern(fgraph, no_borrow))
        else:
            self.linker = linker.accept(fgraph)

        self.indices = indices
        self.inputs = inputs
        self.expanded_inputs = inputs
        self.outputs = outputs
        self.unpack_single = unpack_single
        self.return_none = return_none
        self.accept_inplace = accept_inplace
        self.function_builder = function_builder
        self.mode = mode
        self.on_unused_input = on_unused_input  # Used for the pickling/copy
        self.output_keys = output_keys

    def create(self, defaults=None, trustme=False, storage_map=None):
        """
        Create a function.

        Parameters
        ----------
        defaults
            A list matching the inputs list and providing default values if the
            default for an input is None, then that input is a required input.
            For an input with an update, the default acts as initialization.
        trustme
            Disables some exceptions, used internally.

        """
        if defaults is None:
            defaults = [None] * len(self.inputs)
        # List of independent one-element lists, will be passed to the linker.
        input_storage = []
        _defaults = []

        # The following loop is to fill in the input_storage and _defaults
        # lists.
        for (input, indices, subinputs), default in izip(self.indices,
                                                         defaults):
            __default = default

            if isinstance(default, gof.Container):
                # If the default is a gof.Container, this means we want to
                # share the same storage. This is done by appending
                # default.storage to input_storage.
                if indices is not None:
                    raise TypeError("Cannot take a Container instance as "
                                    "default for a SymbolicInputKit.")
                input_storage.append(default.storage)
                default = None
            elif isinstance(input, SymbolicInputKit):
                # If the input is a SymbolicInputKit, it represents more than
                # one storage unit. The indices and subinputs lists represent
                # which of the kit's inputs are active in this graph, so we
                # make as many storage units as needed
                if isinstance(default, (list, tuple)) \
                        and all(isinstance(x, gof.Container) for x in default):
                    if len(default) == len(indices):
                        input_storage += [x.storage for x in default]
                    elif len(default) > len(indices):
                        input_storage += [default[i].storage for i in indices]
                    else:
                        raise ValueError(
                            'Not enough storage for SymbolicInputKit',
                            input, indices, default)
                    default = _NODEFAULT
                else:
                    input_storage += [[None] for i in indices]
            else:
                # Normal case: one new, independent storage unit
                input_storage.append([None])

            # Filling _defaults. Each entry is a tuple of three elements:
            # (required, refeed, value)
            # - required means that the user must provide a value when calling
            #   the function
            # - refeed means that we want to put the default back in the
            #   storage after each function call
            # - value is the value that will be put in the storage initially

            # Even though a SymbolicInputKit represents more than one input,
            # we still only have one entry for the defaults list.
            if isinstance(input, SymbolicInputKit):
                if default is _NODEFAULT:
                    _defaults.append((False, False, None))
                elif default is None:
                    _defaults.append((True, True, None))
                else:
                    _defaults.append((False, False, default))
            elif input.update is not None:
                # If the input has an update, then (logically) it is
                # not required since it is just a parameter and of
                # course we don't want to refeed the default back into
                # the storage as it would defeat the point of updating
                # it. We always do this policy.
                if default is None:
                    if trustme or isinstance(__default, gof.Container):
                        _defaults.append((False, False, None))
                    else:
                        # This might catch some bugs early
                        raise ValueError(
                            "A default (initial) value is required for an "
                            "input which can update itself.", input)
                else:
                    _defaults.append((False, False, default))
            else:
                if default is None:
                    if trustme or isinstance(__default, gof.Container):
                        _defaults.append((False, False, None))
                    else:
                        # No default, so this is a required
                        # input. Nothing to feed back, initial value
                        # is None.
                        _defaults.append((True, False, None))
                else:
                    # Default value. It is not required, but we want
                    # to put it back into the storage everytime so it
                    # behaves like most programming languages' default
                    # values
                    _defaults.append((False, True, default))
        defaults = _defaults

        # Get a function instance
        _fn, _i, _o = self.linker.make_thunk(input_storage=input_storage,
                                             storage_map=storage_map)
        fn = self.function_builder(_fn, _i, _o, self.indices,
                                   self.outputs, defaults, self.unpack_single,
                                   self.return_none, self.output_keys, self)
        return fn


def _pickle_DebugMode_Maker(maker):
    raise NotImplementedError('DebugMode is not picklable (yet)')
copyreg.pickle(_Maker, _pickle_DebugMode_Maker)

########################
#
# API symbol: DebugMode
#
########################


class DebugMode(Mode):
    """
    Evaluation Mode that detects internal theano errors.

    This mode catches several kinds of internal error:

    - Inconsistent outputs when calling the same Op twice with the same
      inputs, for instance if c_code and perform implementations, are
      inconsistent, or in case of incorrect handling of output memory
      (see `BadThunkOutput`).

    - A variable replacing another when their runtime values don't
      match.  This is a symptom of an incorrect optimization step, or
      faulty Op implementation (raises `BadOptimization`).

    - Stochastic optimization ordering (raises `StochasticOrder`).

    - Incomplete `destroy_map` specification (raises `BadDestroyMap`).

    - An op that returns an illegal value not matching the output
      Variable Type (raises InvalidValueError).

    Each of these exceptions inherits from the more generic `DebugModeError`.

    If there are no internal errors, this mode behaves like FAST_RUN
    or FAST_COMPILE, but takes a little longer and uses more memory.

    Raises
    ------
    DebugModeError
        If there are internal errors.

    Notes
    -----
    The work of debugging is implemented by the `_Maker`, `_Linker`,
    and `_VariableEquivalenceTracker` classes.

    """

    stability_patience = config.DebugMode.patience
    """
    When checking for the stability of optimization, recompile the
    graph this many times.

    """

    check_c_code = config.DebugMode.check_c
    """
    Should we evaluate (and check) the `c_code` implementations?

    """

    check_py_code = config.DebugMode.check_py
    """
    Should we evaluate (and check) the `perform` implementations?
    Always checked if no `c_code`.

    """

    check_isfinite = config.DebugMode.check_finite
    """
    Should we check for (and complain about) NaN/Inf ndarray elements?

    """

    require_matching_strides = config.DebugMode.check_strides
    """
    Should we check for (and complain about) Ops whose python and C
    outputs are ndarrays with different strides? (This can catch bugs,
    but is generally overly strict.) 0 no check, 1 warn, 2 err.

    """

    check_preallocated_output = config.DebugMode.check_preallocated_output
    check_preallocated_output = check_preallocated_output.split(':')
    """
    List of strings representing ways to pre-allocate output memory in
    tests.  Valid values are: "previous" (previously-returned memory),
    "c_contiguous", "f_contiguous", "strided" (positive and negative
    strides), "wrong_size" (larger and smaller dimensions), and "ALL"
    (all of the above).

    """

    # This function will be used to create a FunctionMaker in
    # function_module.function
    def function_maker(self, i, o, m, *args, **kwargs):
        """
        Return an instance of `_Maker` which handles much of the debugging work.

        """
        assert m is self
        return _Maker(i, o, self, *args, **kwargs)

    def __init__(self,
                 optimizer='fast_run',
                 stability_patience=None,
                 check_c_code=None,
                 check_py_code=None,
                 check_isfinite=None,
                 check_preallocated_output=None,
                 require_matching_strides=None,
                 linker=_DummyLinker()):
        """
        If any of these arguments (except optimizer) is not None, it overrides
        the class default. The linker argument is not used. It is set there to
        allow Mode.requiring() and some other fct to work with DebugMode too.

        """

        if not isinstance(linker, _DummyLinker):
            raise Exception("DebugMode can only use its own linker! You "
                            "should not provide one.", linker)

        super(DebugMode, self).__init__(optimizer=optimizer,
                                        linker=linker)

        if stability_patience is not None:
            self.stability_patience = stability_patience

        if check_c_code is not None:
            self.check_c_code = check_c_code

        if check_py_code is not None:
            self.check_py_code = check_py_code

        if check_isfinite is not None:
            self.check_isfinite = check_isfinite

        if check_preallocated_output is not None:
            # Copy to avoid sharing the same list across different instances
            self.check_preallocated_output = check_preallocated_output[:]

        if require_matching_strides is not None:
            self.require_matching_strides = require_matching_strides

        if not (self.check_c_code or self.check_py_code):
            raise ValueError('DebugMode has to check at least one of c and py '
                             'code')

    def __str__(self):
        return "DebugMode(linker=%s, optimizer=%s)" % (
            self.provided_linker, self.provided_optimizer)


register_mode('DEBUG_MODE', DebugMode(optimizer='fast_run'))