python source code of sampleset

# Copyright 2018 D-Wave Systems Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.
#
# =============================================================================
import base64
import copy
import itertools
import json
import numbers
import collections.abc as abc

from collections import namedtuple

import numpy as np

from numpy.lib import recfunctions

from dimod.decorators import vartype_argument
from dimod.exceptions import WriteableError
from dimod.serialization.format import Formatter
from dimod.serialization.utils import (pack_samples as _pack_samples,
                                       unpack_samples,
                                       serialize_ndarray,
                                       deserialize_ndarray,
                                       serialize_ndarrays,
                                       deserialize_ndarrays)
from dimod.utilities import LockableDict
from dimod.variables import Variables, iter_deserialize_variables
from dimod.vartypes import Vartype
from dimod.views.samples import SampleView, SamplesArray

__all__ = 'as_samples', 'concatenate', 'SampleSet'


def as_samples(samples_like, dtype=None, copy=False, order='C'):
    """Convert a samples_like object to a NumPy array and list of labels.

    Args:
        samples_like (samples_like):
            A collection of raw samples. `samples_like` is an extension of
            NumPy's array_like_ structure. See examples below.

        dtype (data-type, optional):
            dtype for the returned samples array. If not provided, it is either
            derived from `samples_like`, if that object has a dtype, or set to
            :class:`numpy.int8`.

        copy (bool, optional, default=False):
            If true, then samples_like is guaranteed to be copied, otherwise
            it is only copied if necessary.

        order ({'K', 'A', 'C', 'F'}, optional, default='C'):
            Specify the memory layout of the array. See :func:`numpy.array`.

    Returns:
        tuple: A 2-tuple containing:

            :obj:`numpy.ndarray`: Samples.

            list: Variable labels

    Examples:
        The following examples convert a variety of samples_like objects:

        NumPy arrays

        >>> import numpy as np
        ...
        >>> dimod.as_samples(np.ones(5, dtype='int8'))
        (array([[1, 1, 1, 1, 1]], dtype=int8), [0, 1, 2, 3, 4])
        >>> dimod.as_samples(np.zeros((5, 2), dtype='int8'))
        (array([[0, 0],
               [0, 0],
               [0, 0],
               [0, 0],
               [0, 0]], dtype=int8), [0, 1])

        Lists

        >>> dimod.as_samples([-1, +1, -1])
        (array([[-1,  1, -1]], dtype=int8), [0, 1, 2])
        >>> dimod.as_samples([[-1], [+1], [-1]])
        (array([[-1],
               [ 1],
               [-1]], dtype=int8), [0])

        Dicts

        >>> dimod.as_samples({'a': 0, 'b': 1, 'c': 0}) # doctest: +SKIP
        (array([[0, 1, 0]], dtype=int8), ['a', 'b', 'c'])
        >>> dimod.as_samples([{'a': -1, 'b': +1}, {'a': 1, 'b': 1}]) # doctest: +SKIP
        (array([[-1,  1],
                [ 1,  1]], dtype=int8), ['a', 'b'])

        A 2-tuple containing an array_like object and a list of labels

        >>> dimod.as_samples(([-1, +1, -1], ['a', 'b', 'c']))
        (array([[-1,  1, -1]], dtype=int8), ['a', 'b', 'c'])
        >>> dimod.as_samples((np.zeros((5, 2), dtype='int8'), ['in', 'out']))
        (array([[0, 0],
               [0, 0],
               [0, 0],
               [0, 0],
               [0, 0]], dtype=int8), ['in', 'out'])

    .. _array_like: https://docs.scipy.org/doc/numpy/user/basics.creation.html

    """
    if isinstance(samples_like, SampleSet):
        # we implicitely support this by handling an iterable of mapping but
        # it is much faster to just do this here.
        labels = list(samples_like.variables)
        if dtype is None:
            return samples_like.record.sample, labels
        else:
            return samples_like.record.sample.astype(dtype), labels

    if isinstance(samples_like, tuple) and len(samples_like) == 2:
        samples_like, labels = samples_like

        if not isinstance(labels, list) and labels is not None:
            labels = list(labels)
    else:
        labels = None

    if isinstance(samples_like, abc.Iterator):
        # if we don't check this case we can get unexpected behaviour where an
        # iterator can be depleted
        raise TypeError('samples_like cannot be an iterator')

    if isinstance(samples_like, abc.Mapping):
        return as_samples(([samples_like], labels), dtype=dtype)

    if (isinstance(samples_like, list) and samples_like and
            isinstance(samples_like[0], numbers.Number)):
        # this is not actually necessary but it speeds up the
        # samples_like = [1, 0, 1,...] case significantly
        return as_samples(([samples_like], labels), dtype=dtype)

    if not isinstance(samples_like, np.ndarray):
        if any(isinstance(sample, abc.Mapping) for sample in samples_like):
            # go through samples-like, turning the dicts into lists
            samples_like, old = list(samples_like), samples_like

            if labels is None:
                first = samples_like[0]
                if isinstance(first, abc.Mapping):
                    labels = list(first)
                else:
                    labels = list(range(len(first)))

            for idx, sample in enumerate(old):
                if isinstance(sample, abc.Mapping):
                    try:
                        samples_like[idx] = [sample[v] for v in labels]
                    except KeyError:
                        raise ValueError("samples_like and labels do not match")

    if dtype is None and not hasattr(samples_like, 'dtype'):
        dtype = np.int8

    # samples-like should now be array-like
    arr = np.array(samples_like, dtype=dtype, copy=copy, order=order)

    if arr.ndim > 2:
        raise ValueError("expected samples_like to be <= 2 dimensions")
    if arr.ndim < 2:
        if arr.size:
            arr = np.atleast_2d(arr)
        elif labels:  # is not None and len > 0
            arr = arr.reshape((0, len(labels)))
        else:
            arr = arr.reshape((0, 0))

    # ok we're basically done, just need to check against the labels
    if labels is None:
        return arr, list(range(arr.shape[1]))
    elif len(labels) != arr.shape[1]:
        raise ValueError("samples_like and labels dimensions do not match")
    else:
        return arr, labels


def concatenate(samplesets, defaults=None):
    """Combine sample sets.

    Args:
        samplesets (iterable[:obj:`.SampleSet`):
            Iterable of sample sets.

        defaults (dict, optional):
            Dictionary mapping data vector names to the corresponding default values.

    Returns:
        :obj:`.SampleSet`: A sample set with the same vartype and variable order as the first
        given in `samplesets`.

    Examples:
        >>> a = dimod.SampleSet.from_samples(([-1, +1], 'ab'), dimod.SPIN, energy=-1)
        >>> b = dimod.SampleSet.from_samples(([-1, +1], 'ba'), dimod.SPIN, energy=-1)
        >>> ab = dimod.concatenate((a, b))
        >>> ab.record.sample
        array([[-1,  1],
               [ 1, -1]], dtype=int8)

    """

    itertup = iter(samplesets)

    try:
        first = next(itertup)
    except StopIteration:
        raise ValueError("samplesets must contain at least one SampleSet")

    vartype = first.vartype
    variables = first.variables

    records = [first.record]
    records.extend(_iter_records(itertup, vartype, variables))

    # dev note: I was able to get ~2x performance boost when trying to
    # implement the same functionality here by hand (I didn't know that
    # this function existed then). However I think it is better to use
    # numpy's function and rely on their testing etc. If however this becomes
    # a performance bottleneck in the future, it might be worth changing.
    record = recfunctions.stack_arrays(records, defaults=defaults,
                                       asrecarray=True, usemask=False)

    return SampleSet(record, variables, {}, vartype)


def _iter_records(samplesets, vartype, variables):
    # coerce each record into the correct vartype and variable-order
    for samples in samplesets:

        # coerce vartype
        if samples.vartype is not vartype:
            samples = samples.change_vartype(vartype, inplace=False)

        if samples.variables != variables:
            new_record = samples.record.copy()
            order = [samples.variables.index[v] for v in variables]
            new_record.sample = samples.record.sample[:, order]
            yield new_record
        else:
            # order matches so we're done
            yield samples.record


def infer_vartype(samples_like):
    """Infer the vartype of the given samples-like.

    Args:
        A collection of samples. 'samples_like' is an extension of NumPy's
        array_like_. See :func:`.as_samples`.

    Returns:
        The :class:`.Vartype`, or None in the case that it is ambiguous.

    """
    if isinstance(samples_like, SampleSet):
        return samples_like.vartype

    samples, _ = as_samples(samples_like)

    ones_mask = (samples == 1)

    if ones_mask.all():
        # either empty or all 1s, in either case ambiguous
        return None

    if (ones_mask ^ (samples == 0)).all():
        return Vartype.BINARY

    if (ones_mask ^ (samples == -1)).all():
        return Vartype.SPIN

    raise ValueError("given samples_like is of an unknown vartype")


class SampleSet(abc.Iterable, abc.Sized):
    """Samples and any other data returned by dimod samplers.

    Args:
        record (:obj:`numpy.recarray`)
            A NumPy record array. Must have 'sample', 'energy' and 'num_occurrences' as fields.
            The 'sample' field should be a 2D NumPy array where each row is a sample and each
            column represents the value of a variable.

        variables (iterable):
            An iterable of variable labels, corresponding to columns in `record.samples`.

        info (dict):
            Information about the :class:`SampleSet` as a whole, formatted as a dict.

        vartype (:class:`.Vartype`/str/set):
            Variable type for the :class:`SampleSet`. Accepted input values:

            * :class:`.Vartype.SPIN`, ``'SPIN'``, ``{-1, 1}``
            * :class:`.Vartype.BINARY`, ``'BINARY'``, ``{0, 1}``

    Examples:
        This example creates a SampleSet out of a samples_like object (a NumPy array).

        >>> import numpy as np
        ...
        >>> sampleset =  dimod.SampleSet.from_samples(np.ones(5, dtype='int8'),
        ...                                           'BINARY', 0)
        >>> sampleset.variables
        Variables([0, 1, 2, 3, 4])

    """

    _REQUIRED_FIELDS = ['sample', 'energy', 'num_occurrences']

    ###############################################################################################
    # Construction
    ###############################################################################################

    @vartype_argument('vartype')
    def __init__(self, record, variables, info, vartype):

        # make sure that record is a numpy recarray and that it has the expected fields
        if not isinstance(record, np.recarray):
            raise TypeError("input record must be a numpy recarray")
        elif not set(self._REQUIRED_FIELDS).issubset(record.dtype.fields):
            raise ValueError("input record must have {}, {} and {} as fields".format(*self._REQUIRED_FIELDS))
        self._record = record

        num_samples, num_variables = record.sample.shape

        self._variables = variables = Variables(variables)
        if len(variables) != num_variables:
            msg = ("mismatch between number of variables in record.sample ({}) "
                   "and labels ({})").format(num_variables, len(variables))
            raise ValueError(msg)

        self._info = LockableDict(info)

        # vartype is checked by vartype_argument decorator
        self._vartype = vartype

    @classmethod
    def from_samples(cls, samples_like, vartype, energy, info=None,
                     num_occurrences=None, aggregate_samples=False,
                     sort_labels=True, **vectors):
        """Build a :class:`SampleSet` from raw samples.

        Args:
            samples_like:
                A collection of raw samples. 'samples_like' is an extension of NumPy's array_like_.
                See :func:`.as_samples`.

            vartype (:class:`.Vartype`/str/set):
                Variable type for the :class:`SampleSet`. Accepted input values:

                * :class:`.Vartype.SPIN`, ``'SPIN'``, ``{-1, 1}``
                * :class:`.Vartype.BINARY`, ``'BINARY'``, ``{0, 1}``

            energy (array_like):
                Vector of energies.

            info (dict, optional):
                Information about the :class:`SampleSet` as a whole formatted as a dict.

            num_occurrences (array_like, optional):
                Number of occurrences for each sample. If not provided, defaults to a vector of 1s.

            aggregate_samples (bool, optional, default=False):
                If True, all samples in returned :obj:`.SampleSet` are unique,
                with `num_occurrences` accounting for any duplicate samples in
                `samples_like`.

            sort_labels (bool, optional, default=True):
                Return :attr:`.SampleSet.variables` in sorted order. For mixed
                (unsortable) types, the given order is maintained.

            **vectors (array_like):
                Other per-sample data.

        Returns:
            :obj:`.SampleSet`

        Examples:
            This example creates a SampleSet out of a samples_like object (a dict).

            >>> import numpy as np
            ...
            >>> sampleset = dimod.SampleSet.from_samples(
            ...   dimod.as_samples({'a': 0, 'b': 1, 'c': 0}), 'BINARY', 0)
            >>> sampleset.variables
            Variables(['a', 'b', 'c'])

        .. _array_like:  https://docs.scipy.org/doc/numpy/user/basics.creation.html#converting-python-array-like-objects-to-numpy-arrays
        """
        if aggregate_samples:
            return cls.from_samples(samples_like, vartype, energy,
                                    info=info, num_occurrences=num_occurrences,
                                    aggregate_samples=False,
                                    **vectors).aggregate()

        # get the samples, variable labels
        samples, variables = as_samples(samples_like)

        if sort_labels and variables:  # need something to sort
            try:
                reindex, new_variables = zip(*sorted(enumerate(variables),
                                                     key=lambda tup: tup[1]))
            except TypeError:
                # unlike types are not sortable in python3, so we do nothing
                pass
            else:
                if new_variables != variables:
                    # avoid the copy if possible
                    samples = samples[:, reindex]
                    variables = new_variables

        num_samples, num_variables = samples.shape

        energy = np.asarray(energy)

        # num_occurrences
        if num_occurrences is None:
            num_occurrences = np.ones(num_samples, dtype=int)
        else:
            num_occurrences = np.asarray(num_occurrences)

        # now construct the record
        datatypes = [('sample', samples.dtype, (num_variables,)),
                     ('energy', energy.dtype),
                     ('num_occurrences', num_occurrences.dtype)]
        for key, vector in vectors.items():
            vectors[key] = vector = np.asarray(vector)
            datatypes.append((key, vector.dtype, vector.shape[1:]))

        record = np.rec.array(np.zeros(num_samples, dtype=datatypes))
        record['sample'] = samples
        record['energy'] = energy
        record['num_occurrences'] = num_occurrences
        for key, vector in vectors.items():
            record[key] = vector

        if info is None:
            info = {}

        return cls(record, variables, info, vartype)

    @classmethod
    def from_samples_bqm(cls, samples_like, bqm, **kwargs):
        """Build a sample set from raw samples and a binary quadratic model.

        The binary quadratic model is used to calculate energies and set the
        :class:`vartype`.

        Args:
            samples_like:
                A collection of raw samples. 'samples_like' is an extension of NumPy's array_like.
                See :func:`.as_samples`.

            bqm (:obj:`.BinaryQuadraticModel`):
                A binary quadratic model.

            info (dict, optional):
                Information about the :class:`SampleSet` as a whole formatted as a dict.

            num_occurrences (array_like, optional):
                Number of occurrences for each sample. If not provided, defaults to a vector of 1s.

            aggregate_samples (bool, optional, default=False):
                If True, all samples in returned :obj:`.SampleSet` are unique,
                with `num_occurrences` accounting for any duplicate samples in
                `samples_like`.

            sort_labels (bool, optional, default=True):
                Return :attr:`.SampleSet.variables` in sorted order. For mixed
                (unsortable) types, the given order is maintained.

            **vectors (array_like):
                Other per-sample data.

        Returns:
            :obj:`.SampleSet`

        Examples:

            >>> bqm = dimod.BinaryQuadraticModel.from_ising({}, {('a', 'b'): -1})
            >>> sampleset = dimod.SampleSet.from_samples_bqm({'a': -1, 'b': 1}, bqm)

        """
        # more performant to do this once, here rather than again in bqm.energies
        # and in cls.from_samples
        samples_like = as_samples(samples_like)

        energies = bqm.energies(samples_like)

        return cls.from_samples(samples_like, energy=energies, vartype=bqm.vartype, **kwargs)

    @classmethod
    def from_future(cls, future, result_hook=None):
        """Construct a :class:`SampleSet` referencing the result of a future computation.

        Args:
            future (object):
                Object that contains or will contain the information needed to construct a
                :class:`SampleSet`. If `future` has a :meth:`~concurrent.futures.Future.done` method,
                this determines the value returned by :meth:`.SampleSet.done`.

            result_hook (callable, optional):
                A function that is called to resolve the future. Must accept the future and return
                a :obj:`.SampleSet`. If not provided, set to

                .. code-block:: python

                    def result_hook(future):
                        return future.result()

        Returns:
            :obj:`.SampleSet`

        Notes:
            The future is resolved on the first read of any of the :class:`SampleSet` properties.

        Examples:
            Run a dimod sampler on a single thread and load the returned future into :class:`SampleSet`.

            >>> from concurrent.futures import ThreadPoolExecutor
            ...
            >>> bqm = dimod.BinaryQuadraticModel.from_ising({}, {('a', 'b'): -1})
            >>> with ThreadPoolExecutor(max_workers=1) as executor:
            ...     future = executor.submit(dimod.ExactSolver().sample, bqm)
            ...     sampleset = dimod.SampleSet.from_future(future)
            >>> sampleset.first.energy    # doctest: +SKIP

        """
        obj = cls.__new__(cls)
        obj._future = future

        if result_hook is None:
            def result_hook(future):
                return future.result()
        elif not callable(result_hook):
            raise TypeError("expected result_hook to be callable")

        obj._result_hook = result_hook
        return obj

    ###############################################################################################
    # Special Methods
    ###############################################################################################

    def __len__(self):
        """The number of rows in record."""
        return self.record.__len__()

    def __iter__(self):
        """Iterate over the samples, low energy to high."""
        # need to make it an iterator rather than just an iterable
        return iter(self.samples(sorted_by='energy'))

    def __eq__(self, other):
        """SampleSet equality."""

        if not isinstance(other, SampleSet):
            return False

        if self.vartype != other.vartype or self.info != other.info:
            return False

        # check that all the fields match in record, order doesn't matter
        if self.record.dtype.fields.keys() != other.record.dtype.fields.keys():
            return False
        for field in self.record.dtype.fields:
            if field == 'sample':
                continue
            if not (self.record[field] == other.record[field]).all():
                return False

        # now check the actual samples.
        if self.variables == other.variables:
            return (self.record.sample == other.record.sample).all()

        try:
            other_idx = [other.variables.index(v) for v in self.variables]
        except ValueError:
            # mismatched variables
            return False

        return (self.record.sample == other.record.sample[:, other_idx]).all()

    def __getstate__(self):
        # Ensure that any futures are resolved before pickling.
        self.resolve()
        # we'd prefer to do super().__getstate__ but unfortunately that's not
        # present, so instead we recreate the (documented) behaviour
        return self.__dict__

    def __repr__(self):
        return "{}({!r}, {}, {}, {!r})".format(self.__class__.__name__,
                                               self.record,
                                               self.variables,
                                               self.info,
                                               self.vartype.name)

    def __str__(self):
        return Formatter().format(self)

    ###############################################################################################
    # Properties
    ###############################################################################################

    @property
    def data_vectors(self):
        """The per-sample data in a vector.

        Returns:
            dict: A dict where the keys are the fields in the record and the
            values are the corresponding arrays.

        Examples:
            >>> sampleset = dimod.SampleSet.from_samples([[-1, 1], [1, 1]], dimod.SPIN,
                                                         energy=[-1, 1])
            >>> sampleset.data_vectors['energy']
            array([-1,  1])

            Note that this is equivalent to, and less performant than:

            >>> sampleset = dimod.SampleSet.from_samples([[-1, 1], [1, 1]], dimod.SPIN,
                                                         energy=[-1, 1])
            >>> sampleset.record['energy']
            array([-1,  1])


        """
        return {field: self.record[field] for field in self.record.dtype.names
                if field != 'sample'}

    @property
    def first(self):
        """Sample with the lowest-energy.

        Raises:
            ValueError: If empty.

        Example:

            >>> sampleset = dimod.ExactSolver().sample_ising({'a': 1}, {('a', 'b'): 1})
            >>> sampleset.first
            Sample(sample={'a': -1, 'b': 1}, energy=-2.0, num_occurrences=1)

        """
        try:
            return next(self.data(sorted_by='energy', name='Sample'))
        except StopIteration:
            raise ValueError('{} is empty'.format(self.__class__.__name__))

    @property
    def info(self):
        """Dict of information about the :class:`SampleSet` as a whole.

        Examples:
           This example shows the type of information that might be returned by
           a dimod sampler by submitting a BQM that sets a value on a D-Wave
           system's first listed coupler.

           >>> from dwave.system import DWaveSampler    # doctest: +SKIP
           >>> sampler = DWaveSampler()    # doctest: +SKIP
           >>> bqm = dimod.BQM({}, {sampler.edgelist[0]: -1}, 0, dimod.SPIN)   # doctest: +SKIP
           >>> sampler.sample(bqm).info   # doctest: +SKIP
           {'timing': {'qpu_sampling_time': 315,
            'qpu_anneal_time_per_sample': 20,
            'qpu_readout_time_per_sample': 274,
            # Snipped above response for brevity
        """
        self.resolve()
        return self._info

    @property
    def record(self):
        """:obj:`numpy.recarray` containing the samples, energies, number of occurences, and other sample data.

        Examples:
            >>> sampler = dimod.ExactSolver()
            >>> sampleset = sampler.sample_ising({'a': -0.5, 'b': 1.0}, {('a', 'b'): -1.0})
            >>> sampleset.record.sample     # doctest: +SKIP
            array([[-1, -1],
                   [ 1, -1],
                   [ 1,  1],
                   [-1,  1]], dtype=int8)
            >>> len(sampleset.record.energy)
            4

        """
        self.resolve()
        return self._record

    @property
    def variables(self):
        """:class:`.VariableIndexView` of variable labels.

        Corresponds to columns of the sample field of :attr:`.SampleSet.record`.

        """
        self.resolve()
        return self._variables

    @property
    def vartype(self):
        """:class:`.Vartype` of the samples."""
        self.resolve()
        return self._vartype

    @property
    def is_writeable(self):
        return getattr(self, '_writeable', True)

    @is_writeable.setter
    def is_writeable(self, b):
        b = bool(b)  # cast

        self._writeable = b

        self.record.flags.writeable = b
        self.variables.is_writeable = b
        self.info.is_writeable = b

    ###############################################################################################
    # Views
    ###############################################################################################

    def done(self):
        """Return True if a pending computation is done.

        Used when a :class:`SampleSet` is constructed with :meth:`SampleSet.from_future`.

        Examples:
            This example uses a :class:`~concurrent.futures.Future` object directly. Typically
            a :class:`~concurrent.futures.Executor` sets the result of the future
            (see documentation for :mod:`concurrent.futures`).

            >>> from concurrent.futures import Future
            ...
            >>> future = Future()
            >>> sampleset = dimod.SampleSet.from_future(future)
            >>> future.done()
            False
            >>> future.set_result(dimod.ExactSolver().sample_ising({0: -1}, {}))
            >>> future.done()
            True
            >>> sampleset.first.energy
            -1.0

        """
        return (not hasattr(self, '_future')) or (not hasattr(self._future, 'done')) or self._future.done()

    def samples(self, n=None, sorted_by='energy'):
        """Return an iterable over the samples.

        Args:
            n (int, optional, default=None):
                Maximum number of samples to return in the view.

            sorted_by (str/None, optional, default='energy'):
                Selects the record field used to sort the samples. If None,
                samples are returned in record order.

        Returns:
            :obj:`.SamplesArray`: A view object mapping variable labels to
            values.

        Examples:

            >>> sampleset = dimod.ExactSolver().sample_ising({'a': 0.1, 'b': 0.0},
            ...                                              {('a', 'b'): 1})
            >>> for sample in sampleset.samples():   # doctest: +SKIP
            ...     print(sample)
            {'a': -1, 'b': 1}
            {'a': 1, 'b': -1}
            {'a': -1, 'b': -1}
            {'a': 1, 'b': 1}

            >>> sampleset = dimod.ExactSolver().sample_ising({'a': 0.1, 'b': 0.0},
            ...                                              {('a', 'b'): 1})
            >>> samples = sampleset.samples()
            >>> samples[0]
            {'a': -1, 'b': 1}
            >>> samples[0, 'a']
            -1
            >>> samples[0, ['b', 'a']]
            array([ 1, -1], dtype=int8)
            >>> samples[1:, ['a', 'b']]
            array([[ 1, -1],
                   [-1, -1],
                   [ 1,  1]], dtype=int8)

        """
        if n is not None:
            return self.samples(sorted_by=sorted_by)[:n]

        if sorted_by is None:
            samples = self.record.sample
        else:
            order = np.argsort(self.record[sorted_by])
            samples = self.record.sample[order]

        return SamplesArray(samples, self.variables)

    def data(self, fields=None, sorted_by='energy', name='Sample', reverse=False,
             sample_dict_cast=True, index=False):
        """Iterate over the data in the :class:`SampleSet`.

        Args:
            fields (list, optional, default=None):
                If specified, only these fields are included in the yielded tuples.
                The special field name 'sample' can be used to view the samples.

            sorted_by (str/None, optional, default='energy'):
                Selects the record field used to sort the samples. If None, the samples are yielded
                in record order.

            name (str/None, optional, default='Sample'):
                Name of the yielded namedtuples or None to yield regular tuples.

            reverse (bool, optional, default=False):
                If True, yield in reverse order.

            sample_dict_cast (bool, optional, default=True):
                Samples are returned as dicts rather than
                :class:`.SampleView`, which requires heavy memory
                usage. Set to False to reduce load on memory.

            index (bool, optional, default=False):
                If True, `datum.idx` gives the corresponding index of the
                :attr:`.SampleSet.record`.

        Yields:
            namedtuple/tuple: The data in the :class:`SampleSet`, in the order specified by the input
            `fields`.

        Examples:

            >>> sampleset = dimod.ExactSolver().sample_ising({'a': -0.5, 'b': 1.0}, {('a', 'b'): -1})
            >>> for datum in sampleset.data(fields=['sample', 'energy']):   # doctest: +SKIP
            ...     print(datum)
            Sample(sample={'a': -1, 'b': -1}, energy=-1.5)
            Sample(sample={'a': 1, 'b': -1}, energy=-0.5)
            Sample(sample={'a': 1, 'b': 1}, energy=-0.5)
            Sample(sample={'a': -1, 'b': 1}, energy=2.5)
            >>> for energy, in sampleset.data(fields=['energy'], sorted_by='energy'):
            ...     print(energy)
            ...
            -1.5
            -0.5
            -0.5
            2.5
            >>> print(next(sampleset.data(fields=['energy'], name='ExactSolverSample')))
            ExactSolverSample(energy=-1.5)

        """
        record = self.record

        if fields is None:
            # make sure that sample, energy is first
            fields = self._REQUIRED_FIELDS + [field for field in record.dtype.fields
                                              if field not in self._REQUIRED_FIELDS]
            if index:
                fields.append('idx')

        if sorted_by is None:
            order = np.arange(len(self))
        elif index:
            # we want a stable sort but it can be slower
            order = np.argsort(record[sorted_by], kind='stable')
        else:
            order = np.argsort(record[sorted_by])

        if reverse:
            order = np.flip(order)

        if name is None:
            # yielding a tuple
            def _pack(values):
                return tuple(values)
        else:
            # yielding a named tuple
            SampleTuple = namedtuple(name, fields)

            def _pack(values):
                return SampleTuple(*values)

        def _values(idx):
            for field in fields:
                if field == 'sample':
                    sample = SampleView(record.sample[idx, :], self.variables)
                    if sample_dict_cast:
                        sample = dict(sample)
                    yield sample
                elif field == 'idx':
                    yield idx
                else:
                    yield record[field][idx]

        for idx in order:
            yield _pack(_values(idx))

    ###############################################################################################
    # Methods
    ###############################################################################################

    def copy(self):
        """Create a shallow copy."""
        return self.__class__(self.record.copy(),
                              self.variables,  # a new one is made in all cases
                              self.info.copy(),
                              self.vartype)

    @vartype_argument('vartype')
    def change_vartype(self, vartype, energy_offset=0.0, inplace=True):
        """Return the :class:`SampleSet` with the given vartype.

        Args:
            vartype (:class:`.Vartype`/str/set):
                Variable type to use for the new :class:`SampleSet`. Accepted input values:

                * :class:`.Vartype.SPIN`, ``'SPIN'``, ``{-1, 1}``
                * :class:`.Vartype.BINARY`, ``'BINARY'``, ``{0, 1}``

            energy_offset (number, optional, defaul=0.0):
                Constant value applied to the 'energy' field of :attr:`SampleSet.record`.

            inplace (bool, optional, default=True):
                If True, the instantiated :class:`SampleSet` is updated; otherwise, a new
                :class:`SampleSet` is returned.

        Returns:
            :obj:`.SampleSet`: SampleSet with changed vartype. If `inplace` is True, returns itself.

        Notes:
            This function is non-blocking unless `inplace==True`, in which case
            the sample set is resolved.

        Examples:
            This example creates a binary copy of a spin-valued :class:`SampleSet`.

            >>> sampleset = dimod.ExactSolver().sample_ising({'a': -0.5, 'b': 1.0}, {('a', 'b'): -1})
            >>> sampleset_binary = sampleset.change_vartype(dimod.BINARY, energy_offset=1.0, inplace=False)
            >>> sampleset_binary.vartype is dimod.BINARY
            True
            >>> sampleset_binary.first.sample
            {'a': 0, 'b': 0}

        """
        if not inplace:
            return self.copy().change_vartype(vartype, energy_offset, inplace=True)

        if not self.done():
            def hook(sampleset):
                sampleset.resolve()
                return sampleset.change_vartype(vartype, energy_offset)
            return self.from_future(self, hook)

        if not self.is_writeable:
            raise WriteableError("SampleSet is not writeable")

        if energy_offset:
            self.record.energy = self.record.energy + energy_offset

        if vartype is self.vartype:
            return self  # we're done!

        if vartype is Vartype.SPIN and self.vartype is Vartype.BINARY:
            self.record.sample = 2 * self.record.sample - 1
            self._vartype = vartype
        elif vartype is Vartype.BINARY and self.vartype is Vartype.SPIN:
            self.record.sample = (self.record.sample + 1) // 2
            self._vartype = vartype
        else:
            raise ValueError("Cannot convert from {} to {}".format(self.vartype, vartype))

        return self

    def relabel_variables(self, mapping, inplace=True):
        """Relabel the variables of a :class:`SampleSet` according to the specified mapping.

        Args:
            mapping (dict):
                Mapping from current variable labels to new, as a dict. If incomplete mapping is
                specified, unmapped variables keep their current labels.

            inplace (bool, optional, default=True):
                If True, the current :class:`SampleSet` is updated; otherwise, a new
                :class:`SampleSet` is returned.

        Returns:
            :class:`.SampleSet`: SampleSet with relabeled variables. If `inplace` is True, returns
            itself.

        Examples:
            This example creates a relabeled copy of a :class:`SampleSet`.

            >>> sampleset = dimod.ExactSolver().sample_ising({'a': -0.5, 'b': 1.0}, {('a', 'b'): -1})
            >>> new_sampleset = sampleset.relabel_variables({'a': 0, 'b': 1}, inplace=False)
            >>> new_sampleset.variables
            Variables([0, 1])

        """
        if not inplace:
            return self.copy().relabel_variables(mapping, inplace=True)

        self.variables.relabel(mapping)
        return self

    def resolve(self):
        """Ensure that the sampleset is resolved if constructed from a future.
        """
        # if it doesn't have the attribute then it is already resolved
        if hasattr(self, '_future'):
            samples = self._result_hook(self._future)
            self.__init__(samples.record, samples.variables, samples.info, samples.vartype)
            del self._future
            del self._result_hook

    def aggregate(self):
        """Create a new SampleSet with repeated samples aggregated.

        Returns:
            :obj:`.SampleSet`

        Note:
            :attr:`.SampleSet.record.num_occurrences` are accumulated but no
            other fields are.

        Examples:
            This examples aggregates a sample set with two identical samples
            out of three.

            >>> sampleset = dimod.SampleSet.from_samples([[0, 0, 1], [0, 0, 1],
            ...                                           [1, 1, 1]],
            ...                                           dimod.BINARY,
            ...                                           [0, 0, 1])
            >>> print(sampleset)
               0  1  2 energy num_oc.
            0  0  0  1      0       1
            1  0  0  1      0       1
            2  1  1  1      1       1
            ['BINARY', 3 rows, 3 samples, 3 variables]
            >>> print(sampleset.aggregate())
               0  1  2 energy num_oc.
            0  0  0  1      0       2
            1  1  1  1      1       1
            ['BINARY', 2 rows, 3 samples, 3 variables]
        """
        _, indices, inverse = np.unique(self.record.sample, axis=0,
                                        return_index=True, return_inverse=True)

        # unique also sorts the array which we don't want, so we undo the sort
        order = np.argsort(indices)
        indices = indices[order]

        # and on the inverse
        revorder = np.empty(len(order), dtype=order.dtype)
        revorder[order] = np.arange(len(order))
        inverse = revorder[inverse]

        record = self.record[indices]

        # fix the number of occurrences
        record.num_occurrences = 0
        for old_idx, new_idx in enumerate(inverse):
            record[new_idx].num_occurrences += self.record[old_idx].num_occurrences

        # dev note: we don't check the energies as they should be the same
        # for individual samples

        return type(self)(record, self.variables, copy.deepcopy(self.info),
                          self.vartype)

    def append_variables(self, samples_like, sort_labels=True):
        """Create a new sampleset with the given variables and values.

        Not defined for empty sample sets. If `sample_like` is
        a :obj:`.SampleSet`, its data vectors and info are ignored.

        Args:
            samples_like:
                Samples to add to the sample set. Either a single
                sample or identical in length to the sample set.
                'samples_like' is an extension of NumPy's array_like_.
                See :func:`.as_samples`.

            sort_labels (bool, optional, default=True):
                Return :attr:`.SampleSet.variables` in sorted order. For mixed
                (unsortable) types, the given order is maintained.

        Returns:
            :obj:`.SampleSet`: New sample set with the variables/values added.

        Examples:

            >>> sampleset = dimod.SampleSet.from_samples([{'a': -1, 'b': +1},
            ...                                           {'a': +1, 'b': +1}],
            ...                                          dimod.SPIN,
            ...                                          energy=[-1.0, 1.0])
            >>> new = sampleset.append_variables({'c': -1})
            >>> print(new)
               a  b  c energy num_oc.
            0 -1 +1 -1   -1.0       1
            1 +1 +1 -1    1.0       1
            ['SPIN', 2 rows, 2 samples, 3 variables]

            Add variables from another sample set to the previous example. Note
            that the energies remain unchanged.

            >>> another = dimod.SampleSet.from_samples([{'c': -1, 'd': +1},
            ...                                         {'c': +1, 'd': +1}],
            ...                                        dimod.SPIN,
            ...                                        energy=[-2.0, 1.0])
            >>> new = sampleset.append_variables(another)
            >>> print(new)
               a  b  c  d energy num_oc.
            0 -1 +1 -1 +1   -1.0       1
            1 +1 +1 +1 +1    1.0       1
            ['SPIN', 2 rows, 2 samples, 4 variables]

        .. _array_like: https://docs.scipy.org/doc/numpy/user/basics.creation.html

        """
        samples, labels = as_samples(samples_like)

        num_samples = len(self)

        # we don't handle multiple values
        if samples.shape[0] == num_samples:
            # we don't need to do anything, it's already the correct shape
            pass
        elif samples.shape[0] == 1 and num_samples:
            samples = np.repeat(samples, num_samples, axis=0)
        else:
            msg = ("mismatched shape. The samples to append should either be "
                   "a single sample or should match the length of the sample "
                   "set. Empty sample sets cannot be appended to.")
            raise ValueError(msg)

        # append requires the new variables to be unique
        variables = self.variables
        if any(v in variables for v in labels):
            msg = "Appended samples cannot contain variables in sample set"
            raise ValueError(msg)

        new_variables = list(variables) + labels
        new_samples = np.hstack((self.record.sample, samples))

        return type(self).from_samples((new_samples, new_variables),
                                       self.vartype,
                                       info=copy.deepcopy(self.info),  # make a copy
                                       sort_labels=sort_labels,
                                       **self.data_vectors)

    def lowest(self, rtol=1.e-5, atol=1.e-8):
        """Return a sample set containing the lowest-energy samples.

        A sample is included if its energy is within tolerance of the lowest
        energy in the sample set. The following equation is used to determine
        if two values are equivalent:

        absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`))

        See :func:`numpy.isclose` for additional details and caveats.

        Args:
            rtol (float, optional, default=1.e-5):
                The relative tolerance (see above).

            atol (float, optional, default=1.e-8):
                The absolute tolerance (see above).

        Returns:
            :obj:`.SampleSet`: A new sample set containing the lowest energy
            samples as delimited by configured tolerances from the lowest energy
            sample in the current sample set.

        Examples:
            >>> sampleset = dimod.ExactSolver().sample_ising({'a': .001},
            ...                                              {('a', 'b'): -1})
            >>> print(sampleset.lowest())
               a  b energy num_oc.
            0 -1 -1 -1.001       1
            ['SPIN', 1 rows, 1 samples, 2 variables]
            >>> print(sampleset.lowest(atol=.1))
               a  b energy num_oc.
            0 -1 -1 -1.001       1
            1 +1 +1 -0.999       1
            ['SPIN', 2 rows, 2 samples, 2 variables]

        Note:
            "Lowest energy" is the lowest energy in the sample set. This is not
            always the "ground energy" which is the lowest energy possible
            for a binary quadratic model.

        """

        if len(self) == 0:
            # empty so all are lowest
            return self.copy()

        record = self.record

        # want all the rows within tolerance of the minimal energy
        close = np.isclose(record.energy,
                           np.min(record.energy),
                           rtol=rtol, atol=atol)
        record = record[close]

        return type(self)(record, self.variables, copy.deepcopy(self.info),
                          self.vartype)

    def truncate(self, n, sorted_by='energy'):
        """Create a new sample set with up to n rows.

        Args:
            n (int):
                Maximum number of rows in the returned sample set. Does not return
                any rows above this limit in the original sample set.

            sorted_by (str/None, optional, default='energy'):
                Selects the record field used to sort the samples before
                truncating. Note that this sort order is maintained in the
                returned sample set.

        Returns:
            :obj:`.SampleSet`

        Examples:

            >>> import numpy as np
            ...
            >>> sampleset = dimod.SampleSet.from_samples(np.ones((5, 5)), dimod.SPIN, energy=5)
            >>> print(sampleset)
               0  1  2  3  4 energy num_oc.
            0 +1 +1 +1 +1 +1      5       1
            1 +1 +1 +1 +1 +1      5       1
            2 +1 +1 +1 +1 +1      5       1
            3 +1 +1 +1 +1 +1      5       1
            4 +1 +1 +1 +1 +1      5       1
            ['SPIN', 5 rows, 5 samples, 5 variables]
            >>> print(sampleset.truncate(2))
               0  1  2  3  4 energy num_oc.
            0 +1 +1 +1 +1 +1      5       1
            1 +1 +1 +1 +1 +1      5       1
            ['SPIN', 2 rows, 2 samples, 5 variables]

        See:
            :meth:`SampleSet.slice`

        """
        return self.slice(n, sorted_by=sorted_by)

    def slice(self, *slice_args, **kwargs):
        """Create a new sample set with rows sliced according to standard Python
        slicing syntax.

        Args:
            start (int, optional, default=None):
                Start index for `slice`.

            stop (int):
                Stop index for `slice`.

            step (int, optional, default=None):
                Step value for `slice`.

            sorted_by (str/None, optional, default='energy'):
                Selects the record field used to sort the samples before
                slicing. Note that `sorted_by` determines the sample order in
                the returned sample set.

        Returns:
            :obj:`.SampleSet`

        Examples:

            >>> import numpy as np
            ...
            >>> sampleset = dimod.SampleSet.from_samples(np.diag(range(1, 11)),
            ...                   dimod.BINARY, energy=range(10))
            >>> print(sampleset)
               0  1  2  3  4  5  6  7  8  9 energy num_oc.
            0  1  0  0  0  0  0  0  0  0  0      0       1
            1  0  1  0  0  0  0  0  0  0  0      1       1
            2  0  0  1  0  0  0  0  0  0  0      2       1
            3  0  0  0  1  0  0  0  0  0  0      3       1
            4  0  0  0  0  1  0  0  0  0  0      4       1
            5  0  0  0  0  0  1  0  0  0  0      5       1
            6  0  0  0  0  0  0  1  0  0  0      6       1
            7  0  0  0  0  0  0  0  1  0  0      7       1
            8  0  0  0  0  0  0  0  0  1  0      8       1
            9  0  0  0  0  0  0  0  0  0  1      9       1
            ['BINARY', 10 rows, 10 samples, 10 variables]

            The above example's first 3 samples by energy == truncate(3):

            >>> print(sampleset.slice(3))
               0  1  2  3  4  5  6  7  8  9 energy num_oc.
            0  1  0  0  0  0  0  0  0  0  0      0       1
            1  0  1  0  0  0  0  0  0  0  0      1       1
            2  0  0  1  0  0  0  0  0  0  0      2       1
            ['BINARY', 3 rows, 3 samples, 10 variables]

            The last 3 samples by energy:

            >>> print(sampleset.slice(-3, None))
               0  1  2  3  4  5  6  7  8  9 energy num_oc.
            0  0  0  0  0  0  0  0  1  0  0      7       1
            1  0  0  0  0  0  0  0  0  1  0      8       1
            2  0  0  0  0  0  0  0  0  0  1      9       1
            ['BINARY', 3 rows, 3 samples, 10 variables]

            Every second sample in between, skipping top and bottom 3:

            >>> print(sampleset.slice(3, -3, 2))
               0  1  2  3  4  5  6  7  8  9 energy num_oc.
            0  0  0  0  1  0  0  0  0  0  0      3       1
            1  0  0  0  0  0  1  0  0  0  0      5       1
            ['BINARY', 2 rows, 2 samples, 10 variables]

        """
        # handle `sorted_by` kwarg with a default value in a python2-compatible way
        sorted_by = kwargs.pop('sorted_by', 'energy')
        if kwargs:
            # be strict about allowed kwargs: throw the same error as python3 would
            raise TypeError('slice got an unexpected '
                            'keyword argument {!r}'.format(kwargs.popitem()[0]))

        # follow Python's slice syntax
        if slice_args:
            selector = slice(*slice_args)
        else:
            selector = slice(None)

        if sorted_by is None:
            record = self.record[selector]
        else:
            sort_indices = np.argsort(self.record[sorted_by])
            record = self.record[sort_indices[selector]]

        return type(self)(record, self.variables, copy.deepcopy(self.info),
                          self.vartype)

    ###############################################################################################
    # Serialization
    ###############################################################################################

    def to_serializable(self, use_bytes=False, bytes_type=bytes,
                        pack_samples=True):
        """Convert a :class:`SampleSet` to a serializable object.

        Note that the contents of the :attr:`.SampleSet.info` field are assumed
        to be serializable.

        Args:
            use_bytes (bool, optional, default=False):
                If True, a compact representation of the biases as bytes is used.

            bytes_type (class, optional, default=bytes):
                If `use_bytes` is True, this class is used to wrap the bytes
                objects in the serialization. Useful for Python 2 using BSON
                encoding, which does not accept the raw `bytes` type;
                `bson.Binary` can be used instead.

            pack_samples (bool, optional, default=True):
                Pack the samples using 1 bit per sample.

        Returns:
            dict: Object that can be serialized.

        Examples:
            This example encodes using JSON.

            >>> import json
            ...
            >>> samples = dimod.SampleSet.from_samples([-1, 1, -1], dimod.SPIN, energy=-.5)
            >>> s = json.dumps(samples.to_serializable())

        See also:
            :meth:`~.SampleSet.from_serializable`

        """
        schema_version = "3.1.0"

        # developer note: numpy's record array stores the samples, energies,
        # num_occ. etc as a struct array. If we dumped that array directly to
        # bytes we could avoid a copy when undoing the serialization. However,
        # we want to pack the samples, so that means that we're storing the
        # arrays individually.
        vectors = {name: serialize_ndarray(data, use_bytes=use_bytes,
                                           bytes_type=bytes_type)
                   for name, data in self.data_vectors.items()}

        if pack_samples:
            # we could just do self.record.sample > 0 for all of these, but to
            # save on the copy if we are already binary and bool/integer we
            # check and just pass through in that case
            samples = self.record.sample
            if (self.vartype is Vartype.BINARY and
                    (np.issubdtype(samples.dtype, np.integer) or
                     np.issubdtype(samples.dtype, np.bool_))):
                packed = _pack_samples(samples)
            else:
                packed = _pack_samples(samples > 0)

            sample_data = serialize_ndarray(packed,
                                            use_bytes=use_bytes,
                                            bytes_type=bytes_type)
        else:
            sample_data = serialize_ndarray(self.record.sample,
                                            use_bytes=use_bytes,
                                            bytes_type=bytes_type)

        return {
            # metadata
            "type": type(self).__name__,
            "version": {"sampleset_schema": schema_version},

            # samples
            "num_variables": len(self.variables),
            "num_rows": len(self),
            "sample_data": sample_data,
            "sample_type": self.record.sample.dtype.name,
            "sample_packed": bool(pack_samples),  # 3.1.0+, default=True

            # vectors
            "vectors": vectors,

            # other
            "variable_labels": self.variables.to_serializable(),
            "variable_type": self.vartype.name,
            "info": serialize_ndarrays(self.info, use_bytes=use_bytes,
                                       bytes_type=bytes_type),
            }

    def _asdict(self):
        # support simplejson encoding
        return self.to_serializable()

    @classmethod
    def from_serializable(cls, obj):
        """Deserialize a :class:`SampleSet`.

        Args:
            obj (dict):
                A :class:`SampleSet` serialized by :meth:`~.SampleSet.to_serializable`.

        Returns:
            :obj:`.SampleSet`

        Examples:
            This example encodes and decodes using JSON.

            >>> import json
            ...
            >>> samples = dimod.SampleSet.from_samples([-1, 1, -1], dimod.SPIN, energy=-.5)
            >>> s = json.dumps(samples.to_serializable())
            >>> new_samples = dimod.SampleSet.from_serializable(json.loads(s))

        See also:
            :meth:`~.SampleSet.to_serializable`

        """

        version = obj["version"]["sampleset_schema"]
        if version < "3.0.0":
            raise ValueError("No longer supported serialization format")

        # assume we're working with v3

        # other data
        vartype = str(obj['variable_type'])  # cast to str for python2
        num_variables = obj['num_variables']
        variables = list(iter_deserialize_variables(obj['variable_labels']))
        info = deserialize_ndarrays(obj['info'])

        # vectors
        vectors = {name: deserialize_ndarray(data)
                   for name, data in obj['vectors'].items()}

        sample = deserialize_ndarray(obj['sample_data'])
        if obj.get('sample_packed', True):  # 3.1.0
            sample = unpack_samples(sample,
                                    n=num_variables,
                                    dtype=obj['sample_type'])

            if vartype == 'SPIN':
                sample *= 2
                sample -= 1

        return cls.from_samples((sample, variables), vartype, info=info,
                                **vectors)

    ###############################################################################################
    # Export to dataframe
    ###############################################################################################

    def to_pandas_dataframe(self, sample_column=False):
        """Convert a sample set to a Pandas DataFrame

        Args:
            sample_column(bool, optional, default=False): If True, samples are
            represented as a column of type dict.

        Returns:
            :obj:`pandas.DataFrame`

        Examples:
            >>> samples = dimod.SampleSet.from_samples([{'a': -1, 'b': +1, 'c': -1},
            ...                                         {'a': -1, 'b': -1, 'c': +1}],
            ...                                        dimod.SPIN, energy=-.5)
            >>> samples.to_pandas_dataframe()    # doctest: +SKIP
               a  b  c  energy  num_occurrences
            0 -1  1 -1    -0.5                1
            1 -1 -1  1    -0.5                1
            >>> samples.to_pandas_dataframe(sample_column=True)    # doctest: +SKIP
                                   sample  energy  num_occurrences
            0  {'a': -1, 'b': 1, 'c': -1}    -0.5                1
            1  {'a': -1, 'b': -1, 'c': 1}    -0.5                1

        """
        import pandas as pd

        if sample_column:
            df = pd.DataFrame(self.data(sorted_by=None, sample_dict_cast=True))

        else:
            # work directly with the record, it's much faster
            df = pd.DataFrame(self.record.sample, columns=self.variables)

            for field in sorted(self.record.dtype.fields):  # sort for consistency
                if field == 'sample':
                    continue

                df.loc[:, field] = self.record[field]

        return df