# Python source code of the test module.

from __future__ import division, print_function
from io import BytesIO
from scipy import sparse
import numpy as np
import pandas as pd
import h5py
import pytest

from cooler import core


def make_hdf5_table(mode):
    """Build a small HDF5 file in memory containing a five-row ``table`` group.

    The file is backed by a ``BytesIO`` buffer and opened through h5py with
    the given ``mode``. The ``table`` group receives four datasets
    (``chrom``, ``start``, ``end``, ``value``), each created with gzip
    compression and an unlimited first dimension.

    Returns the open, flushed ``h5py.File`` handle.
    """
    columns = [
        ('chrom', np.array(['chr1', 'chr1', 'chr1', 'chr2', 'chr2'], dtype='S')),
        ('start', [0, 10, 20, 0, 10]),
        ('end', [10, 20, 32, 10, 21]),
        ('value', [1.1, 2.0, 3.0, 4.0, 5.0]),
    ]
    storage = dict(compression='gzip', compression_opts=6, maxshape=(None,))

    h5 = h5py.File(BytesIO(), mode)
    table = h5.create_group('table')
    for name, values in columns:
        table.create_dataset(name, data=values, **storage)
    h5.flush()
    return h5


def test_get():
    """Check ``core.get`` return types and row/column selection on the sample table."""
    h5 = make_hdf5_table('a')
    table = h5['table']
    head_values = [1.1, 2.0, 3.0]

    # A list of field names is expected to come back as a DataFrame.
    frame = core.get(table, 0, 3, ['chrom', 'value'])
    assert isinstance(frame, pd.DataFrame)
    assert len(frame.columns) == 2
    assert frame['chrom'].astype('U').tolist() == ['chr1', 'chr1', 'chr1']
    assert np.allclose(frame['value'].values, head_values)

    # A single field name is expected to come back as a Series.
    series = core.get(table, 0, 3, 'value')
    assert isinstance(series, pd.Series)
    assert np.allclose(series.values, head_values)

    # With as_dict=True the result is expected to be a plain dict.
    mapping = core.get(table, 0, 3, 'value', as_dict=True)
    assert isinstance(mapping, dict)
    assert np.allclose(mapping['value'], head_values)

    # Whole-table reads: default bounds, explicit lo=None, and lo=3.
    whole_table_cases = [
        ({}, 5),
        ({'lo': None}, 5),
        ({'lo': 3}, 2),
    ]
    for bounds, expected_rows in whole_table_cases:
        result = core.get(table, **bounds)
        assert len(result) == expected_rows
        assert len(result.columns) == 4


def test_put():
    """Check ``core.put``: appending rows, categorical columns, and scalar input.

    The sample table is extended step by step, and after each ``put`` the
    table is read back with ``core.get`` to verify the row or column count
    and the resulting dtypes.
    """
    f = make_hdf5_table('a')

    # ``is_categorical_dtype`` is deprecated in recent pandas releases; an
    # isinstance check against CategoricalDtype is the supported equivalent.
    categorical_dtype = pd.api.types.CategoricalDtype

    # append two rows after the five existing ones
    df = pd.DataFrame({
        'chrom': ['chr3', 'chr3'],
        'start': [0, 20],
        'end': [20, 40],
        'value': [4.0, 5.0],
    })
    core.put(f['table'], df, lo=5)
    f.flush()
    out = core.get(f['table'])
    assert len(out) == 7

    # insert a categorical column
    s = pd.Series(pd.Categorical(out['chrom'], ordered=True), index=out.index)
    s.name = 'chrom_enum'
    core.put(f['table'], s)
    assert h5py.check_dtype(enum=f['table/chrom_enum'].dtype)
    out = core.get(f['table'])
    assert len(out.columns) == 5
    assert isinstance(out['chrom_enum'].dtype, categorical_dtype)
    # with convert_enum=False the column is read back as integers
    out = core.get(f['table'], convert_enum=False)
    assert len(out.columns) == 5
    assert pd.api.types.is_integer_dtype(out['chrom_enum'].dtype)

    # don't convert categorical to enum
    s.name = 'chrom_string'
    core.put(f['table'], s, store_categories=False)
    out = core.get(f['table'])
    assert len(out.columns) == 6
    assert not isinstance(out['chrom_string'].dtype, categorical_dtype)

    # scalar input
    core.put(f['table'], {'foo': 42})
    out = core.get(f['table'])
    assert len(out.columns) == 7
    assert (out['foo'] == 42).all()


def test_delete():
    """Check ``core.delete`` with no fields, a list of fields, and a single field name."""
    # (extra positional arguments for core.delete, datasets expected to remain)
    scenarios = [
        ((), 0),
        ((['chrom'],), 3),
        (('chrom',), 3),
    ]
    for extra_args, remaining in scenarios:
        # Each scenario starts from a fresh four-column table.
        h5 = make_hdf5_table('a')
        core.delete(h5['table'], *extra_args)
        assert len(h5['table'].keys()) == remaining


def test_region_to_offset_extent(mock_cooler):
    """Check ``core.region_to_extent`` / ``core.region_to_offset`` for two regions.

    Each region is queried both with an explicit bin size of 100 and with
    ``None`` as the bin size; both are expected to give the same answer.
    """
    chrom_ids = pd.Series({"chr1": 0, "chr2": 1})
    binsizes = (100, None)

    # (region, expected first bin, expected last bin)
    cases = [
        (("chr1", 159, 402), 1, 4),
        (("chr1", 159, 400), 1, 3),
    ]
    for region, first, last in cases:
        expected_extent = (first, last + 1)
        for binsize in binsizes:
            extent = core.region_to_extent(mock_cooler, chrom_ids, region, binsize)
            assert extent == expected_extent
        for binsize in binsizes:
            offset = core.region_to_offset(mock_cooler, chrom_ids, region, binsize)
            assert offset == first


def test_interval_ops():
    """Check the truthiness of ``core._comes_before`` and ``core._contains`` on sample intervals."""
    # (positional arguments, keyword arguments, expected truthiness)
    comes_before_cases = [
        ((1, 5, 6, 10), {}, True),
        ((6, 10, 1, 5), {}, False),
        ((1, 5, 6, 10), {'strict': True}, True),
        ((1, 5, 5, 10), {'strict': True}, True),
        ((1, 5, 3, 10), {}, True),
        ((1, 5, 3, 10), {'strict': True}, False),
    ]
    for args, kwargs, expected in comes_before_cases:
        assert bool(core._comes_before(*args, **kwargs)) is expected

    contains_cases = [
        ((1, 10, 3, 5), {}, True),
        ((1, 10, 3, 5), {'strict': True}, True),
        ((1, 10, 3, 10), {}, True),
        ((1, 10, 3, 10), {'strict': True}, False),
        ((1, 5, 6, 10), {}, False),
    ]
    for args, kwargs, expected in contains_cases:
        assert bool(core._contains(*args, **kwargs)) is expected


def test_indexing_mixin():
    """Check how ``core._IndexingMixin`` resolves scalar and slice keys on a 10x10 shape."""

    class Grid(core._IndexingMixin):
        """Minimal consumer of the mixin that returns the resolved bounds."""

        def __init__(self, shape):
            self._shape = shape

        def __getitem__(self, key):
            row_key, col_key = self._unpack_index(key)
            r0, r1 = self._process_slice(row_key, self._shape[0])
            c0, c1 = self._process_slice(col_key, self._shape[1])
            return r0, r1, c0, c1

    grid = Grid((10, 10))

    # Each entry pairs an indexing result with the expected
    # (row_lo, row_hi, col_lo, col_hi) tuple.
    row_scalar = [
        (grid[5], (5, 6, 0, 10)),
        (grid[5, ], (5, 6, 0, 10)),
    ]
    row_slice = [
        (grid[:], (0, 10, 0, 10)),
        (grid[1:5], (1, 5, 0, 10)),
        (grid[:-2], (0, 8, 0, 10)),
        (grid[-2:], (8, 10, 0, 10)),
    ]
    slice_and_scalar = [
        (grid[1:5, 3], (1, 5, 3, 4)),
        (grid[2, 1:5], (2, 3, 1, 5)),
        (grid[2, 0:-2], (2, 3, 0, 8)),
        (grid[-2, 0:-2], (8, 9, 0, 8)),
    ]
    both_scalar = [
        (grid[5, 5], (5, 6, 5, 6)),
    ]
    both_slices = [
        (grid[:, :], (0, 10, 0, 10)),
        (grid[1:5, :], (1, 5, 0, 10)),
        (grid[:, 2:3], (0, 10, 2, 3)),
        (grid[1:5, 2:3], (1, 5, 2, 3)),
    ]
    all_checks = (
        row_scalar + row_slice + slice_and_scalar + both_scalar + both_slices
    )
    for actual, expected in all_checks:
        assert actual == expected

    # An out-of-range scalar is expected to raise IndexError.
    with pytest.raises(IndexError):
        grid[10]

    # An unsupported key type is expected to raise TypeError.
    with pytest.raises(TypeError):
        grid[{}]

    # Disabled check, kept for reference:
    # with pytest.raises(TypeError):
    #     grid[4.5]


def test_selector1d():
    """Check ``core.RangeSelector1D`` key handling and its table-like accessors."""
    nmax = 50

    def echo_bounds(fields, lo, hi):
        # Hand back the bounds so the asserts can inspect them directly.
        return (lo, hi)

    def identity(x):
        return x

    s = core.RangeSelector1D(None, echo_bounds, identity, nmax)
    bound_checks = [
        (s[30], (30, 31)),
        (s[10:20], (10, 20)),
        (s[:20], (0, 20)),
        (s[10:], (10, nmax)),
        (s[:], (0, nmax)),
        (s[:nmax], (0, nmax)),
        (s[:-10], (0, nmax - 10)),
        (s[1:1], (1, 1)),
    ]
    for actual, expected in bound_checks:
        assert actual == expected
    with pytest.raises(IndexError):
        s[:, :]
    with pytest.raises(ValueError):
        s[::2]
    # assert_raises(TypeError, lambda : s['blah'])
    assert s.shape == (nmax,)

    # FIXME - questionable behavior
    assert s[30:20] == (30, 20)  # lo > hi
    assert s[nmax + 10 : nmax + 30] == (nmax + 10, nmax + 30)  # lo > nmax
    assert s[10.0] == (10, 11)  # accepting floats
    # assert s[10.1] == (10.1, 11.1)  # not casting
    # assert s[nmax+10] == (nmax+10, nmax+11)

    def zeros_frame(fields, lo, hi):
        # All-zero frame with one row per selected index and one column per field.
        return pd.DataFrame(
            np.zeros((hi - lo, len(fields))),
            columns=fields
        )

    def parse_range(x):
        # Turn a "lo:hi" string into a list of ints.
        return [int(token) for token in x.split(':')]

    nmax = 50
    sel = core.RangeSelector1D(['a', 'b', 'c'], zeros_frame, parse_range, nmax)
    assert sel.columns.tolist() == ['a', 'b', 'c']
    assert list(sel.keys()) == ['a', 'b', 'c']
    assert isinstance(sel.dtypes, pd.Series)
    assert 'a' in sel
    assert len(sel) == 50
    assert len(sel[['a', 'b']].columns) == 2
    assert len(sel[['a']].columns) == 1
    assert np.all(sel[5] == 0)
    assert np.all(sel[5, ] == 0)
    assert len(sel.fetch('5:10')) == 5

    # some things are broken here
    column_view = sel['a']
    assert len(column_view) == 50
    assert column_view.shape == (50,)
    # column_view.columns ???


def test_selector2d():
    """Check ``core.RangeSelector2D`` key handling, shape, and ``fetch``."""
    nmax = 50

    def echo_window(field, i0, i1, j0, j1):
        # Hand back the bounds so the asserts can inspect them directly.
        return (i0, i1, j0, j1)

    def identity(x):
        return x

    s = core.RangeSelector2D(None, echo_window, identity, (nmax, nmax))
    window_checks = [
        (s[30], (30, 31, 0, nmax)),
        (s[10:20, 10:20], (10, 20, 10, 20)),
        (s[:], (0, nmax, 0, nmax)),
    ]
    for actual, expected in window_checks:
        assert actual == expected
    with pytest.raises(IndexError):
        s[:, :, :]
    with pytest.raises(ValueError):
        s[::2, :]
    assert s.shape == (nmax, nmax)

    def zeros_block(field, i0, i1, j0, j1):
        # All-zero array sized to the requested window.
        return np.zeros((i1 - i0, j1 - j0))

    def fixed_window(x, y=None):
        # Always resolve to the same 10x10 window, whatever the arguments.
        return (0, 10, 0, 10)

    nmax = 50
    sel = core.RangeSelector2D('count', zeros_block, fixed_window, (nmax, nmax))
    assert sel.shape == (50, 50)
    assert len(sel) == 50
    assert sel[:10, 5:10].shape == (10, 5)
    assert sel.fetch('0:10', '0:10').shape == (10, 10)


def test_slice_matrix(mock_cooler):
    """Compare rectangular queries through ``core.CSRReader`` with a dense reference.

    For every ``(i0, i1, j0, j1)`` window, the pixels returned by
    ``core.query_rect`` are assembled into a dense array and compared with
    the same window cut from a reference array built directly from the
    fixture's ``pixels`` columns.
    """
    slices = [
        (0, 10, 0, 10),
        (0, 10, 10, 20),
        (5, 15, 10, 20),
        (10, 20, 5, 15),
        (1, 1, 5, 15),
        (1, 1, 1, 1),
    ]

    # The reference depends only on the fixture, so it is built once rather
    # than once per window: the stored matrix plus its transpose.
    nbins = mock_cooler.attrs["nbins"]
    stored = sparse.coo_matrix(
        (
            mock_cooler["pixels/count"],
            (mock_cooler["pixels/bin1_id"], mock_cooler["pixels/bin2_id"]),
        ),
        (nbins, nbins),
    ).toarray()
    r_full = stored + stored.T

    for i0, i1, j0, j1 in slices:
        triu_reader = core.CSRReader(mock_cooler, "count", max_chunk=10)

        # triangular query: index_col and query must agree on the pixel count
        index = triu_reader.index_col(i0, i1, j0, j1)
        i, j, v = triu_reader.query(i0, i1, j0, j1)
        assert len(index) == len(i)

        # rectangular query, shifted so the window starts at (0, 0)
        i, j, v = core.query_rect(triu_reader.query, i0, i1, j0, j1)
        mat = sparse.coo_matrix((v, (i - i0, j - j0)), (i1 - i0, j1 - j0)).toarray()
        assert np.allclose(r_full[i0:i1, j0:j1], mat)


def test_csr_reader():
    """Placeholder test with an empty body; it performs no checks."""


def test_query_rect():
    """Placeholder test with an empty body; it performs no checks."""