python source code of test

import json
import sys
from copy import copy
from decimal import Decimal, InvalidOperation
from math import isclose, isfinite
from zlib import compress

import pytest
from hypothesis import assume, example, given
from hypothesis.strategies import (
    binary,
    booleans,
    characters,
    floats,
    integers,
    lists,
    recursive,
)

import pikepdf
from pikepdf import (
    Array,
    Dictionary,
    Name,
    Object,
    Operator,
    Pdf,
    PdfError,
    Stream,
    String,
)
from pikepdf import _qpdf as qpdf

# pylint: disable=eval-used, redefined-outer-name

encode = qpdf._encode
roundtrip = qpdf._roundtrip


def test_none():
    assert encode(None) is None


def test_booleans():
    assert encode(True) == True
    assert encode(False) == False


@given(characters(min_codepoint=0x20, max_codepoint=0x7F))
@example('')
def test_ascii_involution(ascii_):
    b = ascii_.encode('ascii')
    assert encode(b) == b


@given(
    characters(min_codepoint=0x0, max_codepoint=0xFEF0, blacklist_categories=('Cs',))
)
@example('')
def test_unicode_involution(s):
    assert str(encode(s)) == s


@given(binary(min_size=0, max_size=300))
def test_binary_involution(binary_):
    assert bytes(encode(binary_)) == binary_


int64s = integers(min_value=-9223372036854775807, max_value=9223372036854775807)


@given(int64s, int64s)
def test_integer_comparison(a, b):
    equals = a == b
    encoded_equals = encode(a) == encode(b)
    assert encoded_equals == equals

    lessthan = a < b
    encoded_lessthan = encode(a) < encode(b)
    assert lessthan == encoded_lessthan


@given(integers(-(10 ** 12), 10 ** 12), integers(0, 12))
def test_decimal_involution(num, radix):
    strnum = str(num)
    if radix > len(strnum):
        strnum = strnum[:radix] + '.' + strnum[radix:]

    d = Decimal(strnum)
    assert encode(d) == d


@given(floats())
def test_decimal_from_float(f):
    d = Decimal(f)
    if isfinite(f) and d.is_finite():
        try:
            # PDF is limited to ~5 sig figs
            decstr = str(d.quantize(Decimal('1.000000')))
        except InvalidOperation:
            return  # PDF doesn't support exponential notation
        try:
            py_d = Object.parse(decstr)
        except RuntimeError as e:
            if 'overflow' in str(e) or 'underflow' in str(e):
                py_d = Object.parse(str(f))

        assert isclose(py_d, d, abs_tol=1e-5), (d, f.hex())
    else:
        with pytest.raises(PdfError):
            Object.parse(str(d))


@given(lists(integers(-10, 10), min_size=0, max_size=10))
def test_list(array):
    a = pikepdf.Array(array)
    assert a == array


@given(lists(lists(integers(1, 10), min_size=1, max_size=5), min_size=1, max_size=5))
def test_nested_list(array):
    a = pikepdf.Array(array)
    assert a == array


@given(
    recursive(
        integers(1, 10) | booleans(),
        lambda children: lists(children),  # pylint: disable=unnecessary-lambda
        max_leaves=20,
    )
)
def test_nested_list2(array):
    assume(isinstance(array, list))
    a = pikepdf.Array(array)
    assert a == array


def test_list_apis():
    a = pikepdf.Array([1, 2, 3])
    a[1] = None
    assert a[1] is None
    assert len(a) == 3
    del a[1]
    assert len(a) == 2
    a[-1] = Name('/Foo')
    with pytest.raises(IndexError):
        a[-5555] = Name.Foo
    assert a == pikepdf.Array([1, Name.Foo])
    a.append(4)
    assert a == pikepdf.Array([1, Name.Foo, 4])
    a.extend([42, 666])
    assert a == pikepdf.Array([1, Name.Foo, 4, 42, 666])


def test_stack_depth():
    a = [42]
    for _ in range(100):
        a = [a]
    rlimit = sys.getrecursionlimit()
    try:
        sys.setrecursionlimit(100)
        with pytest.raises(RecursionError):
            assert encode(a) == a
        with pytest.raises(RecursionError):
            encode(a) == encode(a)  # pylint: disable=expression-not-assigned
        with pytest.raises(RecursionError):
            repr(a)
    finally:
        sys.setrecursionlimit(rlimit)  # So other tests are not affected


def test_bytes():
    b = b'\x79\x78\x77\x76'
    qs = String(b)
    assert bytes(qs) == b

    s = 'é'
    qs = String(s)
    assert str(qs) == s

    assert Name('/xyz') == b'/xyz'


def test_len_array():
    assert len(Array([])) == 0
    assert len(Array()) == 0
    assert len(Array([3])) == 1


def test_wrap_array():
    assert Name('/Foo').wrap_in_array() == Array([Name('/Foo')])
    assert Array([42]).wrap_in_array() == Array([42])


def test_name_equality():
    # Who needs transitivity? :P
    # While this is less than ideal ('/Foo' != b'/Foo') it allows for slightly
    # sloppy tests like if colorspace == '/Indexed' without requiring
    # Name('/Indexed') everywhere
    assert Name('/Foo') == '/Foo'
    assert Name('/Foo') == b'/Foo'
    assert Name.Foo == Name('/Foo')


def test_no_len():
    with pytest.raises(TypeError):
        len(Name.Foo)
        len(String('abc'))


def test_unslashed_name():
    with pytest.raises(ValueError, match='must begin with'):
        Name('Monty') not in []  # pylint: disable=expression-not-assigned


def test_empty_name():
    with pytest.raises(ValueError):
        Name('')
    with pytest.raises(ValueError):
        Name('/')


def test_forbidden_name_usage():
    with pytest.raises(TypeError):
        Name.Monty = Name.Python
    with pytest.raises(TypeError):
        Name['/Monty']  # pylint: disable=pointless-statement


class TestHashViolation:
    def check(self, a, b):
        assert a == b, "invalid test case"
        assert hash(a) == hash(b), "hash violation"

    def test_unequal_but_similar(self):
        assert Name('/Foo') != String('/Foo')

    def test_numbers(self):
        self.check(Object.parse('1.0'), 1)
        self.check(Object.parse('42'), 42)

    def test_bool_comparison(self):
        self.check(Object.parse('0.0'), False)
        self.check(True, 1)

    def test_string(self):
        utf16 = b'\xfe\xff' + 'hello'.encode('utf-16be')
        self.check(String(utf16), String('hello'))

    def test_name(self):
        self.check(Name.This, Name('/This'))

    def test_operator(self):
        self.check(Operator('q'), Operator('q'))

    def test_array_not_hashable(self):
        with pytest.raises(TypeError):
            {Array([3]): None}  # pylint: disable=expression-not-assigned


def test_not_constructible():
    with pytest.raises(TypeError, match="constructor"):
        Object()


class TestRepr:
    def test_repr_dict(self):
        d = Dictionary(
            {
                '/Boolean': True,
                '/Integer': 42,
                '/Real': Decimal('42.42'),
                '/String': String('hi'),
                '/Array': Array([1, 2, 3.14]),
                '/Operator': Operator('q'),
                '/Dictionary': Dictionary({'/Color': 'Red'}),
            }
        )
        expected = """\
            pikepdf.Dictionary({
                "/Array": [ 1, 2, Decimal('3.140000') ],
                "/Boolean": True,
                "/Dictionary": {
                    "/Color": "Red"
                },
                "/Integer": 42,
                "/Operator": pikepdf.Operator("q"),
                "/Real": Decimal('42.42'),
                "/String": "hi"
            })
        """

        def strip_all_whitespace(s):
            return ''.join(s.split())

        assert strip_all_whitespace(repr(d)) == strip_all_whitespace(expected)
        assert eval(repr(d)) == d

    def test_repr_scalar(self):
        scalars = [
            False,
            666,
            Decimal('3.14'),
            String('scalar'),
            Name('/Bob'),
            Operator('Q'),
        ]
        for s in scalars:
            assert eval(repr(s)) == s

    def test_repr_indirect(self, resources):
        graph = pikepdf.open(resources / 'graph.pdf')
        repr_page0 = repr(graph.pages[0])
        assert repr_page0[0] == '<', 'should not be constructible'


def test_utf16_error():
    with pytest.raises((UnicodeEncodeError, RuntimeError)):
        str(encode('\ud801'))


class TestDictionary:
    def test_contains(self):
        d = Dictionary({'/Monty': 'Python', '/Flying': 'Circus'})
        assert Name.Flying in d
        assert Name('/Monty') in d
        assert Name.Brian not in d

    def test_none(self):
        d = pikepdf.Dictionary({'/One': 1, '/Two': 2})
        with pytest.raises(ValueError):
            d['/Two'] = None

    def test_init(self):
        d1 = pikepdf.Dictionary({'/Animal': 'Dog'})
        d2 = pikepdf.Dictionary(Animal='Dog')
        assert d1 == d2

    def test_kwargs(self):
        d = pikepdf.Dictionary(A='a', B='b', C='c')
        assert '/B' in d
        assert 'B' in dir(d)

    def test_iter(self):
        d = pikepdf.Dictionary(A='a')
        for k in d:
            assert k == '/A'
            assert d[k] == 'a'

    def test_items(self):
        d = pikepdf.Dictionary(A='a')
        for _k in d.items():
            pass

    def test_str(self):
        d = pikepdf.Dictionary(A='a')
        with pytest.raises(NotImplementedError):
            str(d)

    def test_attr(self):
        d = pikepdf.Dictionary(A='a')
        with pytest.raises(AttributeError):
            d.invalidname  # pylint: disable=pointless-statement

    def test_get(self):
        d = pikepdf.Dictionary(A='a')
        assert d.get(Name.A) == 'a'
        assert d.get(Name.Resources, 42) == 42

    def test_nonpage(self):
        d = pikepdf.Dictionary(A='a')
        with pytest.raises(TypeError):
            d.images  # pylint: disable=pointless-statement
        with pytest.raises(TypeError):
            d.page_contents_add(b'', True)

    def test_bad_name(self):
        with pytest.raises(ValueError, match=r"must begin with '/'"):
            pikepdf.Dictionary({'/Slash': 'dot', 'unslash': 'error'})


def test_not_convertible():
    class PurePythonObj:
        def __repr__(self):
            return 'PurePythonObj()'

    c = PurePythonObj()
    with pytest.raises(RuntimeError):
        encode(c)
    with pytest.raises(RuntimeError):
        pikepdf.Array([1, 2, c])

    d = pikepdf.Dictionary()
    with pytest.raises(RuntimeError):
        d.SomeKey = c

    assert d != c


def test_json():
    d = Dictionary(
        {
            '/Boolean': True,
            '/Integer': 42,
            '/Real': Decimal('42.42'),
            '/String': String('hi'),
            '/Array': Array([1, 2, 3.14]),
            '/Dictionary': Dictionary({'/Color': 'Red'}),
        }
    )
    json_bytes = d.to_json(False)
    try:
        as_dict = json.loads(json_bytes)
    except TypeError:
        as_dict = json.loads(json_bytes.decode('utf-8'))  # Py3.5 shim
    assert as_dict == {
        "/Array": [1, 2, 3.140000],
        "/Boolean": True,
        "/Dictionary": {"/Color": "Red"},
        "/Integer": 42,
        "/Real": 42.42,
        "/String": "hi",
    }


@pytest.fixture
def stream_object():
    pdf = pikepdf.new()
    return Stream(pdf, b'')


@pytest.fixture
def sandwich(resources):
    return Pdf.open(resources / 'sandwich.pdf')


class TestStreamReadWrite:
    def test_basic(self, stream_object):
        stream_object.write(b'abc')
        assert stream_object.read_bytes() == b'abc'

    def test_compressed_readback(self, stream_object):
        stream_object.write(compress(b'def'), filter=Name.FlateDecode)
        assert stream_object.read_bytes() == b'def'

    def test_stacked_compression(self, stream_object):
        double_compressed = compress(compress(b'pointless'))
        stream_object.write(
            double_compressed, filter=[Name.FlateDecode, Name.FlateDecode]
        )
        assert stream_object.read_bytes() == b'pointless'
        assert stream_object.read_raw_bytes() == double_compressed

    def test_explicit_decodeparms(self, stream_object):
        double_compressed = compress(compress(b'pointless'))
        stream_object.write(
            double_compressed,
            filter=[Name.FlateDecode, Name.FlateDecode],
            decode_parms=[None, None],
        )
        assert stream_object.read_bytes() == b'pointless'
        assert stream_object.read_raw_bytes() == double_compressed

    def test_no_kwargs(self, stream_object):
        with pytest.raises(TypeError):
            stream_object.write(compress(b'x'), [Name.FlateDecode])

    def test_ccitt(self, stream_object):
        ccitt = b'\x00'  # Not valid data, just for testing decode_parms
        stream_object.write(
            ccitt,
            filter=Name.CCITTFaxDecode,
            decode_parms=Dictionary(K=-1, Columns=8, Length=1),
        )

    def test_stream_bytes(self, stream_object):
        stream_object.write(b'pi')
        assert bytes(stream_object) == b'pi'


def test_copy():
    d = Dictionary(
        {
            '/Boolean': True,
            '/Integer': 42,
            '/Real': Decimal('42.42'),
            '/String': String('hi'),
            '/Array': Array([1, 2, 3.14]),
            '/Dictionary': Dictionary({'/Color': 'Red'}),
        }
    )
    d2 = copy(d)
    assert d2 == d
    assert d2 is not d
    assert d2['/Dictionary'] == d['/Dictionary']


def test_object_iteration(sandwich):
    expected = len(sandwich.objects)
    loops = 0
    for obj in sandwich.objects:
        loops += 1
        if isinstance(obj, Dictionary):
            assert len(obj.keys()) >= 1
    assert expected == loops


@pytest.mark.parametrize(
    'obj', [Array([1]), Dictionary({'/A': 'b'}), Operator('q'), String('s')]
)
def test_object_isinstance(obj):
    assert isinstance(obj, (Array, Dictionary, Operator, String, Stream))
    assert isinstance(obj, type(obj))
    assert isinstance(obj, Object)


def test_stream_isinstance():
    pdf = pikepdf.new()
    stream = Stream(pdf, b'xyz')
    assert isinstance(stream, Stream)
    assert isinstance(stream, Object)


def test_object_classes():
    classes = [Array, Dictionary, Operator, String, Stream]
    for cls in classes:
        assert issubclass(cls, Object)


def test_operator_create():
    Operator('q')
    assert Operator('q') == Operator('q')
    assert Operator('q') != Operator('Q')


@pytest.fixture(scope="function")
def abcxyz_stream():
    pdf = pikepdf.new()
    data = b'abcxyz'
    stream = Stream(pdf, data)
    return stream


def test_stream_as_dict(abcxyz_stream):
    stream = abcxyz_stream
    assert Name.Length in stream
    stream.TestAttrAccess = True
    stream['/TestKeyAccess'] = True
    stream[Name.TestKeyNameAccess] = True
    assert len(stream.keys()) == 4  # Streams always have a /Length

    assert all((v == len(stream.read_bytes()) or v == True) for k, v in stream.items())

    assert stream.stream_dict.TestAttrAccess

    assert stream.get(Name.MissingName, 3.14) == 3.14

    assert {k for k in stream} == {
        '/TestKeyAccess',
        '/TestAttrAccess',
        '/Length',
        '/TestKeyNameAccess',
    }


def test_stream_length_modify(abcxyz_stream):
    stream = abcxyz_stream

    with pytest.warns(DeprecationWarning):
        stream.Length = 42
    with pytest.warns(DeprecationWarning):
        del stream.Length


def test_len_stream(abcxyz_stream):
    with pytest.raises(TypeError):
        len(abcxyz_stream)  # pylint: disable=pointless-statement
    assert len(abcxyz_stream.stream_dict) == 1