python source code of tiff

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import io
import logging
from pathlib import Path
from typing import Optional

import dask.array as da
import numpy as np
from dask import delayed
from tifffile import TiffFile

from .. import exceptions, types
from ..buffer_reader import BufferReader
from ..constants import Dimensions
from .reader import Reader

###############################################################################

log = logging.getLogger(__name__)

###############################################################################


class TiffReader(Reader):
    """
    TiffReader wraps tifffile to provide the same reading capabilities but abstracts
    the specifics of using the backend library to create a unified interface. This
    enables higher level functions to duck type the File Readers.

    Parameters
    ----------
    data: types.FileLike
        A string or path to the TIFF file to be read.
    S: int
        If the image has different dimensions on any scene from another, the dask array
        construction will fail.
        In that case, use this parameter to specify a specific scene to construct a
        dask array for.
        Default: 0 (select the first scene)
    """

    def __init__(self, data: types.FileLike, S: int = 0, **kwargs):
        # Run super init to check filepath provided
        super().__init__(data, **kwargs)

        # Store parameters needed for dask read
        self.specific_s_index = S

        # Lazy load and hold on to dtype
        self._dtype = None

    @staticmethod
    def _is_this_type(buffer: io.BufferedIOBase) -> bool:
        with BufferReader(buffer) as buffer_reader:
            # Per the TIFF-6 spec
            # (https://www.itu.int/itudoc/itu-t/com16/tiff-fx/docs/tiff6.pdf),
            # 'II' is little-endian (Intel format)
            # 'MM' is big-endian (Motorola format)
            if buffer_reader.endianness not in [
                buffer_reader.INTEL_ENDIAN,
                buffer_reader.MOTOROLA_ENDIAN,
            ]:
                return False
            magic = buffer_reader.read_uint16()

            # Per TIFF-6, magic is 42.
            if magic == 42:
                ifd_offset = buffer_reader.read_uint32()
                if ifd_offset == 0:
                    return False

            # Per BigTIFF
            # (https://www.awaresystems.be/imaging/tiff/bigtiff.html), magic is 43.
            if magic == 43:
                # Alex magic here...
                if buffer_reader.read_uint16() != 8:
                    return False
                if buffer_reader.read_uint16() != 0:
                    return False
                ifd_offset = buffer_reader.read_uint64()
                if ifd_offset == 0:
                    return False
            return True

    @staticmethod
    def _imread(img: Path, scene: int, page: int) -> np.ndarray:
        # Load Tiff
        with TiffFile(img) as tiff:
            # Get proper scene
            scene = tiff.series[scene]

            # Get proper page
            page = scene.pages[page]

            # Return numpy
            return page.asarray()

    @staticmethod
    def _scene_shape_is_consistent(tiff: TiffFile, S: int) -> bool:
        scenes = tiff.series
        operating_shape = scenes[0].shape
        for scene in scenes:
            if scene.shape != operating_shape:
                log.info(
                    f"File contains variable dimensions per scene, "
                    f"selected scene: {S} for data "
                    f"retrieval."
                )
                return False

        return True

    def _read_delayed(self) -> da.core.Array:
        # Load Tiff
        with TiffFile(self._file) as tiff:
            # Check each scene has the same shape
            # If scene shape checking fails, use the specified scene and update
            # operating shape
            scenes = tiff.series
            operating_shape = scenes[0].shape
            if not self._scene_shape_is_consistent(tiff, S=self.specific_s_index):
                operating_shape = scenes[self.specific_s_index].shape
                scenes = [scenes[self.specific_s_index]]

            # Get sample yx plane
            sample = scenes[0].pages[0].asarray()

            # Combine length of scenes and operating shape
            # Replace YX dims with empty dimensions
            operating_shape = (len(scenes), *operating_shape)
            operating_shape = operating_shape[:-2] + (1, 1)

            # Make ndarray for lazy arrays to fill
            lazy_arrays = np.ndarray(operating_shape, dtype=object)
            for all_page_index, (np_index, _) in enumerate(np.ndenumerate(lazy_arrays)):
                # Scene index is the first index in np_index
                scene_index = np_index[0]

                # This page index is current enumeration divided by scene index + 1
                # For example if the image has 10 Z slices and 5 scenes, there
                # would be 50 total pages
                this_page_index = all_page_index // (scene_index + 1)

                # Fill the numpy array with the delayed arrays
                lazy_arrays[np_index] = da.from_delayed(
                    delayed(TiffReader._imread)(
                        self._file, scene_index, this_page_index
                    ),
                    shape=sample.shape,
                    dtype=sample.dtype,
                )

            # Convert the numpy array of lazy readers into a dask array
            data = da.block(lazy_arrays.tolist())

            # Only return the scene dimension if multiple scenes are present
            if len(scenes) == 1:
                data = data[0, :]

            return data

    def _read_immediate(self) -> np.ndarray:
        # Load Tiff
        with TiffFile(self._file) as tiff:
            # Check each scene has the same shape
            # If scene shape checking fails, use the specified scene and update
            # operating shape
            scenes = tiff.series
            if not self._scene_shape_is_consistent(tiff, S=self.specific_s_index):
                return scenes[self.specific_s_index].asarray()

            # Read each scene and stack if single scene
            if len(scenes) > 1:
                return np.stack([s.asarray() for s in scenes])

            # Else, return single scene
            return tiff.asarray()

    def load_slice(self, slice_index: int = 0) -> np.ndarray:
        with TiffFile(self._file) as tiff:
            return tiff.asarray(key=slice_index)

    def dtype(self):
        if self._dtype is None:
            with TiffFile(self._file) as tiff:
                self._dtype = tiff.pages[0].dtype

        return self._dtype

    @property
    def dims(self) -> str:
        if self._dims is None:
            # Get a single scenes dimensions in order
            with TiffFile(self._file) as tiff:
                single_scene_dims = tiff.series[0].pages.axes

                # We can sometimes trust the dimension info in the image
                if all([d in Dimensions.DefaultOrder for d in single_scene_dims]):
                    # Add scene dimension only if there are multiple scenes
                    if len(tiff.series) == 1:
                        self._dims = single_scene_dims
                    else:
                        self._dims = f"{Dimensions.Scene}{single_scene_dims}"
                # Sometimes the dimension info is wrong in certain dimensions, so guess
                # that dimension
                else:
                    guess = self.guess_dim_order(tiff.series[0].pages.shape)
                    best_guess = []
                    for dim_from_meta in single_scene_dims:
                        if dim_from_meta in Dimensions.DefaultOrder:
                            best_guess.append(dim_from_meta)
                        else:
                            appended_dim = False
                            for guessed_dim in guess:
                                if guessed_dim not in best_guess:
                                    best_guess.append(guessed_dim)
                                    appended_dim = True
                                    log.info(
                                        f"Unsure how to handle dimension: "
                                        f"{dim_from_meta}. "
                                        f"Replaced with guess: {guessed_dim}"
                                    )
                                    break

                            # All of our guess dims were already in the dim list,
                            # append the dim read from meta
                            if not appended_dim:
                                best_guess.append(dim_from_meta)

                    best_guess = "".join(best_guess)

                    # Add scene dimension only if there are multiple scenes
                    if len(tiff.series) == 1:
                        self._dims = best_guess
                    else:
                        self._dims = f"{Dimensions.Scene}{best_guess}"

        return self._dims

    @dims.setter
    def dims(self, dims: str):
        # Check amount of provided dims against data shape
        if len(dims) != len(self.dask_data.shape):
            raise exceptions.InvalidDimensionOrderingError(
                f"Provided too many dimensions for the associated file. "
                f"Received {len(dims)} dimensions [dims: {dims}] "
                f"for image with {len(self.data.shape)} dimensions "
                f"[shape: {self.data.shape}]."
            )

        # Set the dims
        self._dims = dims

    @staticmethod
    def get_image_description(buffer: io.BufferedIOBase) -> Optional[bytearray]:
        """Retrieve the image description as one large string."""
        description_length = 0
        description_offset = 0

        with BufferReader(buffer) as buffer_reader:
            # Per the TIFF-6 spec
            # (https://www.itu.int/itudoc/itu-t/com16/tiff-fx/docs/tiff6.pdf),
            # 'II' is little-endian (Intel format)
            # 'MM' is big-endian (Motorola format)
            if buffer_reader.endianness not in [
                buffer_reader.INTEL_ENDIAN,
                buffer_reader.MOTOROLA_ENDIAN,
            ]:
                return None
            magic = buffer_reader.read_uint16()

            # Per TIFF-6, magic is 42.
            if magic == 42:
                found = False
                while not found:
                    ifd_offset = buffer_reader.read_uint32()
                    if ifd_offset == 0:
                        return None
                    buffer_reader.buffer.seek(ifd_offset, 0)
                    entries = buffer_reader.read_uint16()
                    for n in range(0, entries):
                        tag = buffer_reader.read_uint16()
                        type = buffer_reader.read_uint16()
                        count = buffer_reader.read_uint32()
                        offset = buffer_reader.read_uint32()
                        if tag == 270:
                            description_length = count - 1  # drop the NUL from the end
                            description_offset = offset
                            found = True
                            break

            # Per BigTIFF
            # (https://www.awaresystems.be/imaging/tiff/bigtiff.html), magic is 43.
            if magic == 43:
                # Alex magic here...
                if buffer_reader.read_uint16() != 8:
                    return None
                if buffer_reader.read_uint16() != 0:
                    return None
                found = False
                while not found:
                    ifd_offset = buffer_reader.read_uint64()
                    if ifd_offset == 0:
                        return None
                    buffer_reader.buffer.seek(ifd_offset, 0)
                    entries = buffer_reader.read_uint64()
                    for n in range(0, entries):
                        tag = buffer_reader.read_uint16()
                        type = buffer_reader.read_uint16()  # noqa: F841
                        count = buffer_reader.read_uint64()
                        offset = buffer_reader.read_uint64()
                        if tag == 270:
                            description_length = count - 1  # drop the NUL from the end
                            description_offset = offset
                            found = True
                            break

            if description_offset == 0:
                # Nothing was found
                return bytearray("")
            else:
                buffer_reader.buffer.seek(description_offset, 0)
                return bytearray(buffer_reader.buffer.read(description_length))

    @property
    def metadata(self) -> str:
        if self._metadata is None:
            with open(self._file, "rb") as rb:
                description = self.get_image_description(rb)
            if description is None:
                self._metadata = ""
            else:
                self._metadata = description.decode()
        return self._metadata