# Python source code of data

import numpy as np
import cv2
from glob import glob
import torch.utils.data as data
import os
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm; tqdm.monitor_interval = 0  # noqa
import pandas as pd
from collections import defaultdict, Counter  # noqa
import torch as th

from albumentations import Compose, ShiftScaleRotate, RandomCrop, BboxParams, \
    ToGray, CLAHE, GaussNoise, GaussianBlur, RandomGamma, \
    RandomBrightnessContrast, RGBShift, HueSaturationValue  # noqa
# datetime.utcfromtimestamp(float('.'.join(a.iloc[0, 0].split('.')[-2:]))).
# strftime(('%Y-%m-%d %H:%M:%S'))


class KuzushijiDataset(data.Dataset):
  """Image dataset with optional character boxes for center-based detection.

  Every image is loaded as RGB, zero-padded to a height / width ratio of 1.5
  and resized to ``(height, width)``. When ``gt_boxes`` is given the boxes are
  mapped into the resized image, optionally augmented together with the
  image, and converted into a center heat-map plus fixed-size ``centers`` and
  ``classes`` arrays.

  Args:
    image_fns: sequence of image file paths.
    gt_boxes: optional dict mapping an image id (file name without directory
      and extension) to a list of ``(x, y, w, h, label)`` boxes given in
      pixels of the original image.
    label_to_int: dict mapping a label to its integer class id; needed
      whenever ``gt_boxes`` is given.
    augment: apply the augmentation pipeline; only allowed with ``gt_boxes``.
    train_image_dir: unused, kept for interface compatibility.
    test_image_dir: unused, kept for interface compatibility.
    height: image height after resizing (before augmentation).
    width: image width after resizing (before augmentation).
    feature_scale: size of the target mask relative to the image.
  """

  def __init__(self, image_fns, gt_boxes=None,
               label_to_int=None,
               augment=False,
               train_image_dir='train_images',
               test_image_dir='test_images',
               height=1536,
               width=1536,
               feature_scale=0.25):
    self.image_fns = image_fns
    self.gt_boxes = gt_boxes
    self.label_to_int = label_to_int
    self.augment = augment
    # geometric transforms first (including a fixed 512x512 crop), then
    # photometric ones; boxes are passed in COCO (x, y, w, h) format
    self.aug = Compose([
      ShiftScaleRotate(p=0.9, rotate_limit=10,
          scale_limit=0.2, border_mode=cv2.BORDER_CONSTANT),
      RandomCrop(512, 512, p=1.0),
      ToGray(),
      CLAHE(),
      GaussNoise(),
      GaussianBlur(),
      RandomBrightnessContrast(),
      RandomGamma(),
      RGBShift(),
      HueSaturationValue(),
    ], bbox_params=BboxParams(format='coco', min_visibility=0.75))

    # filled by cache(); None means images are read from disk on demand
    self.encoded_cache = None
    self.height = height
    self.width = width
    self.feature_scale = feature_scale

  def cache(self):
    """Read all image files into memory as encoded (undecoded) bytes.

    Afterwards ``__getitem__`` decodes images from memory instead of
    reading them from disk.
    """
    print("Caching ... ")
    with ThreadPoolExecutor() as e:
      encoded_imgs = list(tqdm(e.map(self.read_encoded, self.image_fns),
          total=len(self.image_fns)))
    # publish the cache only once it is complete so that a concurrent
    # __getitem__ never sees a partially filled dict
    self.encoded_cache = dict(zip(self.image_fns, encoded_imgs))

  def read_encoded(self, fn):
    """Return the raw bytes of file `fn` as a 1-D uint8 array."""
    with open(fn, 'rb') as f:
      img_bytes = np.frombuffer(f.read(), dtype=np.uint8)
    return img_bytes

  @staticmethod
  def fn_to_id(fn):
    """Return the file name of `fn` without directory and extension."""
    return os.path.splitext(os.path.basename(fn))[0]

  def boxes_to_mask_centers_classes(self, boxes, height=1024, width=1024,
        merge_masks=True, scale_x=1, scale_y=1, feature_scale=0.25,
        num_max_samples=620 * 9):
    """Rasterize box centers into a heat-map and list the target pixels.

    Args:
      boxes: iterable of ``(x, y, w, h, label)``.
      height: image height in pixels.
      width: image width in pixels.
      merge_masks: unused, kept for interface compatibility.
      scale_x: extra horizontal scale applied to the box coordinates.
      scale_y: extra vertical scale applied to the box coordinates.
      feature_scale: mask size relative to the image size.
      num_max_samples: number of rows of the returned `centers` / `classes`.

    Returns:
      mask: float32 array of shape
        ``(int(height * feature_scale), int(width * feature_scale))`` with a
        3x3 kernel (1.0 at the center, 0.75 / 0.5 around it) written at every
        box center; later boxes overwrite earlier ones where they overlap.
      centers: int64 array ``(num_max_samples, 2)`` of ``(x, y)`` mask
        coordinates of each center and its in-bounds 8-neighbours; unused
        rows are -1.
      classes: int64 array ``(num_max_samples,)`` holding the class id of
        the corresponding row of `centers`; unused rows are -1.

    Raises:
      IndexError: if more than `num_max_samples` target pixels are produced.
    """
    mask_h = int(height * feature_scale)
    mask_w = int(width * feature_scale)
    # one pixel of padding on every side so the 3x3 kernel can be written
    # around centers on the border without clipping
    mask = np.zeros((mask_h + 2, mask_w + 2), dtype=np.float32)
    centers = -1 * np.ones((num_max_samples, 2), dtype=np.int64)
    classes = -1 * np.ones((num_max_samples, ), dtype=np.int64)
    pos_kernel = np.float32([[0.5, 0.75, 0.5],
                             [0.75, 1.0, 0.75],
                             [0.5, 0.75, 0.5]])
    center_ind = 0
    for box in boxes:
      x, y, w, h, ll = box
      cx = int(round(feature_scale * scale_x * (x + w / 2)))
      cy = int(round(feature_scale * scale_y * (y + h / 2)))
      # drop centers outside of the mask; negative values must be rejected
      # too, otherwise the slice below is empty and the assignment fails
      if not (0 <= cx < mask_w and 0 <= cy < mask_h):
        continue
      # in padded coordinates the kernel centered on (cx, cy) starts at
      # (cx, cy) itself
      mask[cy: cy + 3, cx: cx + 3] = pos_kernel
      label_int = self.label_to_int[ll]
      for x_offset in (-1, 0, 1):
        cxx = cx + x_offset
        if cxx < 0 or cxx >= mask_w:
          continue
        for y_offset in (-1, 0, 1):
          cyy = cy + y_offset
          if cyy < 0 or cyy >= mask_h:
            continue

          centers[center_ind] = cxx, cyy
          classes[center_ind] = label_int
          center_ind += 1

    # remove padding
    mask = mask[1: -1, 1: -1]
    return mask, centers, classes

  @staticmethod
  def mask_to_rle(img, mask_value=255, transpose=True):
    """Run-length encode the pixels of `img` equal to `mask_value`.

    The image is cast to int32, optionally transposed and flattened. Each
    run is emitted as two numbers: the start offset relative to the end of
    the previous run (relative to 0 for the first run) and the run length.

    Returns:
      The numbers joined by single spaces, or "-1" when there is no run.
    """
    img = np.int32(img)
    if transpose:
      img = img.T
    # flatten() copies, so the caller's array is not modified below
    img = img.flatten()
    img[img == mask_value] = 1
    # zero padding guarantees every run has both a rising and a falling edge
    pimg = np.pad(img, 1, mode='constant')
    diff = np.diff(pimg)
    starts = np.where(diff == 1)[0]
    ends = np.where(diff == -1)[0]
    rle = []
    previous_end = 0
    for start, end in zip(starts, ends):
      rle.append(str(start - previous_end))
      rle.append(str(end - start))
      previous_end = end
    if not rle:
      return "-1"
    return " ".join(rle)

  @staticmethod
  def get_paddings(h, w, ratio):
    """Compute the padding that brings an ``h x w`` image to ``h / w == ratio``.

    Only one axis is padded; the padding is split between both sides with
    the extra pixel, if any, going to the bottom / right.

    Returns:
      ``(pad_top, pad_bottom, pad_left, pad_right)``.
    """
    current_ratio = h / w
    # pad height
    if current_ratio < ratio:
      pad_h = int(w * ratio - h)
      pad_top = pad_h // 2
      pad_bottom = pad_h - pad_top
      pad_left, pad_right = 0, 0
    # pad width
    else:
      pad_w = int(h / ratio - w)
      pad_left = pad_w // 2
      pad_right = pad_w - pad_left
      pad_top, pad_bottom = 0, 0

    return pad_top, pad_bottom, pad_left, pad_right

  @staticmethod
  def pad_to_ratio(img, ratio):
    """Zero-pad an ``(h, w, c)`` image so that ``h / w`` matches `ratio`.

    Returns:
      ``(padded_img, pad_top, pad_left)``; the two offsets are what must be
      added to coordinates of the original image.
    """
    h, w = img.shape[:2]
    pad_top, pad_bottom, pad_left, pad_right = KuzushijiDataset.get_paddings(
        h, w, ratio)
    paddings = ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0))
    img = np.pad(img, paddings, mode='constant')
    return img, pad_top, pad_left

  def _load_rgb(self, fn):
    """Load image `fn` as an RGB array, from the cache when it is filled."""
    if self.encoded_cache is not None:
      img = cv2.imdecode(self.encoded_cache[fn], 1)
    else:
      img = cv2.imread(fn, 1)
    if img is None:
      raise IOError("Could not read image: {}".format(fn))
    # OpenCV decodes to BGR; convert on both paths so that cached and
    # uncached loading return the same channel order
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

  def _rescale_box(self, box, pad_left, pad_top, scale_x, scale_y):
    """Map a box of the original image into the padded, resized image."""
    x_min, y_min, box_w, box_h, ll = box
    # correct padding, then scale
    x_min = (x_min + pad_left) * scale_x
    y_min = (y_min + pad_top) * scale_y
    box_w, box_h = box_w * scale_x, box_h * scale_y
    # clamp boxes that stick out of the right / bottom image border
    if box_w > self.width - x_min:
      print("W out")
      box_w = self.width - x_min
    if box_h > self.height - y_min:
      print("H out")
      box_h = self.height - y_min
    return x_min, y_min, box_w, box_h, ll

  def __getitem__(self, index, to_tensor=True):
    """Load and prepare the sample at `index`.

    Args:
      index: position in ``image_fns``.
      to_tensor: return torch tensors (image as channels-first) instead of
        numpy arrays.

    Returns:
      ``(img, fn)`` when the dataset has no ``gt_boxes``, otherwise
      ``(img, fn, mask, centers, classes)`` with the last three as produced
      by ``boxes_to_mask_centers_classes`` (mask gets a leading channel axis
      when `to_tensor` is true).

    Raises:
      IOError: if the image cannot be read or decoded.
    """
    fn = self.image_fns[index]
    img = self._load_rgb(fn)

    img, pad_top, pad_left = self.pad_to_ratio(img, ratio=1.5)
    h, w = img.shape[:2]
    assert img.ndim == 3
    img = cv2.resize(img, (self.width, self.height))
    scale_x, scale_y = self.width / w, self.height / h

    if self.gt_boxes is None:
      assert not self.augment, "Don't"
      if to_tensor:
        img = th.from_numpy(img.transpose((2, 0, 1)).copy())
      return img, fn

    # builds a new list, the stored ground truth is left untouched
    gt_boxes = [
      self._rescale_box(box, pad_left, pad_top, scale_x, scale_y)
      for box in self.gt_boxes[self.fn_to_id(fn)]]

    if self.augment:
      augmented = self.aug(image=img, bboxes=gt_boxes)
      img, gt_boxes = augmented['image'], augmented['bboxes']

    # augmentation may have cropped the image, so use its current size
    curr_h, curr_w = img.shape[:2]
    mask, centers, classes = self.boxes_to_mask_centers_classes(
        gt_boxes, height=curr_h, width=curr_w,
        scale_x=1.0, scale_y=1.0, feature_scale=self.feature_scale)

    if to_tensor:
      img = th.from_numpy(img.transpose((2, 0, 1)).copy())
      mask = th.from_numpy(np.expand_dims(mask, 0).copy())
      centers = th.from_numpy(centers)
      classes = th.from_numpy(classes)

    return img, fn, mask, centers, classes

  def __len__(self):
    """Return the number of images."""
    return len(self.image_fns)


class MultiScaleInferenceKuzushijiDataset(data.Dataset):
  """Inference dataset that yields every image at several resolutions.

  Args:
    image_fns: list of image file paths (copied on construction).
    height: base image height.
    width: base image width.
    scales: iterable of factors applied to `height` and `width`.
  """

  def __init__(self, image_fns, height, width, scales):
    self.image_fns = image_fns.copy()
    self.height = height
    self.width = width
    self.scales = scales

  def __len__(self):
    """Return the number of images."""
    return len(self.image_fns)

  def __getitem__(self, index, to_tensor=True):
    """Return ``[img_at_scale_0, ..., img_at_scale_n, fn]`` for `index`.

    The image is read as RGB, zero-padded to a height / width ratio of 1.5
    and resized once per scale. With `to_tensor` each image is returned as
    a channels-first torch tensor, otherwise as a numpy array.
    """
    path = self.image_fns[index]
    rgb = cv2.cvtColor(cv2.imread(path, 1), cv2.COLOR_BGR2RGB)

    padded, _, _ = KuzushijiDataset.pad_to_ratio(rgb, ratio=1.5)
    assert padded.ndim == 3

    outputs = []
    for factor in self.scales:
      # cv2.resize takes the target size as (width, height)
      target_h = int(factor * self.height)
      target_w = int(factor * self.width)
      resized = cv2.resize(padded, (target_w, target_h))

      if to_tensor:
        assert resized.ndim == 3, resized.ndim
        resized = th.from_numpy(resized.transpose((2, 0, 1)).copy())

      outputs.append(resized)

    # the file name travels along as the last element
    outputs.append(path)
    return outputs


def load_gt(fn, label_key='labels', has_height_width=True):
  """Parse a ground-truth CSV into per-image annotations and a label index.

  The CSV needs an ``image_id`` column and a `label_key` column. Each cell
  of the latter holds whitespace-separated groups of five tokens
  (label followed by four integers) or, when `has_height_width` is false,
  three tokens (label followed by two integers). A trailing incomplete
  group is ignored.

  Args:
    fn: path of the CSV file.
    label_key: name of the column holding the annotation string.
    has_height_width: whether each group carries four integers rather than
      two.

  Returns:
    labels: dict mapping image id to a list of tuples made of the integers
      of a group, in file order, followed by the label. The 5-tuples are
      consumed by ``KuzushijiDataset`` as ``(x, y, w, h, label)``. Images
      without annotations get no entry.
    label_to_int: dict mapping every label to its index in sorted order.

  Raises:
    ValueError: if a coordinate token is not an integer.
  """
  table = pd.read_csv(fn, dtype={'image_id': str, label_key: str})
  # empty cells are read as NaN; turn them into empty strings
  table = table.fillna('')
  group_size = 5 if has_height_width else 3
  labels_ = defaultdict(list)
  all_labels = set()
  for img_id, label_str in zip(table['image_id'], table[label_key]):
    # split() without argument tolerates repeated, leading and trailing
    # whitespace, which would otherwise shift the groups
    tokens = label_str.split()
    # columns[0] holds the labels, the others one integer field each;
    # zip stops at the shortest column and so drops an incomplete group
    columns = [tokens[offset::group_size] for offset in range(group_size)]
    for ll, *coords in zip(*columns):
      labels_[img_id].append(tuple(int(c) for c in coords) + (ll,))
      all_labels.add(ll)

  label_to_int = {label: ind for ind, label in enumerate(sorted(all_labels))}
  labels = dict(labels_)
  return labels, label_to_int


if __name__ == '__main__':
  # Visual debugging script: draws augmented training samples next to their
  # center heat-map until 'q' is pressed or 500 samples have been shown.
  from matplotlib import pyplot as plt
  np.random.seed(321)
  th.manual_seed(321)
  gt, label_to_int = load_gt('train.csv')
  train_image_fns = sorted(glob(os.path.join('train_images', '*.jpg')))
  test_image_fns = sorted(glob(os.path.join('test_images', '*.jpg')))

  # keep only images whose first annotation does not contain '-1'
  non_empty_gt = {}
  for image_id, annotations in gt.items():
    if '-1' not in annotations[0]:
      non_empty_gt[image_id] = annotations
  train_image_fns = [
    path for path in train_image_fns
    if KuzushijiDataset.fn_to_id(path) in non_empty_gt]
  # small fixed subset
  train_image_fns = train_image_fns[50:60]
  first_name = os.path.basename(train_image_fns[0])
  print("[Non-EMPTY] TRAIN: ", len(train_image_fns), first_name)
  train_ds = KuzushijiDataset(train_image_fns, gt_boxes=gt,
      label_to_int=label_to_int, augment=True)
  train_ds.cache()

  for _ in range(500):
    sample_index = np.random.randint(len(train_ds))
    img, fn, mask, centers, classes = train_ds[sample_index]
    # tensors -> (h, w, c) image array and 2-D mask
    img = img.squeeze().permute(1, 2, 0).numpy()
    mask = mask.squeeze()
    print(img.shape, mask.shape)
    mask = mask.numpy()
    plt.imshow(mask)

    # paint the class id of every listed center pixel; rows of -1 mark the
    # unused tail of `centers`
    num_centers = 0
    classes_mask = np.zeros_like(mask, dtype=np.int32)
    for row, (x, y) in enumerate(centers):
      if x == -1:
        break
      num_centers += 1
      classes_mask[y, x] = classes[row]

    print(mask.shape, img.shape)
    mask = np.dstack([mask, mask, mask])
    print(np.unique(classes_mask[classes_mask > 0]))
    classes_mask = np.dstack([classes_mask, classes_mask, classes_mask])

    # bring both masks up to the image resolution
    img_h, img_w = img.shape[:2]
    classes_mask = cv2.resize(classes_mask, (img_w, img_h),
        interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask, (img_w, img_h), interpolation=cv2.INTER_NEAREST)

    # inverted heat-map blended over the image
    inverted = 255 - (mask * 255).astype(np.uint8)
    mask = cv2.addWeighted(img, 0.7, inverted, 0.3, 0.0)
    vis = np.hstack([img, mask])
    h, w = vis.shape[:2]
    cv2.imshow("t", vis)
    q = cv2.waitKey()
    if q == ord('q'):
      break