python source code of book

#!/usr/bin/env python
#-*- coding:utf-8 -*-
#author: wu.zheng midday.me

import os
import json
import cv2
import random
import numpy as np
import time
from dataset import image_augmation
from dataset import data_util
import xml.etree.ElementTree as ET

# Width and height, in pixels, of the fixed-size canvas that every training
# image, label map and mask is padded to.
IMAGE_WIDTH = 1024
IMAGE_HEIGHT = 1024

# Number of label values. Presumably 0 = background, 1 = image region and
# 2 = text region, matching the values painted by data_generator -- confirm
# against the model definition.
NUM_CLASS = 3


def load_image_list(image_data_path):
  """Read a list file and return its entries, one per non-empty line.

  Args:
    image_data_path: Path of a text file holding one entry per line.

  Returns:
    A list with the content of every line (newline removed), in file order.
    Empty lines are skipped.
  """
  image_path_list = []
  with open(image_data_path) as f:
    for line in f:
      entry = line.strip("\n")
      # An empty entry would later be joined onto the image directory and
      # handed to the XML parser as if it were a label file, so drop it here.
      if entry:
        image_path_list.append(entry)
  return image_path_list

def get_int_value(item, name):
  """Look up ``name`` on ``item`` via ``item.get`` and convert it to ``int``.

  Args:
    item: Any object exposing ``get(name)``.
    name: Key / attribute name to look up.

  Returns:
    The looked-up value passed through ``int()``.
  """
  raw_value = item.get(name)
  return int(raw_value)

def get_item_box(item):
  """Return the ``(left, top, width, height)`` box of ``item`` as integers.

  The four values are read, in that order, from the ``left``, ``top``,
  ``width`` and ``height`` entries of ``item`` through ``get_int_value``.
  """
  return tuple(
      get_int_value(item, key) for key in ('left', 'top', 'width', 'height'))

def load_label_data(label_file_path):
  """Parse one XML label file into a plain dictionary.

  The root element supplies the ``image_path``, ``width`` and ``height``
  attributes; its direct ``image`` and ``text`` children supply the boxes.

  Args:
    label_file_path: Path of the XML label file.

  Returns:
    A dict with keys ``image_path`` (root attribute, unconverted), ``width``
    and ``height`` (ints), and ``images`` / ``texts`` (lists of
    ``(left, top, width, height)`` tuples, in document order).
  """
  page = ET.parse(label_file_path).getroot()
  image_items = page.findall('image')
  text_items = page.findall('text')
  image_path = page.get("image_path")
  page_height = int(page.get('height'))
  page_width = int(page.get('width'))
  return {
      'image_path': image_path,
      'width': page_width,
      'height': page_height,
      'images': [get_item_box(item) for item in image_items],
      'texts': [get_item_box(item) for item in text_items],
  }

def get_shape_by_type(shapes, label):
  """Return the shapes whose ``'label'`` entry equals ``label``.

  Args:
    shapes: Iterable of mappings that each have a ``'label'`` key.
    label: Label value to select.

  Returns:
    A new list with the matching shapes, in their original order.
  """
  return [candidate for candidate in shapes if candidate['label'] == label]

def fill_image(label_image, boxs, label_value, w_factor, h_factor):
  """Paint scaled boxes onto a label map.

  Args:
    label_image: Array the boxes are drawn on (passed to ``cv2.fillPoly``).
    boxs: Iterable of ``(x, y, w, h)`` boxes in source-page coordinates.
    label_value: Value written inside every box.
    w_factor: Multiplier applied to x coordinates.
    h_factor: Multiplier applied to y coordinates.

  Returns:
    The label map returned by the last ``cv2.fillPoly`` call, or
    ``label_image`` itself when ``boxs`` is empty.
  """
  for x, y, w, h in boxs:
    # Corners in drawing order: top-left, top-right, bottom-right, bottom-left.
    corners = np.array(
        [(x, y), (x + w, y), (x + w, y + h), (x, y + h)], dtype=np.float64)
    corners[:, 0] *= w_factor
    corners[:, 1] *= h_factor
    # Scaled coordinates are truncated toward zero when cast to int32;
    # fillPoly expects an array of polygons, hence the extra leading axis.
    polygon = corners.astype(np.int32)[np.newaxis, :, :]
    label_image = cv2.fillPoly(label_image, polygon, label_value)
  return label_image


def data_generator(list_path, image_dir, batch_size, mode='train'):
  """Yield padded segmentation batches built from XML page annotations.

  Every entry of the list file names an XML label file (relative to
  ``image_dir``). For each one the referenced image is loaded, resized so its
  longer side matches the target canvas, and placed in the top-left corner of
  a zero-filled ``IMAGE_HEIGHT`` x ``IMAGE_WIDTH`` canvas. Images that cannot
  be read are skipped.

  Args:
    list_path: Text file with one XML label path per line.
    image_dir: Directory that XML paths and image paths are relative to.
    batch_size: Number of examples per yielded batch.
    mode: ``'train'`` loops over the data forever (reshuffling every pass);
      any other value stops after a single pass.

  Yields:
    ``(image_batch, label_batch, mask_batch, xml_path_batch)``, four lists of
    length ``batch_size``:
      * images: ``(IMAGE_HEIGHT, IMAGE_WIDTH, 3)`` float arrays divided by 255;
      * labels: ``(IMAGE_HEIGHT, IMAGE_WIDTH)`` arrays holding 0 for
        background, 1 inside image boxes and 2 inside text boxes (text is
        painted last, so it wins where boxes overlap);
      * masks: ``(IMAGE_HEIGHT, IMAGE_WIDTH)`` arrays, 1 over the resized
        image and 0 over the padding;
      * xml paths: the joined label-file path of each example.

  Note:
    Only full batches are yielded. Outside ``'train'`` mode a trailing partial
    batch is therefore discarded; in ``'train'`` mode it carries over into the
    next pass.
  """
  label_file_list = load_image_list(list_path)
  print("example size:", len(label_file_list))
  if not label_file_list:
    # With nothing to iterate, 'train' mode would spin in the loop below
    # forever without ever yielding.
    return
  image_batch = []
  label_batch = []
  mask_batch = []
  xml_path_batch = []
  while True:
    random.shuffle(label_file_list)
    for xml_path in label_file_list:
      xml_path = os.path.join(image_dir, xml_path)
      label_data = load_label_data(xml_path)
      image_path = os.path.join(image_dir, label_data['image_path'])
      image = cv2.imread(image_path)
      if image is None:
        # cv2.imread signals an unreadable/missing file with None; skip the
        # example before anything dereferences the image.
        continue
      image_labels = np.array(label_data['images'])
      text_labels = np.array(label_data['texts'])
      # TODO: image augmentation and random rescaling are not applied yet.

      # Box coordinates are expressed in the page size declared in the XML,
      # which may differ from the size of the image file on disk.
      h = label_data['height']
      w = label_data['width']

      # Fit the longer image side to the canvas, keeping the aspect ratio.
      image_h, image_w = image.shape[:2]
      if image_h > image_w:
        factor = IMAGE_HEIGHT / image_h
        new_h = IMAGE_HEIGHT
        new_w = int(image_w * factor)
      else:
        factor = IMAGE_WIDTH / image_w
        new_w = IMAGE_WIDTH
        new_h = int(image_h * factor)

      # Factors mapping annotation coordinates onto the resized image.
      w_factor = new_w / w
      h_factor = new_h / h
      r_image = cv2.resize(image, (new_w, new_h))

      label_image = np.zeros((new_h, new_w))
      label_image = fill_image(label_image, image_labels, 1, w_factor, h_factor)
      label_image = fill_image(label_image, text_labels, 2, w_factor, h_factor)

      # Pad image, labels and validity mask to the fixed canvas size; the
      # content sits in the top-left corner.
      train_image = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH, 3))
      train_image[0:new_h, 0:new_w] = r_image
      train_label = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH))
      train_label[0:new_h, 0:new_w] = label_image
      train_mask = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH))
      train_mask[0:new_h, 0:new_w] = 1

      label_batch.append(train_label)
      image_batch.append(train_image / 255.0)
      mask_batch.append(train_mask)
      xml_path_batch.append(xml_path)
      if len(image_batch) == batch_size:
        yield image_batch, label_batch, mask_batch, xml_path_batch
        image_batch = []
        label_batch = []
        mask_batch = []
        xml_path_batch = []
    if mode != 'train':
      break


def get_batch(list_dir, image_dir, batch_size, mode='train', workers=1, max_queue_size=32):
  """Yield batches from ``data_generator`` through a background enqueuer.

  Args:
    list_dir: Passed to ``data_generator`` as its list-file path.
    image_dir: Passed to ``data_generator`` as the image/label directory.
    batch_size: Passed to ``data_generator``.
    mode: Passed to ``data_generator``.
    workers: Forwarded to ``enqueuer.start``.
    max_queue_size: Forwarded to ``enqueuer.start``.

  Yields:
    Whatever item is taken from ``enqueuer.queue``, or ``None`` when the
    enqueuer is no longer running. This generator never terminates on its
    own, so callers must stop iterating themselves (e.g. on ``None``).
  """
  # NOTE(review): GeneratorEnqueuer is defined in dataset.data_util, outside
  # this file; presumably it runs the generator in worker threads/processes
  # and fills `queue` -- confirm.
  enqueuer = data_util.GeneratorEnqueuer(data_generator(list_dir, image_dir, batch_size, mode))
  enqueuer.start(max_queue_size=max_queue_size, workers=workers)
  # The result of this call is discarded.
  enqueuer.is_running()
  generator_output = None
  while True:
    # Poll until an item is available or the enqueuer stops running.
    while enqueuer.is_running():
      if not enqueuer.queue.empty():
        generator_output = enqueuer.queue.get()
        break
      else:
        time.sleep(0.01)
    # NOTE(review): if is_running() turns False while items are still queued,
    # those items are never fetched and None is yielded instead -- confirm
    # whether that is intended.
    yield generator_output
    generator_output = None


import random

def get_random_color():
  """Return a random 3-tuple of ints, each drawn from ``[0, 254]``.

  Each channel is ``int(random.random() * 255)``, drawn in order.
  """
  return tuple(int(random.random() * 255) for _ in range(3))

def mask_to_bbox(mask, im, num_class, out_path=None, out_file_name=None):
  """Extract bounding boxes of every labelled region in a class mask.

  Args:
    mask: 2-D array of class ids; it is cast to ``uint8`` before use.
    im: Image the boxes are drawn on. Only used when ``out_path`` is given.
    num_class: Class ids ``1 .. num_class - 1`` are processed (0 is skipped).
    out_path: Optional directory; when given, each kept box and its class id
      are drawn on ``im`` and the result is written there.
    out_file_name: File name used inside ``out_path``.

  Returns:
    A list with one entry per processed class id, in ascending order. Each
    entry is a list of ``[x_min, y_min, x_max, y_max]`` boxes. Contours with
    an area below 50 are ignored.
  """
  bbox_list = []
  mask = mask.astype(np.uint8)
  for class_id in range(1, num_class):
    c_bbox_list = []
    # Binary mask of the current class.
    c_mask = np.zeros_like(mask)
    c_mask[mask == class_id] = 255
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); indexing from the end works with all of them.
    contours = cv2.findContours(
        c_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # One colour per class, shared by all of its boxes.
    color = get_random_color()
    for cnt in contours:
      if cv2.contourArea(cnt) < 50:
        continue
      # Simplify the contour before taking its bounding rectangle.
      epsilon = 0.005 * cv2.arcLength(cnt, True)
      approx = cv2.approxPolyDP(cnt, epsilon, True)
      x, y, w, h = cv2.boundingRect(approx)
      c_bbox_list.append([x, y, x + w, y + h])
      if out_path is not None:
        cv2.putText(im, str(class_id), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
        im = cv2.rectangle(im, pt1=(x, y), pt2=(x + w, y + h), color=color, thickness=1)
    bbox_list.append(c_bbox_list)
  if out_path is not None:
    outf = os.path.join(out_path, out_file_name)
    print(outf)
    cv2.imwrite(outf, im)
  return bbox_list