Python numpy.pad() Examples

The following are 30 code examples showing how to use numpy.pad(). They are extracted from open source projects; you can go to the original project or source file by following the links above each example.

You may also want to check out all available functions and classes of the numpy module.
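
Before working through the project snippets, here is a minimal, self-contained sketch of the numpy.pad() call itself. The array, pad widths and modes below are chosen purely for illustration.

import numpy as np

a = np.arange(6).reshape(2, 3)

# Pad 1 row before/after axis 0 and 2 columns before/after axis 1 with zeros.
padded = np.pad(a, pad_width=((1, 1), (2, 2)), mode='constant', constant_values=0)
print(padded.shape)  # (4, 7)

# Other modes that appear in the examples below:
print(np.pad(a, 1, mode='edge'))     # repeat the border values
print(np.pad(a, 1, mode='reflect'))  # mirror the array across its edges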

Example 1
Project: mmdetection   Author: open-mmlab   File: test_masks.py    License: Apache License 2.0
def test_bitmap_mask_pad():
    # pad with empty bitmap masks
    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))
    bitmap_masks = BitmapMasks(raw_masks, 28, 28)
    padded_masks = bitmap_masks.pad((56, 56))
    assert len(padded_masks) == 0
    assert padded_masks.height == 56
    assert padded_masks.width == 56

    # pad with bitmap masks containing 3 instances
    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))
    bitmap_masks = BitmapMasks(raw_masks, 28, 28)
    padded_masks = bitmap_masks.pad((56, 56))
    assert len(padded_masks) == 3
    assert padded_masks.height == 56
    assert padded_masks.width == 56
    assert (padded_masks.masks[:, 28:, 28:] == 0).all() 
Example 2
Project: DDPAE-video-prediction   Author: jthsieh   File: video_transforms.py    License: MIT License
def __call__(self, video):
    """
    Args:
        video (np.ndarray): Video to be cropped.
    Returns:
        np.ndarray: Cropped video.
    """
    if self.padding > 0:
      pad = Pad(self.padding, 0)
      video = pad(video)

    w, h = video.shape[-2], video.shape[-3]
    th, tw = self.size
    if w == tw and h == th:
      return video

    x1 = random.randint(0, w-tw)
    y1 = random.randint(0, h-th)
    return video[..., y1:y1+th, x1:x1+tw, :] 
Example 3
Project: gated-graph-transformer-network   Author: hexahedria   File: convert_story.py    License: MIT License
def convert(story):
    # import pdb; pdb.set_trace()
    sentence_arr, graphs, query_arr, answer_arr = story
    node_id_w = graphs[2].shape[2]
    edge_type_w = graphs[3].shape[3]

    all_node_strengths = [np.zeros([1])]
    all_node_ids = [np.zeros([1,node_id_w])]
    for num_new_nodes, new_node_strengths, new_node_ids, _ in zip(*graphs):
        last_strengths = all_node_strengths[-1]
        last_ids = all_node_ids[-1]

        cur_strengths = np.concatenate([last_strengths, new_node_strengths], 0)
        cur_ids = np.concatenate([last_ids, new_node_ids], 0)

        all_node_strengths.append(cur_strengths)
        all_node_ids.append(cur_ids)

    all_edges = graphs[3]
    full_n_nodes = all_edges.shape[1]
    all_node_strengths = np.stack([np.pad(x, ((0, full_n_nodes-x.shape[0])), 'constant') for x in all_node_strengths[1:]])
    all_node_ids = np.stack([np.pad(x, ((0, full_n_nodes-x.shape[0]), (0, 0)), 'constant') for x in all_node_ids[1:]])
    all_node_states = np.zeros([len(all_node_strengths), full_n_nodes,0])

    return tuple(x[np.newaxis,...] for x in (all_node_strengths, all_node_ids, all_node_states, all_edges)) 
Example 4
Project: disentangling_conditional_gans   Author: zalandoresearch   File: dataset_tool.py    License: MIT License
def create_mnist(tfrecord_dir, mnist_dir):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file:
        labels = np.frombuffer(file.read(), np.uint8, offset=8)
    images = images.reshape(-1, 1, 28, 28)
    images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (60000,) and labels.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 9
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0
    
    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

#---------------------------------------------------------------------------- 
Example 5
Project: disentangling_conditional_gans   Author: zalandoresearch   File: dataset_tool.py    License: MIT License
def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    images = images.reshape(-1, 28, 28)
    images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 32, 32) and images.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    
    with TFRecordExporter(tfrecord_dir, num_images) as tfr:
        rnd = np.random.RandomState(random_seed)
        for idx in range(num_images):
            tfr.add_image(images[rnd.randint(images.shape[0], size=3)])

#---------------------------------------------------------------------------- 
Example 6
Project: dc_tts   Author: Kyubyong   File: utils.py    License: Apache License 2.0
def load_spectrograms(fpath):
    '''Read the wave file in `fpath`
    and extract spectrograms.'''

    fname = os.path.basename(fpath)
    mel, mag = get_spectrograms(fpath)
    t = mel.shape[0]

    # Marginal padding for reduction shape sync.
    num_paddings = hp.r - (t % hp.r) if t % hp.r != 0 else 0
    mel = np.pad(mel, [[0, num_paddings], [0, 0]], mode="constant")
    mag = np.pad(mag, [[0, num_paddings], [0, 0]], mode="constant")

    # Reduction
    mel = mel[::hp.r, :]
    return fname, mel, mag 
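
The padding arithmetic above rounds the time axis up to the next multiple of the reduction factor hp.r. A small hedged illustration with assumed values (r = 4, a 10-frame mel spectrogram):

import numpy as np

r = 4                           # stands in for hp.r
mel = np.zeros((10, 80))        # assumed 10 frames x 80 mel bins
num_paddings = r - (mel.shape[0] % r) if mel.shape[0] % r != 0 else 0
mel = np.pad(mel, [[0, num_paddings], [0, 0]], mode="constant")
print(mel.shape)  # (12, 80) -- time axis now divisible by r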
Example 7
Project: mmdetection   Author: open-mmlab   File: test_masks.py    License: Apache License 2.0
def test_polygon_mask_pad():
    # pad with empty polygon masks
    raw_masks = dummy_raw_polygon_masks((0, 28, 28))
    polygon_masks = PolygonMasks(raw_masks, 28, 28)
    padded_masks = polygon_masks.pad((56, 56))
    assert len(padded_masks) == 0
    assert padded_masks.height == 56
    assert padded_masks.width == 56
    assert padded_masks.to_ndarray().shape == (0, 56, 56)

    # pad with polygon masks containing 3 instances
    raw_masks = dummy_raw_polygon_masks((3, 28, 28))
    polygon_masks = PolygonMasks(raw_masks, 28, 28)
    padded_masks = polygon_masks.pad((56, 56))
    assert len(padded_masks) == 3
    assert padded_masks.height == 56
    assert padded_masks.width == 56
    assert padded_masks.to_ndarray().shape == (3, 56, 56)
    assert (padded_masks.to_ndarray()[:, 28:, 28:] == 0).all() 
Example 8
Project: kuzushiji-recognition   Author: see--   File: data.py    License: MIT License
def mask_to_rle(img, mask_value=255, transpose=True):
    img = np.int32(img)
    if transpose:
      img = img.T
    img = img.flatten()
    img[img == mask_value] = 1
    pimg = np.pad(img, 1, mode='constant')
    diff = np.diff(pimg)
    starts = np.where(diff == 1)[0]
    ends = np.where(diff == -1)[0]
    rle = []
    previous_end = 0
    for start, end in zip(starts, ends):
      relative_start = start - previous_end
      length = end - start
      previous_end = end
      rle.append(str(relative_start))
      rle.append(str(length))
    if len(rle) == 0:
      return "-1"
    return " ".join(rle) 
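
The function above pads the flattened mask with a zero on each side before calling np.diff, so that runs touching the first or last pixel still produce a start and an end marker. A minimal sketch of that trick with a made-up binary array:

import numpy as np

mask = np.array([1, 1, 0, 0, 1])
pimg = np.pad(mask, 1, mode='constant')  # [0, 1, 1, 0, 0, 1, 0]
diff = np.diff(pimg)
print(np.where(diff == 1)[0])    # run starts: [0 4]
print(np.where(diff == -1)[0])   # run ends:   [2 5]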
Example 9
Project: kuzushiji-recognition   Author: see--   File: data.py    License: MIT License
def get_paddings(h, w, ratio):
    current_ratio = h / w
    # pad height
    if current_ratio < ratio:
      pad_h = int(w * ratio - h)
      pad_top = pad_h // 2
      pad_bottom = pad_h - pad_top
      pad_left, pad_right = 0, 0
    # pad width
    else:
      pad_w = int(h / ratio - w)
      pad_left = pad_w // 2
      pad_right = pad_w - pad_left
      pad_top, pad_bottom = 0, 0

    return pad_top, pad_bottom, pad_left, pad_right 
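
get_paddings only computes the per-side amounts; applying them is a single np.pad call. A hedged sketch, assuming an HWC image of size 100x300 padded to a square (ratio 1.0):

import numpy as np

img = np.zeros((100, 300, 3), dtype=np.uint8)
pad_top, pad_bottom, pad_left, pad_right = get_paddings(100, 300, ratio=1.0)
padded = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)),
                mode='constant')
print(padded.shape)  # (300, 300, 3) -- height padded up to the target ratio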
Example 10
Project: kss   Author: Kyubyong   File: utils.py    License: Apache License 2.0
def load_spectrograms(fpath):
    '''Read the wave file in `fpath`
    and extract spectrograms.'''

    fname = os.path.basename(fpath)
    mel, mag = get_spectrograms(fpath)
    t = mel.shape[0]

    # Marginal padding for reduction shape sync.
    num_paddings = hp.r - (t % hp.r) if t % hp.r != 0 else 0
    mel = np.pad(mel, [[0, num_paddings], [0, 0]], mode="constant")
    mag = np.pad(mag, [[0, num_paddings], [0, 0]], mode="constant")

    # Reduction
    mel = mel[::hp.r, :]
    return fname, mel, mag

# This is adapted from
# https://github.com/keithito/tacotron/blob/master/util/audio.py#L55-62 
Example 11
Project: RingNet   Author: soubhiksanyal   File: image.py    License: MIT License
def scale_and_crop(image, scale, center, img_size):
    image_scaled, scale_factors = resize_img(image, scale)
    # Swap so it's [x, y]
    scale_factors = [scale_factors[1], scale_factors[0]]
    center_scaled = np.round(center * scale_factors).astype(int)  # np.int was removed from NumPy; use the builtin int

    margin = int(img_size / 2)
    image_pad = np.pad(
        image_scaled, ((margin, ), (margin, ), (0, )), mode='edge')
    center_pad = center_scaled + margin
    # figure out starting point
    start_pt = center_pad - margin
    end_pt = center_pad + margin
    # crop:
    crop = image_pad[start_pt[1]:end_pt[1], start_pt[0]:end_pt[0], :]
    proc_param = {
        'scale': scale,
        'start_pt': start_pt,
        'end_pt': end_pt,
        'img_size': img_size
    }

    return crop, proc_param 
Example 12
Project: imgcomp-cvpr   Author: fab-jul   File: images_iterator.py    License: GNU General Public License v3.0
def add_padding(self, im):
            # TODO: use undo pad when saving images to disk
            w, h, chan = im.shape
            if chan == 4:
                print('*** Ditching alpha channel...')
                return self.add_padding(im[:, :, :3])
            if w % self.pad == 0 and h % self.pad == 0:
                return im, lambda x: x

            wp = (self.pad - w % self.pad) % self.pad
            hp = (self.pad - h % self.pad) % self.pad
            wp_left = wp // 2
            wp_right = wp - wp_left
            hp_left = hp // 2
            hp_right = hp - hp_left
            paddings = [[wp_left, wp_right], [hp_left, hp_right], [0, 0]]
            im = np.pad(im, paddings, mode='constant')

            def _undo_pad(img_data_):
                return img_data_[wp_left:(-wp_right or None), hp_left:(-hp_right or None), :]
            return im, _undo_pad 
Example 13
Project: imgcomp-cvpr   Author: fab-jul   File: probclass.py    License: GNU General Public License v3.0
def pad_for_probclass3d(x, context_size, pad_value=0, learn_pad_var=False):
    """
    :param x: NCHW tensorflow Tensor or numpy array
    """
    input_is_tf = not isinstance(x, np.ndarray)
    if not input_is_tf and x.ndim == 3:  # for bit_counter
        return remove_batch_dim(pad_for_probclass3d(
                add_batch_dim(x), context_size, pad_value, learn_pad_var))

    with tf.name_scope('pad_cs' + str(context_size)):
        pad = context_size // 2
        assert pad >= 1
        if learn_pad_var:
            if not isinstance(pad_value, tf.Variable):
                print('Warn: Expected tf.Variable for padding, got {}'.format(pad_value))
            return pc_pad_grad(x, pad, pad_value)

        pads = [[0, 0],  # don't pad batch dimension
                [pad, 0],  # don't pad depth_future, it's not seen by any filter
                [pad, pad],
                [pad, pad]]
        assert len(pads) == _get_ndims(x), '{} != {}'.format(len(pads), x.shape)

        pad_fn = tf.pad if input_is_tf else get_np_pad_fn()
        return pad_fn(x, pads, constant_values=pad_value) 
Example 14
Project: DeepLung   Author: uci-cbcl   File: utils.py    License: GNU General Public License v3.0
def split4(data,  max_stride, margin):
    splits = []
    data = torch.Tensor.numpy(data)
    _,c, z, h, w = data.shape

    w_width = np.ceil(float(w / 2 + margin)/max_stride).astype('int')*max_stride
    h_width = np.ceil(float(h / 2 + margin)/max_stride).astype('int')*max_stride
    pad = int(np.ceil(float(z)/max_stride)*max_stride)-z
    leftpad = pad // 2  # integer division so np.pad gets integer pad widths
    pad = [[0,0],[0,0],[leftpad,pad-leftpad],[0,0],[0,0]]
    data = np.pad(data,pad,'constant',constant_values=-1)
    data = torch.from_numpy(data)
    splits.append(data[:, :, :, :h_width, :w_width])
    splits.append(data[:, :, :, :h_width, -w_width:])
    splits.append(data[:, :, :, -h_width:, :w_width])
    splits.append(data[:, :, :, -h_width:, -w_width:])
    
    return torch.cat(splits, 0) 
Example 15
Project: argus-freesound   Author: lRomul   File: audio.py    License: MIT License
def read_audio(file_path):
    min_samples = int(config.min_seconds * config.sampling_rate)
    try:
        y, sr = librosa.load(file_path, sr=config.sampling_rate)
        trim_y, trim_idx = librosa.effects.trim(y)  # trim, top_db=default(60)

        if len(trim_y) < min_samples:
            center = (trim_idx[1] - trim_idx[0]) // 2
            left_idx = max(0, center - min_samples // 2)
            right_idx = min(len(y), center + min_samples // 2)
            trim_y = y[left_idx:right_idx]

            if len(trim_y) < min_samples:
                padding = min_samples - len(trim_y)
                offset = padding // 2
                trim_y = np.pad(trim_y, (offset, padding - offset), 'constant')
        return trim_y
    except BaseException as e:
        print(f"Exception while reading file {e}")
        return np.zeros(min_samples, dtype=np.float32) 
Example 16
Project: ICDAR-2019-SROIE   Author: zzzDavid   File: my_utils.py    License: MIT License
def random_string(n):
    if n == 0:
        return ""

    x = random.random()
    if x > 0.5:
        pad = " " * n
    elif x > 0.3:
        pad = "".join(random.choices(digits + " \t\n", k=n))
    elif x > 0.2:
        pad = "".join(random.choices(ascii_uppercase + " \t\n", k=n))
    elif x > 0.1:
        pad = "".join(random.choices(ascii_uppercase + digits + " \t\n", k=n))
    else:
        pad = "".join(
            random.choices(ascii_uppercase + digits + punctuation + " \t\n", k=n)
        )

    return pad 
Example 17
Project: ICDAR-2019-SROIE   Author: zzzDavid   File: my_data.py    License: MIT License
def get_val_data(self, batch_size=8, device="cpu"):
        keys = random.sample(list(self.val_dict.keys()), batch_size)  # random.sample needs a sequence, not a dict view

        texts = [self.val_dict[k][0] for k in keys]
        labels = [self.val_dict[k][1] for k in keys]

        maxlen = max(len(s) for s in texts)
        texts = [s.ljust(maxlen, " ") for s in texts]
        labels = [
            numpy.pad(a, (0, maxlen - len(a)), mode="constant", constant_values=0)
            for a in labels
        ]

        text_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, text in enumerate(texts):
            text_tensor[:, i] = torch.LongTensor([VOCAB.find(c) for c in text])

        truth_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, label in enumerate(labels):
            truth_tensor[:, i] = torch.LongTensor(label)

        return keys, text_tensor.to(self.device), truth_tensor.to(self.device) 
Example 18
Project: scarlet   Author: pmelchior   File: test_interpolation.py    License: MIT License
def test_fft_convolve(self):
        shape = (11, 11)
        img = np.zeros(shape, dtype=np.float32)
        img[3, 3] = 2
        img[2, 3] = .5
        img[3, 4] = .75
        img[3, 2] = .1
        kernel = np.arange(25, dtype=np.float32).reshape(5, 5)
        pad1, pad2 = scarlet.interpolation.get_common_padding(img, kernel, padding=3)
        _img = np.pad(img, pad1, 'constant')
        _kernel = np.pad(kernel, pad2, 'constant')
        result = scarlet.interpolation.fft_convolve(_img, _kernel)
        (bottom, top), (left, right) = pad1
        result = result[bottom:-top, left:-right]
        truth = np.zeros(shape)
        truth[1:6, 1:6] += 2 * kernel
        truth[:5, 1:6] += .5 * kernel
        truth[1:6, 2:7] += .75 * kernel
        truth[1:6, :5] += .1 * kernel
        assert_almost_equal(result, truth, decimal=5) 
Example 19
Project: CartoonGAN-Tensorflow   Author: taki0112   File: edge_smooth.py    License: MIT License
def make_edge_smooth(dataset_name, img_size) :
    check_folder('./dataset/{}/{}'.format(dataset_name, 'trainB_smooth'))

    file_list = glob('./dataset/{}/{}/*.*'.format(dataset_name, 'trainB'))
    save_dir = './dataset/{}/trainB_smooth'.format(dataset_name)

    kernel_size = 5
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    gauss = cv2.getGaussianKernel(kernel_size, 0)
    gauss = gauss * gauss.transpose(1, 0)

    for f in tqdm(file_list) :
        file_name = os.path.basename(f)

        bgr_img = cv2.imread(f)
        gray_img = cv2.imread(f, 0)

        bgr_img = cv2.resize(bgr_img, (img_size, img_size))
        pad_img = np.pad(bgr_img, ((2, 2), (2, 2), (0, 0)), mode='reflect')
        gray_img = cv2.resize(gray_img, (img_size, img_size))

        edges = cv2.Canny(gray_img, 100, 200)
        dilation = cv2.dilate(edges, kernel)

        gauss_img = np.copy(bgr_img)
        idx = np.where(dilation != 0)
        for i in range(np.sum(dilation != 0)):
            gauss_img[idx[0][i], idx[1][i], 0] = np.sum(
                np.multiply(pad_img[idx[0][i]:idx[0][i] + kernel_size, idx[1][i]:idx[1][i] + kernel_size, 0], gauss))
            gauss_img[idx[0][i], idx[1][i], 1] = np.sum(
                np.multiply(pad_img[idx[0][i]:idx[0][i] + kernel_size, idx[1][i]:idx[1][i] + kernel_size, 1], gauss))
            gauss_img[idx[0][i], idx[1][i], 2] = np.sum(
                np.multiply(pad_img[idx[0][i]:idx[0][i] + kernel_size, idx[1][i]:idx[1][i] + kernel_size, 2], gauss))

        cv2.imwrite(os.path.join(save_dir, file_name), gauss_img) 
Example 20
Project: eye-in-the-sky   Author: manideep2510   File: main_unet.py    License: Apache License 2.0
def padding(img, w, h, c, crop_size, stride, n_h, n_w):
    
    w_extra = w - ((n_w-1)*stride)
    w_toadd = crop_size - w_extra
    
    h_extra = h - ((n_h-1)*stride)
    h_toadd = crop_size - h_extra
    
    img_pad = np.zeros(((h+h_toadd), (w+w_toadd), c))
    #img_pad[:h, :w,:] = img
    #img_pad = img_pad+img
    img_pad = np.pad(img, [(0, h_toadd), (0, w_toadd), (0,0)], mode='constant')
    
    return img_pad
    


# Add pixels so that the image shape is a multiple of the stride
Example 21
Project: eye-in-the-sky   Author: manideep2510   File: test_unet.py    License: Apache License 2.0
def padding(img, w, h, c, crop_size, stride, n_h, n_w):
    
    w_extra = w - ((n_w-1)*stride)
    w_toadd = crop_size - w_extra
    
    h_extra = h - ((n_h-1)*stride)
    h_toadd = crop_size - h_extra
    
    img_pad = np.zeros(((h+h_toadd), (w+w_toadd), c))
    #img_pad[:h, :w,:] = img
    #img_pad = img_pad+img
    img_pad = np.pad(img, [(0, h_toadd), (0, w_toadd), (0,0)], mode='constant')
    
    return img_pad
    
    
# Add pixels so that the image shape is a multiple of the stride
Example 22
Project: eye-in-the-sky   Author: manideep2510   File: main_psp.py    License: Apache License 2.0
def padding(img, w, h, c, crop_size, stride, n_h, n_w):
    
    w_extra = w - ((n_w-1)*stride)
    w_toadd = crop_size - w_extra
    
    h_extra = h - ((n_h-1)*stride)
    h_toadd = crop_size - h_extra
    
    img_pad = np.zeros(((h+h_toadd), (w+w_toadd), c))
    #img_pad[:h, :w,:] = img
    #img_pad = img_pad+img
    img_pad = np.pad(img, [(0, h_toadd), (0, w_toadd), (0,0)], mode='constant')
    
    return img_pad
    


# Add pixels so that the image shape is a multiple of the stride
Example 23
Project: cat-bbs   Author: aleju   File: bbs.py    License: MIT License
def shift_from_top(self, by):
        return Rectangle(x1=self.x1, x2=self.x2, y1=self.y1+by, y2=self.y2+by)
        """
        if by != 0:
            self.y1 += by
            self.y2 += by
        """

        """
        pad_top = abs(by1) if by1 < 0 else 0
        pad_right = bx2 - img_width if bx2 >= img_width else 0
        pad_bottom = by2 - img_height if by2 >= img_height else 0
        pad_left = abs(bx1) if bx1 < 0 else 0

        bx1 = max(bx1, 0)
        by1 = max(by1, 0)
        bx2 = max(bx2, img_width)
        by2 = max(by2, img_height)

        img_body = img[by1:by2, bx1:bx2, ...]
        if any([val > 0 for val in [pad_top, pad_right, pad_bottom, pad_left]]):
            img_body = np.pad(img_body, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), mode="")
        img_body = misc.imresize(img_body, (height, width))

        return img_body
        """ 
Example 24
Project: cat-bbs   Author: aleju   File: bbs.py    License: MIT License
def extract_from_image(self, img):
        pad_top = 0
        pad_right = 0
        pad_bottom = 0
        pad_left = 0

        height, width = img.shape[0], img.shape[1]
        x1, x2, y1, y2 = self.x1, self.x2, self.y1, self.y2

        if x1 < 0:
            pad_left = abs(x1)
            x2 = x2 + abs(x1)
            x1 = 0
        if y1 < 0:
            pad_top = abs(y1)
            y2 = y2 + abs(y1)
            y1 = 0
        if x2 >= width:
            pad_right = x2 - (width - 1)
        if y2 >= height:
            pad_bottom = y2 - (height - 1)

        if any([val > 0 for val in [pad_top, pad_right, pad_bottom, pad_left]]):
            if len(img.shape) == 2:
                img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), mode="constant")
            else:
                img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), mode="constant")

        return img[y1:y2, x1:x2]

    # val = int, float, tuple of int (top, right, btm, left), tuple of float (top, right, btm, left) 
Example 25
Project: cat-bbs   Author: aleju   File: predict_video.py    License: MIT License
def find_bbs(img, model, conf_threshold, input_size):
    """Find bounding boxes in an image."""
    # pad image so that it's square
    img_pad, (pad_top, pad_right, pad_bottom, pad_left) = to_aspect_ratio_add(img, 1.0, return_paddings=True)

    # resize padded image to desired input size
    # "linear" interpolation seems to be enough here for 400x400 or larger images
    # change to "area" or "cubic" for marginally better quality
    img_rs = ia.imresize_single_image(img_pad, (input_size, input_size), interpolation="linear")

    # convert to torch-ready input variable
    inputs_np = (np.array([img_rs])/255.0).astype(np.float32).transpose(0, 3, 1, 2)
    inputs = torch.from_numpy(inputs_np)
    inputs = Variable(inputs, volatile=True)
    if GPU >= 0:
        inputs = inputs.cuda(GPU)

    # apply model and measure the model's time
    time_start = time.time()
    outputs_pred = model(inputs)
    time_req = time.time() - time_start

    # process the model's output (i.e. convert heatmaps to BBs)
    result = ModelResult(
        outputs_pred,
        inputs_np,
        img,
        (pad_top, pad_right, pad_bottom, pad_left)
    )
    bbs = result.get_bbs()

    return bbs, time_req 
Example 26
Project: cat-bbs   Author: aleju   File: train.py    License: MIT License
def generate_video_image(batch_idx, examples, model):
    """Generate frames for a video of the training progress.
    Each frame contains N examples shown in a grid. Each example shows
    the input image and the main heatmap predicted by the model."""
    start_time = time.time()
    #print("A", time.time() - start_time)
    model.eval()

    # fw through network
    inputs, outputs_gt = examples_to_batch(examples, iaa.Noop())
    inputs_torch = torch.from_numpy(inputs)
    inputs_torch = Variable(inputs_torch, volatile=True)
    if GPU >= 0:
        inputs_torch = inputs_torch.cuda(GPU)
    outputs_pred_torch = model(inputs_torch)
    #print("B", time.time() - start_time)

    outputs_pred = outputs_pred_torch.cpu().data.numpy()
    inputs = (inputs * 255).astype(np.uint8).transpose(0, 2, 3, 1)
    #print("C", time.time() - start_time)
    heatmaps = []
    for i in range(inputs.shape[0]):
        hm_drawn = draw_heatmap(inputs[i], np.squeeze(outputs_pred[i][0]), alpha=0.5)
        heatmaps.append(hm_drawn)
    #print("D", time.time() - start_time)
    grid = ia.draw_grid(heatmaps, cols=11, rows=6).astype(np.uint8)
    #grid_rs = misc.imresize(grid, (720-32, 1280-32))
    # pad by 42 for the text and to get the image to 720p aspect ratio
    grid_pad = np.pad(grid, ((0, 42), (0, 0), (0, 0)), mode="constant")
    grid_pad_text = ia.draw_text(
        grid_pad,
        x=grid_pad.shape[1]-220,
        y=grid_pad.shape[0]-35,
        text="Batch %05d" % (batch_idx,),
        color=[255, 255, 255]
    )
    #print("E", time.time() - start_time)
    return grid_pad_text 
Example 27
Project: DDPAE-video-prediction   Author: jthsieh   File: video_transforms.py    License: MIT License
def __call__(self, video):
    """
    Args:
        video (np.ndarray): Video to be padded.
    Returns:
        np.ndarray: Padded video.
    """
    pad_width = ((0, 0), (self.padding, self.padding), (self.padding, self.padding), (0, 0))
    return np.pad(video, pad_width=pad_width, mode='constant', constant_values=self.fill) 
Example 28
Project: disentangling_conditional_gans   Author: zalandoresearch   File: misc.py    License: MIT License
def setup_text_label(text, font='Calibri', fontsize=32, padding=6, glow_size=2.0, glow_coef=3.0, glow_exp=2.0, cache_size=100): # => (alpha, glow)
    # Lookup from cache.
    key = (text, font, fontsize, padding, glow_size, glow_coef, glow_exp)
    if key in _text_label_cache:
        value = _text_label_cache[key]
        del _text_label_cache[key] # LRU policy
        _text_label_cache[key] = value
        return value

    # Limit cache size.
    while len(_text_label_cache) >= cache_size:
        _text_label_cache.popitem(last=False)

    # Render text.
    import moviepy.editor # pip install moviepy
    alpha = moviepy.editor.TextClip(text, font=font, fontsize=fontsize).mask.make_frame(0)
    alpha = np.pad(alpha, padding, mode='constant', constant_values=0.0)
    glow = scipy.ndimage.gaussian_filter(alpha, glow_size)
    glow = 1.0 - np.maximum(1.0 - glow * glow_coef, 0.0) ** glow_exp

    # Add to cache.
    value = (alpha, glow)
    _text_label_cache[key] = value
    return value

#---------------------------------------------------------------------------- 
Example 29
Project: deep-learning-note   Author: wdxtub   File: util.py    License: MIT License
def conv_output_size(input_size, filter_size, stride=1, pad=0):
    return (input_size + 2*pad - filter_size) / stride + 1 
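
As a quick sanity check of the formula (values chosen for illustration): a 28x28 input with a 5x5 filter, stride 1 and padding 2 keeps its spatial size, since (28 + 2*2 - 5) / 1 + 1 = 28.

print(conv_output_size(28, 5, stride=1, pad=2))  # 28.0 (true division returns a float)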
Example 30
Project: deep-learning-note   Author: wdxtub   File: util.py    License: MIT License
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """
    Parameters
    ----------
    input_data : input data as a 4-D array of shape (batch size, channels, height, width)
    filter_h : filter height
    filter_w : filter width
    stride : stride
    pad : padding
    Returns
    -------
    col : 2-D array
    """
    N, C, H, W = input_data.shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1

    img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))

    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]

    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
    return col
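
A small assumed usage example showing the shape im2col produces for one 1-channel 4x4 image with a 3x3 filter, stride 1 and no padding:

import numpy as np

x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
col = im2col(x, filter_h=3, filter_w=3, stride=1, pad=0)
print(col.shape)  # (4, 9): out_h*out_w = 2*2 patches, each flattened to C*3*3 = 9 values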