Python torchvision.transforms.functional.to_pil_image() Examples

The following are 30 code examples of torchvision.transforms.functional.to_pil_image(), collected from open-source projects. The originating project, source file, and license are listed above each example. You may also want to check out all available functions and classes of the torchvision.transforms.functional module.
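Before the project examples, here is a minimal sketch of the function's basic behaviour: to_pil_image() accepts a torch.Tensor in C×H×W layout or a numpy.ndarray in H×W×C layout and returns a PIL Image (the sample data below is illustrative):

import torch
import numpy as np
from torchvision.transforms.functional import to_pil_image

# A float tensor in [0, 1] with shape (C, H, W) is scaled to [0, 255]
# and converted to an RGB PIL Image.
pil_from_tensor = to_pil_image(torch.rand(3, 64, 64))

# A uint8 ndarray with shape (H, W, C) is converted directly.
array_img = np.random.randint(0, 256, (64, 64, 3), dtype='uint8')
pil_from_array = to_pil_image(array_img)

print(pil_from_tensor.size, pil_from_array.mode)  # (64, 64) RGB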
Example #1
Source File: demo_segmentation.py    From Text_Segmentation_Image_Inpainting with GNU General Public License v3.0
def process(eval_img, device='cpu'):
    (img, origin, unpadder), file_name = eval_img
    with torch.no_grad():
        out = model(img.to(device))

    prob = torch.sigmoid(out)  # torch.sigmoid(); nn.functional.sigmoid is deprecated
    mask = prob > 0.5
    mask = torch.nn.MaxPool2d(kernel_size=(3, 3), padding=(1, 1), stride=1)(mask.float()).byte()
    mask = unpadder(mask)
    mask = mask.float().cpu()

    save_image(mask, file_name + ' _mask.jpg')
    origin_np = np.array(to_pil_image(origin[0]))
    mask_np = to_pil_image(mask[0]).convert("L")
    mask_np = np.array(mask_np, dtype='uint8')
    mask_np = draw_bounding_box(origin_np, mask_np, 500)
    mask_ = Image.fromarray(mask_np)
    mask_.save(file_name + "_contour.jpg")
    # ret, mask_np = cv2.threshold(mask_np, 127, 255, 0)
    # dst = cv2.inpaint(origin_np, mask_np, 1, cv2.INPAINT_NS)
    # out = Image.fromarray(dst)
    # out.save(file_name + ' _box.jpg') 
Example #2
Source File: train.py    From Holocron with MIT License
def plot_samples(images, targets):
    # Unnormalize image
    nb_samples = 4
    _, axes = plt.subplots(1, nb_samples, figsize=(20, 5))
    for idx in range(nb_samples):
        img = images[idx]
        img *= torch.tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
        img += torch.tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
        img = F.to_pil_image(img)

        axes[idx].imshow(img)
        axes[idx].axis('off')
        for box, label in zip(targets[idx]['boxes'], targets[idx]['labels']):
            xmin = int(box[0] * images[idx].shape[-1])
            ymin = int(box[1] * images[idx].shape[-2])
            xmax = int(box[2] * images[idx].shape[-1])
            ymax = int(box[3] * images[idx].shape[-2])

            rect = Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                             linewidth=2, edgecolor='lime', facecolor='none')
            axes[idx].add_patch(rect)
            axes[idx].text(xmin, ymin, classes[label.item()], color='lime', fontsize=12)

    plt.show() 
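The unnormalization pattern above (multiply by the ImageNet std, add the mean, then call to_pil_image) recurs in several examples below. A minimal sketch factoring it into a helper; the helper name is ours, not Holocron's:

import torch
from torchvision.transforms.functional import to_pil_image

IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
IMAGENET_STD = torch.tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)

def denormalize_to_pil(img):
    # Invert torchvision-style normalization out-of-place, leaving the input untouched.
    img = img * IMAGENET_STD + IMAGENET_MEAN
    # Clamping guards against numerical overshoot outside [0, 1], which would
    # otherwise wrap around during the uint8 conversion.
    return to_pil_image(img.clamp(0, 1))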
Example #3
Source File: train.py    From Holocron with MIT License
def plot_samples(images, targets, ignore_index=None):
    # Unnormalize image
    nb_samples = 4
    _, axes = plt.subplots(2, nb_samples, figsize=(20, 5))
    for idx in range(nb_samples):
        img = images[idx]
        img *= torch.tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
        img += torch.tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
        img = F.to_pil_image(img)
        target = targets[idx]
        if isinstance(ignore_index, int):
            target[target == ignore_index] = 0

        axes[0][idx].imshow(img)
        axes[0][idx].axis('off')
        axes[1][idx].imshow(target)
        axes[1][idx].axis('off')
    plt.show() 
Example #4
Source File: augmentation.py    From nni with MIT License
def test_tta():
    img_f = os.path.join(settings.TEST_IMG_DIR, '0c2637aa9.jpg')
    img = Image.open(img_f)
    img = img.convert('RGB')

    tta_index = 7
    trans1 = TTATransform(tta_index)
    img = trans1(img)
    #img.show()

    img_np = np.array(img)
    img_np = np.expand_dims(img_np, 0)
    print(img_np.shape)
    img_np = tta_back_mask_np(img_np, tta_index)
    img_np = np.reshape(img_np, (768, 768, 3))
    img_back = F.to_pil_image(img_np)
    img_back.show() 
Example #5
Source File: inference.py    From RCRNet-Pytorch with MIT License
def inference():
    model.eval()
    print("Begin inference on {} {}.".format(args.dataset, args.split))
    for data in tqdm(dataloader):
        images = [frame['image'].to(device) for frame in data]
        with torch.no_grad():
            preds = model(images)
            preds = [torch.sigmoid(pred) for pred in preds]
        # save predicted saliency maps
        for i, pred_ in enumerate(preds):
            for j, pred in enumerate(pred_.detach().cpu()):
                dataset = data[i]['dataset'][j]
                image_id = data[i]['image_id'][j]
                height = data[i]['height'].item()
                width = data[i]['width'].item()
                result_path = os.path.join(args.results_folder, "{}/{}.png".format(dataset, image_id))

                result = TF.to_pil_image(pred)
                result = result.resize((width, height))  # PIL's resize expects (width, height)
                dirname = os.path.dirname(result_path)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                result.save(result_path) 
Example #6
Source File: test_transform_goturn.py    From open-vot with MIT License
def test_transform_goturn(self):
        base_dataset = VOT(self.vot_dir, return_rect=True, download=True)
        transform = TransformGOTURN()
        dataset = Pairwise(
            base_dataset, transform, pairs_per_video=1,
            frame_range=1, causal=True)
        self.assertGreater(len(dataset), 0)

        for crop_z, crop_x, labels in dataset:
            self.assertEqual(crop_z.size(), crop_x.size())

        if self.visualize:
            for t in range(10):
                crop_z, crop_x, labels = random.choice(dataset)
                mean_color = torch.tensor(
                    transform.mean_color).float().view(3, 1, 1)
                crop_z = F.to_pil_image((crop_z + mean_color) / 255.0)
                crop_x = F.to_pil_image((crop_x + mean_color) / 255.0)
                labels = labels.cpu().numpy()
                labels *= transform.out_size / transform.label_scale_factor

                bndbox = np.concatenate([
                    labels[:2], labels[2:] - labels[:2]])
                show_frame(crop_x, bndbox, fig_n=1, pause=1) 
Example #7
Source File: data_specs.py    From margipose with Apache License 2.0
def unconvert(self, tensor):
        return tr.to_pil_image(denormalize_pixels(tensor.clone(), self.mean, self.stddev), 'RGB') 
Example #8
Source File: test.py    From Noise2Noise-Cryo-EM-image-denoising with MIT License
def test():
    device = torch.device(conf.cuda if torch.cuda.is_available() else "cpu")
    test_dataset = TestingDataset(conf.data_path_test,conf.crop_img_size)
    test_loader =  DataLoader(test_dataset, batch_size=1, shuffle=False)
    print('Loading model from: {}'.format(conf.model_path_test))
    model = UNet(in_channels=conf.img_channel,out_channels=conf.img_channel)
    print('loading model')
    model.load_state_dict(torch.load(conf.model_path_test))
    model.eval()
    model.to(device)
    result_dir = conf.denoised_dir
    if not os.path.exists(result_dir):
        os.mkdir(result_dir)
    for batch_idx, source in enumerate(test_loader):
        source_img = tvF.to_pil_image(source.squeeze(0))
        source = source.to(device)
        denoised_img = model(source).detach().cpu()
        
        img_name = test_loader.dataset.test_list[batch_idx]
        
        denoised_result= tvF.to_pil_image(torch.clamp(denoised_img.squeeze(0), 0, 1))
        
        fname = os.path.splitext(img_name)[0]
        
        source_img.save(os.path.join(result_dir, f'{fname}-noisy.png'))
        denoised_result.save(os.path.join(result_dir, f'{fname}-denoised.png')) 
Example #9
Source File: test.py    From Noise2Noise-Cryo-EM-image-denoising with MIT License
def test():
    device = torch.device(conf.cuda if torch.cuda.is_available() else "cpu")
    test_dataset = Testinging_Dataset(conf.data_path_test,conf.test_noise_param,conf.crop_img_size)
    test_loader =  DataLoader(test_dataset, batch_size=1, shuffle=False)
    print('Loading model from: {}'.format(conf.model_path_test))
    model = UNet(in_channels=conf.img_channel,out_channels=conf.img_channel)
    print('loading model')
    model.load_state_dict(torch.load(conf.model_path_test))
    model.eval()
    model.to(device)
    result_dir = conf.denoised_dir
    if not os.path.exists(result_dir):
        os.mkdir(result_dir)
    for batch_idx, (source,img_cropped) in enumerate(test_loader):
        source_img = tvF.to_pil_image(source.squeeze(0))
        img_truth = img_cropped.squeeze(0).numpy().astype(np.uint8)
        source = source.to(device)
        denoised_img = model(source).detach().cpu()
        
        img_name = test_loader.dataset.image_list[batch_idx]
        
        denoised_result= tvF.to_pil_image(torch.clamp(denoised_img.squeeze(0), 0, 1))
        fname = os.path.splitext(img_name)[0]
        
        source_img.save(os.path.join(result_dir, f'{fname}-noisy.png'))
        denoised_result.save(os.path.join(result_dir, f'{fname}-denoised.png'))       
        io.imsave(os.path.join(result_dir, f'{fname}-ground_truth.png'),img_truth) 
Example #10
Source File: transforms.py    From ACDRNet with Apache License 2.0
def __call__(self, img, mask):
        return F.to_pil_image(img, self.mode), F.to_pil_image(mask, self.mode) 
Example #11
Source File: video_transforms.py    From pvse with MIT License
def __call__(self, pic):
        """
        Args:
            pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.

        Returns:
            PIL Image: Image converted to PIL Image.

        """
        return F.to_pil_image(pic, self.mode) 
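The mode argument forwarded here selects the PIL mode explicitly; when omitted, to_pil_image() infers it from the input's channel count and dtype. A short sketch of the common cases:

import torch
from torchvision.transforms.functional import to_pil_image

# 3-channel uint8 data defaults to 'RGB'.
rgb = to_pil_image(torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8))

# 1-channel uint8 data defaults to 'L' (8-bit grayscale).
gray = to_pil_image(torch.randint(0, 256, (1, 32, 32), dtype=torch.uint8))

# 1-channel float32 data can be kept as a 32-bit float image with mode='F'.
float_img = to_pil_image(torch.rand(1, 32, 32), mode='F')

print(rgb.mode, gray.mode, float_img.mode)  # RGB L F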
Example #12
Source File: generate_pseudo_labels.py    From RCRNet-Pytorch with MIT License
def generate_pseudo_label():
    pseudo_label_generator.eval()

    for data in tqdm(dataloader):
        images = []
        labels = []
        for frame in data:
            images.append(frame['image'].to(device))
            labels.append(frame['label'].to(device) if 'label' in frame else None)
        with torch.no_grad():
            for i in range(1, args.frame_between_label_num+1):
                pseudo_label = pseudo_label_generator.generate_pseudo_label(images[i], images[0], images[-1], labels[0], labels[-1])
                labels[i] = torch.sigmoid(pseudo_label).detach()
            # save pseudo-labels
            for i, label_ in enumerate(labels):
                for j, label in enumerate(label_.detach().cpu()):
                    dataset = data[i]['dataset'][j]
                    image_id = data[i]['image_id'][j]
                    pseudo_label_path = os.path.join(pseudo_label_folder, "{}/{}.png".format(dataset, image_id))

                    height = data[i]['height'].item()
                    width = data[i]['width'].item()
                    result = TF.to_pil_image(label)
                    result = result.resize((width, height))  # PIL's resize expects (width, height)
                    dirname = os.path.dirname(pseudo_label_path)
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    result.save(pseudo_label_path) 
Example #13
Source File: test-jv.py    From Jacinle with MIT License
def imwrite(path, tensor):
    TF.to_pil_image(tensor).save(path) 
Example #14
Source File: test_transform_siamfc.py    From open-vot with MIT License
def test_transform_siamfc(self):
        base_dataset = VOT(self.vot_dir, anno_type='rect', download=True)
        transform = TransformSiamFC(stats_path=self.stats_path)
        dataset = Pairwise(
            base_dataset, transform=transform, pairs_per_video=1, subset='train')
        self.assertGreater(len(dataset), 0)

        for crop_z, crop_x, labels, weights in dataset:
            self.assertAlmostEqual(
                weights[labels == 1].sum().item(),
                weights[labels == 0].sum().item())
            self.assertAlmostEqual(
                weights.sum().item(), labels[labels >= 0].numel())
            self.assertEqual(
                weights[labels == transform.ignore_label].sum().item(), 0)

        if self.visualize:
            crop_z, crop_x, labels, weights = random.choice(dataset)
            crop_z = F.to_pil_image(crop_z / 255.0)
            crop_x = F.to_pil_image(crop_x / 255.0)
            labels = self._rescale(labels.squeeze().cpu().numpy())
            weights = self._rescale(weights.squeeze().cpu().numpy())

            bndbox_z = np.array([31, 31, 64, 64])
            bndbox_x = np.array([95, 95, 64, 64])

            show_frame(crop_z, bndbox_z, fig_n=1, pause=1)
            show_frame(crop_x, bndbox_x, fig_n=2, pause=1)
            show_frame(labels, fig_n=3, pause=1, cmap='hot')
            show_frame(weights, fig_n=4, pause=5, cmap='hot') 
Example #15
Source File: image_prior.py    From Torchelie with MIT License
def superres(img,
             hourglass,
             input_dim,
             scale,
             iters,
             lr,
             noise_std=1 / 30,
             device='cuda'):
    im = TFF.to_tensor(img)[None].to(device)
    z = input_noise((im.shape[2] * scale, im.shape[3] * scale), input_dim)
    z = z.to(device)

    def body(batch):
        recon = hourglass(z + torch.randn_like(z) * noise_std)
        loss = F.mse_loss(
            F.interpolate(recon, size=im.shape[2:], mode='bilinear'), im)
        loss.backward()
        return {
            "loss": loss,
        }

    def display():
        recon = hourglass(z)
        loss = F.mse_loss(
            F.interpolate(recon, size=im.shape[2:], mode='bilinear'), im)
        return {
            "loss": loss,
            "recon": recon.clamp(0, 1),
            'orig': F.interpolate(im, scale_factor=scale,
                                  mode='bicubic').clamp(0, 1)
        }

    loop = make_loop(hourglass, body, display, iters, lr)
    loop.to(device)
    loop.run(1)
    with torch.no_grad():
        hourglass.eval()
        return TFF.to_pil_image(hourglass(z)[0].cpu()) 
Example #16
Source File: data_loaders.py    From ModelFeast with MIT License
def _tansform_(self, x):
        x = np.array(x, dtype='float32') / 255
        x = (x - 0.5) / 0.5 
        x = x.transpose((2, 0, 1))  # move the channel axis to dim 0, the layout PyTorch expects
        x = torch.from_numpy(x)

        # # for inceptionresnetV2
        # x = TF.to_pil_image(x)
        # x = TF.resize(x, (64, 32))
        # x = TF.to_tensor(x)

        return x 
Example #17
Source File: preprocessing_transforms.py    From ViP with MIT License
def _to_pil(self, clip):
        # Must be of type uint8 if images have multiple channels, int16, int32, or float32 if there is only one channel
        if isinstance(clip[0], np.ndarray):
            if 'float' in str(clip[0].dtype):
                clip = np.array(clip).astype('float32')
            if 'int64' == str(clip[0].dtype):
                clip = np.array(clip).astype('int32')
            if clip[0].ndim == 3:
                clip = np.array(clip).astype('uint8')

        output=[]
        for frame in clip:
            output.append(F.to_pil_image(frame))
        
        return output 
Example #18
Source File: utils.py    From skorch with BSD 3-Clause "New" or "Revised" License
def convert_cell_to_img(t, padding=16):
    """Converts pytorch tensor into a Pillow Image. The padding will be removed
    from the resulting image"""
    std = torch.Tensor([0.229, 0.224, 0.225]).reshape(-1, 1, 1)
    mu = torch.Tensor([0.485, 0.456, 0.406]).reshape(-1, 1, 1)
    output = t.mul(std)
    output.add_(mu)
    img = to_pil_image(output)
    w, h = img.size
    return img.crop((padding, padding, w - padding, h - padding)) 
Example #19
Source File: transforms.py    From person-reid-lib with MIT License
def to_pil_image(self, pic):
        if pic.shape[2] == 3:
            return ImageData(F.to_pil_image(pic, self.mode))
        elif pic.shape[2] == 1:
            return ImageData(F.to_pil_image(pic))
        elif pic.shape[2] == 5:
            if self.use_flow:
                pic_rgb = F.to_pil_image(pic[..., :3], self.mode)
                pic_x = F.to_pil_image(pic[..., 3:4])
                pic_y = F.to_pil_image(pic[..., 4:5])
                return ImageData(pic_rgb, pic_x, pic_y)
            else:
                return ImageData(F.to_pil_image(pic[..., :3], self.mode))
        else:
            raise ValueError("unsupported channel count: {}".format(pic.shape[2]))
Example #20
Source File: transforms.py    From person-reid-lib with MIT License
def _instance_process(self, pic_list, params):
        if isinstance(pic_list, np.ndarray):
            if pic_list.ndim == 3:
                return self.to_pil_image(pic_list)
            elif pic_list.ndim == 4:
                return [self.to_pil_image(pic_list[i]) for i in range(pic_list.shape[0])]
            else:
                raise TypeError
        raise TypeError 
Example #21
Source File: utils.py    From noise2noise-pytorch with MIT License
def create_montage(img_name, noise_type, save_path, source_t, denoised_t, clean_t, show):
    """Creates montage for easy comparison."""

    fig, ax = plt.subplots(1, 3, figsize=(9, 3))
    fig.canvas.manager.set_window_title(img_name.capitalize()[:-4])  # set_window_title lives on canvas.manager in newer Matplotlib

    # Bring tensors to CPU
    source_t = source_t.cpu().narrow(0, 0, 3)
    denoised_t = denoised_t.cpu()
    clean_t = clean_t.cpu()
    
    source = tvF.to_pil_image(source_t)
    denoised = tvF.to_pil_image(torch.clamp(denoised_t, 0, 1))
    clean = tvF.to_pil_image(clean_t)

    # Build image montage
    psnr_vals = [psnr(source_t, clean_t), psnr(denoised_t, clean_t)]
    titles = ['Input: {:.2f} dB'.format(psnr_vals[0]),
              'Denoised: {:.2f} dB'.format(psnr_vals[1]),
              'Ground truth']
    zipped = zip(titles, [source, denoised, clean])
    for j, (title, img) in enumerate(zipped):
        ax[j].imshow(img)
        ax[j].set_title(title)
        ax[j].axis('off')

    # Open pop up window, if requested
    if show > 0:
        plt.show()

    # Save to files
    fname = os.path.splitext(img_name)[0]
    source.save(os.path.join(save_path, f'{fname}-{noise_type}-noisy.png'))
    denoised.save(os.path.join(save_path, f'{fname}-{noise_type}-denoised.png'))
    fig.savefig(os.path.join(save_path, f'{fname}-{noise_type}-montage.png'), bbox_inches='tight') 
Example #22
Source File: datasets.py    From noise2noise-pytorch with MIT License
def __getitem__(self, index):
        """Retrieves image from folder and corrupts it."""

        # Use converged image, if requested
        if self.clean_targets:
            target = self.reference
        else:
            target_fname = self.imgs[index].replace('render', 'target')
            file_ext = '.exr' if self.hdr_targets else '.png'
            target_fname = os.path.splitext(target_fname)[0] + file_ext
            target_path = os.path.join(self.root_dir, 'target', target_fname)
            if self.hdr_targets:
                target = tvF.to_pil_image(load_hdr_as_tensor(target_path))
            else:
                target = Image.open(target_path).convert('RGB')

        # Get buffers
        render_path = os.path.join(self.root_dir, 'render', self.imgs[index])
        albedo_path = os.path.join(self.root_dir, 'albedo', self.albedos[index])
        normal_path =  os.path.join(self.root_dir, 'normal', self.normals[index])

        if self.hdr_buffers:
            render = tvF.to_pil_image(load_hdr_as_tensor(render_path))
            albedo = tvF.to_pil_image(load_hdr_as_tensor(albedo_path))
            normal = tvF.to_pil_image(load_hdr_as_tensor(normal_path))
        else:
            render = Image.open(render_path).convert('RGB')
            albedo = Image.open(albedo_path).convert('RGB')
            normal = Image.open(normal_path).convert('RGB')

        # Crop (tensors are needed below whether or not a crop is applied)
        buffers = [render, albedo, normal, target]
        if self.crop_size != 0:
            buffers = self._random_crop(buffers)
        buffers = [tvF.to_tensor(b) for b in buffers]

        # Stack buffers to create input volume
        source = torch.cat(buffers[:3], dim=0)
        target = buffers[3]

        return source, target 
Example #23
Source File: test_on_image.py    From LCFCN with Apache License 2.0
def apply(image_path, model_name, model_path):
  transformer = ut.ComposeJoint(
                    [
                         [transforms.ToTensor(), None],
                         [transforms.Normalize(*ut.mean_std), None],
                         [None,  ut.ToLong() ]
                    ])  

  # Load best model
  model = model_dict[model_name](n_classes=2).cuda()
  model.load_state_dict(torch.load(model_path))

  # Read Image
  image_raw = imread(image_path)
  collection = list(map(FT.to_pil_image, [image_raw, image_raw]))
  image, _ = transformer(collection)

  batch = {"images":image[None]}
  
  # Make predictions
  pred_blobs = model.predict(batch, method="blobs").squeeze()
  pred_counts = int(model.predict(batch, method="counts").ravel()[0])

  # Save Output
  save_path = image_path + "_blobs_count:{}.png".format(pred_counts)

  imsave(save_path, ut.combine_image_blobs(image_raw, pred_blobs))
  print("| Counts: {}\n| Output saved in: {}".format(pred_counts, save_path)) 
Example #24
Source File: shanghai.py    From LCFCN with Apache License 2.0
def __getitem__(self, index):        
        name = self.img_names[index]
      
        # LOAD IMG, POINT, and ROI
        image = imread(os.path.join(self.path, "images", name))
        if image.ndim == 2:
            image = image[:,:,None].repeat(3,2)
        pointList = hu.load_mat(os.path.join(self.path, "ground-truth",
                                             "GT_" + name.replace(".jpg", "") + ".mat"))
        pointList = pointList["image_info"][0][0][0][0][0] 
        
        points = np.zeros(image.shape[:2], "uint8")[:,:,None]
        H, W = image.shape[:2]
        for x, y in pointList:
            points[min(int(y), H-1), min(int(x), W-1)] = 1

        counts = torch.LongTensor(np.array([pointList.shape[0]]))

        collection = list(map(FT.to_pil_image, [image, points]))
        image, points = transformers.apply_transform(self.split, image, points, 
                   transform_name=self.exp_dict['dataset']['transform'])
            
        return {"images":image, 
                "points":points.squeeze(), 
                "counts":counts, 
                'meta':{"index":index}} 
Example #25
Source File: trancos.py    From LCFCN with Apache License 2.0
def __getitem__(self, index):
        name = self.img_names[index]

        # LOAD IMG, POINT, and ROI
        image = imread(os.path.join(self.path, name + ".jpg"))
        points = imread(os.path.join(self.path, name + "dots.png"))[:,:,:1].clip(0,1)
        roi = loadmat(os.path.join(self.path, name + "mask.mat"))["BW"][:,:,np.newaxis]
        
        # LOAD IMG AND POINT
        image = image * roi
        image = hu.shrink2roi(image, roi)
        points = hu.shrink2roi(points, roi).astype("uint8")

        counts = torch.LongTensor(np.array([int(points.sum())]))   
        
        collection = list(map(FT.to_pil_image, [image, points]))
        image, points = transformers.apply_transform(self.split, image, points, 
                   transform_name=self.exp_dict['dataset']['transform'])
            
        return {"images":image, 
                "points":points.squeeze(), 
                "counts":counts, 
                'meta':{"index":index}} 
Example #26
Source File: dataset.py    From pytorch-UNet with MIT License
def __call__(self, image, mask):
        # transforming to PIL image
        image, mask = F.to_pil_image(image), F.to_pil_image(mask)

        # random crop
        if self.crop:
            i, j, h, w = T.RandomCrop.get_params(image, self.crop)
            image, mask = F.crop(image, i, j, h, w), F.crop(mask, i, j, h, w)

        if np.random.rand() < self.p_flip:
            image, mask = F.hflip(image), F.hflip(mask)

        # color transforms || ONLY ON IMAGE
        if self.color_jitter_params:
            image = self.color_tf(image)

        # random affine transform
        if np.random.rand() < self.p_random_affine:
            affine_params = T.RandomAffine(180).get_params((-90, 90), (1, 1), (2, 2), (-45, 45), self.crop)
            image, mask = F.affine(image, *affine_params), F.affine(mask, *affine_params)

        # transforming to tensor
        image = F.to_tensor(image)
        if not self.long_mask:
            mask = F.to_tensor(mask)
        else:
            mask = to_long_tensor(mask)

        return image, mask 
Example #27
Source File: utils.py    From ICDAR-2019-SROIE with MIT License
def transform(image, boxes, labels, split):
    """
    Apply the transformations above.

    :param image: image, a PIL Image
    :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4)
    :param labels: labels of objects, a tensor of dimensions (n_objects)
    :param split: one of 'TRAIN' or 'TEST', since different sets of transformations are applied
    :return: transformed image, transformed bounding box coordinates, transformed labels
    """
    assert split in {'TRAIN', 'TEST'}

    # Mean and standard deviation of ImageNet data that our base VGG from torchvision was trained on
    # see: https://pytorch.org/docs/stable/torchvision/models.html
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    new_image = image
    new_boxes = boxes
    new_labels = labels
    # Skip the following operations if validation/evaluation
    if split == 'TRAIN':
        # A series of photometric distortions in random order, each with 50% chance of occurrence, as in Caffe repo
        new_image = photometric_distort(new_image)

        # Convert PIL image to Torch tensor
        new_image = FT.to_tensor(new_image)

        # Expand image (zoom out) with a 50% chance - helpful for training detection of small objects
        # Fill surrounding space with the mean of ImageNet data that our base VGG was trained on
        if random.random() < 0.5:
            new_image, new_boxes = expand(new_image, boxes, filler=mean)

        # Randomly crop image (zoom in)
        new_image, new_boxes, new_labels = random_crop(new_image, new_boxes, new_labels)

        # Convert Torch tensor to PIL image
        new_image = FT.to_pil_image(new_image)

        # Flip image with a 50% chance
        if random.random() < 0.5:
            new_image, new_boxes = flip(new_image, new_boxes)

    # Resize image to (300, 300) - this also converts absolute boundary coordinates to their fractional form
    new_image, new_boxes = resize(new_image, new_boxes, dims=(300, 300))

    # Convert PIL image to Torch tensor
    new_image = FT.to_tensor(new_image)

    # Normalize by mean and standard deviation of ImageNet data that our base VGG was trained on
    new_image = FT.normalize(new_image, mean=mean, std=std)

    return new_image, new_boxes, new_labels 
Example #28
Source File: image_prior.py    From Torchelie with MIT License
def inpainting(img,
               mask,
               hourglass,
               input_dim,
               iters,
               lr,
               noise_std=1 / 30,
               device='cuda'):
    im = TFF.to_tensor(img)[None].to(device)
    mask = TFF.to_tensor(mask)[None].to(device)
    z = input_noise((im.shape[2], im.shape[3]), input_dim)
    z = z.to(device)
    print(hourglass)

    def body(batch):
        recon = hourglass(z + torch.randn_like(z) * noise_std)
        loss = torch.sum(
            F.mse_loss(F.interpolate(recon, size=im.shape[2:], mode='nearest'),
                       im,
                       reduction='none') * mask / mask.sum())
        loss.backward()
        return {"loss": loss}

    def display():
        recon = hourglass(z)
        recon = F.interpolate(recon, size=im.shape[2:], mode='nearest')
        loss = F.mse_loss(recon * mask, im)

        result = recon * (1 - mask) + im * mask
        return {
            "loss": loss,
            "recon": recon.clamp(0, 1),
            'orig': im,
            'result': result.clamp(0, 1)
        }

    loop = make_loop(hourglass, body, display, iters, lr)
    loop.test_loop.callbacks.add_callbacks([tcb.Log('result', 'result')])
    loop.to(device)
    loop.run(1)
    with torch.no_grad():
        hourglass.eval()
        return TFF.to_pil_image(hourglass(z)[0].cpu()) 
Example #29
Source File: utils.py    From a-PyTorch-Tutorial-to-Object-Detection with MIT License
def transform(image, boxes, labels, difficulties, split):
    """
    Apply the transformations above.

    :param image: image, a PIL Image
    :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4)
    :param labels: labels of objects, a tensor of dimensions (n_objects)
    :param difficulties: difficulties of detection of these objects, a tensor of dimensions (n_objects)
    :param split: one of 'TRAIN' or 'TEST', since different sets of transformations are applied
    :return: transformed image, transformed bounding box coordinates, transformed labels, transformed difficulties
    """
    assert split in {'TRAIN', 'TEST'}

    # Mean and standard deviation of ImageNet data that our base VGG from torchvision was trained on
    # see: https://pytorch.org/docs/stable/torchvision/models.html
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    new_image = image
    new_boxes = boxes
    new_labels = labels
    new_difficulties = difficulties
    # Skip the following operations for evaluation/testing
    if split == 'TRAIN':
        # A series of photometric distortions in random order, each with 50% chance of occurrence, as in Caffe repo
        new_image = photometric_distort(new_image)

        # Convert PIL image to Torch tensor
        new_image = FT.to_tensor(new_image)

        # Expand image (zoom out) with a 50% chance - helpful for training detection of small objects
        # Fill surrounding space with the mean of ImageNet data that our base VGG was trained on
        if random.random() < 0.5:
            new_image, new_boxes = expand(new_image, boxes, filler=mean)

        # Randomly crop image (zoom in)
        new_image, new_boxes, new_labels, new_difficulties = random_crop(new_image, new_boxes, new_labels,
                                                                         new_difficulties)

        # Convert Torch tensor to PIL image
        new_image = FT.to_pil_image(new_image)

        # Flip image with a 50% chance
        if random.random() < 0.5:
            new_image, new_boxes = flip(new_image, new_boxes)

    # Resize image to (300, 300) - this also converts absolute boundary coordinates to their fractional form
    new_image, new_boxes = resize(new_image, new_boxes, dims=(300, 300))

    # Convert PIL image to Torch tensor
    new_image = FT.to_tensor(new_image)

    # Normalize by mean and standard deviation of ImageNet data that our base VGG was trained on
    new_image = FT.normalize(new_image, mean=mean, std=std)

    return new_image, new_boxes, new_labels, new_difficulties 
Example #30
Source File: __init__.py    From flashtorch with MIT License
def apply_transforms(image, size=224):
    """Transforms a PIL image to torch.Tensor.

    Applies a series of tranformations on PIL image including a conversion
    to a tensor. The returned tensor has a shape of :math:`(N, C, H, W)` and
    is ready to be used as an input to neural networks.

    The image is resized and then center-cropped to `size` (224 by default).
    The `means` and `stds` for normalisation are taken from ImageNet, as the
    package is currently developed for visualizing pre-trained models.

    The plan is to expand this to handle custom size/mean/std.

    Args:
        image (PIL.Image.Image or numpy array)
        size (int, optional, default=224): Desired size (width/height) of the
            output tensor

    Shape:
        Input: :math:`(C, H, W)` for numpy array
        Output: :math:`(N, C, H, W)`

    Returns:
        torch.Tensor (torch.float32): Transformed image tensor

    Note:
        Symbols used to describe dimensions:
            - N: number of images in a batch
            - C: number of channels
            - H: height of the image
            - W: width of the image

    """

    if not isinstance(image, Image.Image):
        image = F.to_pil_image(image)

    means = [0.485, 0.456, 0.406]
    stds = [0.229, 0.224, 0.225]

    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])

    tensor = transform(image).unsqueeze(0)

    tensor.requires_grad = True

    return tensor
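A short usage sketch of apply_transforms(), assuming a local file example.jpg (a hypothetical path):

from PIL import Image

image = Image.open('example.jpg')       # hypothetical input file
input_tensor = apply_transforms(image)  # shape: torch.Size([1, 3, 224, 224])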