Source code for torchray.utils

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

r"""Utility functions."""

import json
import math
import os
from urllib.parse import urlparse
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F

# Machine epsilon (2**-52 and 2**-23) for double- and single-precision floats,
# stored as 0-dim tensors.
EPSILON_DOUBLE = torch.tensor(2.220446049250313e-16, dtype=torch.float64)
EPSILON_SINGLE = torch.tensor(1.19209290E-07, dtype=torch.float32)
# Square root of two. The double-precision constant must actually be stored
# as float64; the single-precision one is derived from it by down-casting.
SQRT_TWO_DOUBLE = torch.tensor(math.sqrt(2), dtype=torch.float64)
SQRT_TWO_SINGLE = SQRT_TWO_DOUBLE.to(torch.float32)

# Built-in configuration. ``get_config`` overlays the values found in a
# ``.torchrayrc`` file on top of this dictionary (in place).
_DEFAULT_CONFIG = {
    'mongo': {
        'server': 'mongod',
        'hostname': 'localhost',
        'port': 27017,
        'database': './data/db'
    },
    'benchmark': {
        'voc_dir': './data/datasets/voc',
        'coco_dir': './data/datasets/coco',
        'coco_anno_dir': './data/datasets/coco/annotations',
        'imagenet_dir': './data/datasets/imagenet',
        'models_dir': './data/models',
        'experiments_dir': './data'
    }
}

# Set to ``True`` after the first successful lookup so that later calls to
# ``get_config`` do not touch the file system again.
_config_read = False


def get_config():
    """Read the TorchRay config file.

    Read the config file ``.torchrayrc`` from the current directory or, if
    it is not found there, from the user's home directory, and merge it into
    the default configuration. Only the first file found is used. The result
    is cached: subsequent calls return the same dictionary without reading
    any file.

    Returns:
        dict: configuration.

    Raises:
        AssertionError: if the config file contains a key that does not
            exist in the default configuration, or a non-object value where
            a section is expected.
    """
    global _config_read

    config = _DEFAULT_CONFIG
    if _config_read:
        return config

    def _update(source, delta, prefix=''):
        # Leaves are simply replaced by the value from the config file.
        if not isinstance(source, dict):
            return delta
        # Explicit raises (rather than ``assert``) so that the validation
        # still runs when Python is started with ``-O``.
        if not isinstance(delta, dict):
            where = prefix.rstrip('.') or '<root>'
            raise AssertionError(
                f"config entry '{where}' must be a JSON object")
        for key, value in delta.items():
            # Catch name errors in config file.
            if key not in source:
                raise AssertionError(f"unknown config key '{prefix}{key}'")
            source[key] = _update(source[key], value, f'{prefix}{key}.')
        return source

    for curr_dir in os.curdir, os.path.expanduser('~'):
        path = os.path.join(curr_dir, '.torchrayrc')
        if os.path.exists(path):
            with open(path, 'r', encoding='utf-8') as file:
                overrides = json.load(file)
            _update(config, overrides)
            break

    _config_read = True
    return config
def get_device(gpu=0):
    r"""Get the :class:`torch.device` to use; specify device with :attr:`gpu`.

    The CPU is selected when :attr:`gpu` is ``None`` or when CUDA is not
    available; otherwise the CUDA device with index :attr:`gpu` is selected.

    Args:
        gpu (int, optional): Index of the GPU device; specify ``None`` to
            force CPU. Default: ``0``.

    Returns:
        :class:`torch.device`: device to use.
    """
    if gpu is None or not torch.cuda.is_available():
        return torch.device('cpu')
    return torch.device(f'cuda:{gpu}')
def xmkdir(path):
    r"""Create a directory path recursively.

    The function creates :attr:`path`, including missing parents, if nothing
    exists at that location yet. Passing ``None`` is a no-op.

    Args:
        path (str): path to create.
    """
    if path is None or os.path.exists(path):
        return
    try:
        os.makedirs(path)
    except FileExistsError:
        # Another process created the directory between the existence check
        # and the call above (race condition in multi-processing).
        pass
def is_url(obj):
    r"""Check if an object is an URL.

    The object is parsed with :func:`urllib.parse.urlparse` and counts as an
    URL only if its scheme, network location and path are all non-empty.
    Objects that cannot be parsed are not URLs.

    Args:
        obj (object): object to test.

    Returns:
        bool: ``True`` if :attr:`obj` is an URL string; otherwise ``False``.
    """
    try:
        parts = urlparse(obj)
        return bool(parts.scheme and parts.netloc and parts.path)
    except Exception:
        return False
def tensor_to_im(tensor):
    r"""Reshape a tensor as a grayscale image stack.

    The function reshapes the tensor :attr:`tensor` of size
    :math:`N\times K\times H\times W` to have shape
    :math:`(NK)\times 1\times H\times W`.

    Args:
        tensor (:class:`torch.Tensor`): tensor to rearrange.

    Returns:
        :class:`torch.Tensor`: Reshaped tensor.
    """
    spatial_shape = tensor.shape[2:]
    # Fold the first two dimensions into one ...
    stacked = tensor.reshape(-1, *spatial_shape)
    # ... and insert a singleton channel dimension after it.
    return stacked[:, None, :, :]
def pil_to_tensor(pil_image):
    r"""Convert a PIL image to a tensor.

    The image is converted to a NumPy array, given a trailing channel axis
    if it is two-dimensional, moved to channel-first layout and divided by
    255.

    Args:
        pil_image (:class:`PIL.Image`): PIL image.

    Returns:
        :class:`torch.Tensor`: the image as a :math:`C\times H\times W`
        float tensor (:math:`C=1` for two-dimensional input), in the
        [0, 1] range for 8-bit images.
    """
    pixels = np.array(pil_image)
    if pixels.ndim == 2:
        # Single-channel image: add the missing channel axis.
        pixels = pixels[:, :, None]
    channels_first = torch.tensor(pixels, dtype=torch.float32).permute(2, 0, 1)
    return channels_first / 255
def im_to_numpy(tensor):
    r"""Convert a tensor image to a NumPy image.

    The function converts the :math:`K\times H\times W` tensor
    :attr:`tensor` to a corresponding :math:`H\times W\times 3` NumPy array.
    The channel dimension is expanded to three channels, so :math:`K` must
    be 1 (grayscale, replicated) or 3.

    Args:
        tensor (:class:`torch.Tensor`): input tensor.

    Returns:
        :class:`numpy.ndarray`: NumPy array.
    """
    spatial_shape = tensor.shape[1:]
    three_channels = tensor.expand(3, *spatial_shape)
    channels_last = three_channels.permute(1, 2, 0)
    return channels_last.detach().cpu().numpy()
def imread(file, as_pil=False, resize=None, to_rgb=False):
    r"""Read an image as a tensor.

    The function reads the image :attr:`file` as a PyTorch tensor.
    :attr:`file` can also be an URL, in which case the image is downloaded
    first.

    To resize the image use the option :attr:`resize`, passing the desired
    shape ``(W, H)`` as a tuple or list. Passing a single number sets the
    shortest side to that length while preserving the aspect ratio.

    Args:
        file (str): Path or URL to the image.
        as_pil (bool): If ``True``, returns the PIL image instead of
            converting to a tensor.
        resize (float, int, tuple or list): Resize the image to this size.
        to_rgb (optional, bool): If ``True``, convert the PIL image to RGB.
            Default: ``False``.

    Returns:
        :class:`torch.Tensor`: The image read as a
        :math:`C\times H\times W` tensor in the [0, 1] range
        (or the :class:`PIL.Image` itself if :attr:`as_pil` is ``True``).
    """
    if is_url(file):
        import io  # Only needed for the download path.

        # Some servers refuse requests that do not look like a browser's.
        headers = {
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 '
            '(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.'
            '11',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*'
            '/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive'
        }
        request = urllib.request.Request(file, headers=headers)
        # Download everything up front so the connection can be closed
        # deterministically; PIL then decodes from the in-memory buffer.
        with urllib.request.urlopen(request) as response:
            file = io.BytesIO(response.read())

    img = Image.open(file)
    if to_rgb:
        img = img.convert('RGB')

    if resize is not None:
        if isinstance(resize, (tuple, list)):
            target_size = tuple(resize)
        else:
            # Scalar: scale so that the shortest side has the given length.
            scale = float(resize) / float(min(img.size[0], img.size[1]))
            target_size = tuple(round(scale * side) for side in img.size)
        # Compare tuple with tuple so that a no-op resize is really skipped.
        if target_size != img.size:
            # ``Image.ANTIALIAS`` was an alias of ``Image.LANCZOS`` and has
            # been removed from recent Pillow releases.
            img = img.resize(target_size, Image.LANCZOS)

    if as_pil:
        return img
    return pil_to_tensor(img)
def imsc(img, *args, quiet=False, lim=None, interpolation='lanczos', **kwargs):
    r"""Rescale and display an image represented as a tensor.

    The function scales the tensor :attr:`img` to the [0, 1] range.
    The tensor is assumed to have shape :math:`3\times H\times W` (RGB)
    or :math:`1\times H\times W` (grayscale). A PIL image is converted to a
    tensor first. The input tensor is not modified.

    If the rescaling range is empty (for example, a constant image with
    automatic limits), the image is only shifted and clamped instead of
    being divided by zero.

    Args:
        img (:class:`torch.Tensor` or :class:`PIL.Image`): image.
        *args: extra positional arguments for
            :func:`matplotlib.pyplot.imshow`.
        quiet (bool, optional): if ``True``, do not display the image.
            Default: ``False``.
        lim (list, optional): minimum and maximum intensity value for
            rescaling, in this order. Default: ``None`` (use the minimum
            and maximum of :attr:`img`).
        interpolation (str, optional): The interpolation mode to use with
            :func:`matplotlib.pyplot.imshow` (e.g. ``'lanczos'`` or
            ``'nearest'``). Default: ``'lanczos'``.
        **kwargs: extra keyword arguments for
            :func:`matplotlib.pyplot.imshow`.

    Returns:
        tuple: the rescaled image tensor and the handle returned by
        :func:`matplotlib.pyplot.imshow` (``None`` if :attr:`quiet` is
        ``True``).
    """
    if isinstance(img, Image.Image):
        img = pil_to_tensor(img)
    handle = None
    with torch.no_grad():
        if not lim:
            lim = [img.min(), img.max()]
        img = img - lim[0]  # also makes a copy
        value_range = lim[1] - lim[0]
        # A zero range would turn the whole image into NaNs (0 * inf).
        if value_range != 0:
            img.mul_(1 / value_range)
        img = torch.clamp(img, min=0, max=1)
        if not quiet:
            # matplotlib expects an H x W x 3 array.
            bitmap = img.expand(3, *img.shape[1:])
            bitmap = bitmap.permute(1, 2, 0).cpu().numpy()
            handle = plt.imshow(
                bitmap, *args, interpolation=interpolation, **kwargs)
            curr_ax = plt.gca()
            curr_ax.axis('off')
    return img, handle
def resample(source, target_size, transform):
    r"""Spatially resample a tensor.

    The function resamples the :attr:`source` tensor generating a
    :attr:`target` tensor of size :attr:`target_size`. Resampling uses the
    transform :attr:`transform`, specified as a :math:`2\times 2` matrix in
    the form

    .. math::
        \begin{bmatrix}
        s_u & t_u\\
        s_v & t_v
        \end{bmatrix}

    where :math:`s_u` is the scaling factor along the horizontal spatial
    direction, :math:`t_u` the horizontal offset, and :math:`s_v, t_v` the
    corresponding quantities for the vertical direction.

    Internally, the function calls
    :func:`torch.nn.functional.grid_sample` with its default settings.

    The transformation is described as a forward mapping, so that a pixel
    :math:`(u,v)` in the source tensor is mapped to pixel
    :math:`u' = s_u u + t_u, v' = s_v v + t_v`.

    The reference frames are defined as follows. Pixels are unit squares, so
    that a :math:`H\times W` tensor maps to the rectangle
    :math:`[0, W) \times [0, H)`. Hence the element at row :math:`i` and
    column :math:`j` maps to a unit square whose center is
    :math:`(u,v) = (j + 1/2, i + 1/2)`.

    Example:
        In order to stretch an :math:`H \times W` source tensor to a target
        :math:`H' \times W'` tensor, one would use the transformation matrix

        .. math::
            \begin{bmatrix}
            W'/W & 0\\
            H'/H & 0\\
            \end{bmatrix}

    Args:
        source (:class:`torch.Tensor`): :math:`N\times C\times H\times W`
            tensor.
        target_size (tuple of int): target size ``(H', W')``.
        transform (:class:`torch.Tensor`): :math:`2\times 2` transformation
            tensor.

    Returns:
        :class:`torch.Tensor`: resampled tensor.
    """
    dtype, device = source.dtype, source.device
    target_height, target_width = target_size
    source_height, source_width = source.shape[2:]

    scale_u, offset_u = transform[0, 0], transform[0, 1]
    scale_v, offset_v = transform[1, 0], transform[1, 1]

    # Centers of the target pixels in the target reference frame.
    target_u = torch.arange(target_width, dtype=dtype, device=device) + 0.5
    target_v = torch.arange(target_height, dtype=dtype, device=device) + 0.5

    # Map them to the source frame and normalize to [-1, 1] for grid_sample.
    # NOTE(review): the offset is *added* here, whereas inverting the
    # documented forward mapping would subtract it -- confirm the intended
    # sign convention with the callers before changing either.
    source_u = 2 * ((target_u + offset_u) / scale_u) / source_width - 1
    source_v = 2 * ((target_v + offset_v) / scale_v) / source_height - 1

    grid_v, grid_u = torch.meshgrid(source_v, source_u)
    # grid_sample expects the last dimension ordered as (x, y) = (u, v).
    grid = torch.stack((grid_u, grid_v), dim=2)
    grid = grid.unsqueeze(0).expand(len(source), -1, -1, -1)
    return torch.nn.functional.grid_sample(source, grid)
def imsmooth(tensor,
             sigma,
             stride=1,
             padding=0,
             padding_mode='constant',
             padding_value=0):
    r"""Apply a 2D Gaussian filter to a tensor.

    The 2D filter itself is implemented by separating the 2D convolution in
    two 1D convolutions, first along the vertical direction and then along
    the horizontal one. Each 1D Gaussian kernel is given by:

    .. math::
        f_i \propto \exp\left(-\frac{1}{2} \frac{i^2}{\sigma^2} \right),
            ~~~ i \in \{-W,\dots,W\},
            ~~~ W = \lceil 4\sigma \rceil.

    This kernel is normalized to sum to one exactly. Given the latter, the
    function calls :func:`torch.nn.functional.conv2d` to perform the actual
    convolution. Various padding parameters and the stride are passed to
    the latter.

    The kernel is computed in single precision and then cast to the dtype of
    :attr:`tensor`, so floating-point inputs other than ``float32`` are
    supported as well.

    Args:
        tensor (:class:`torch.Tensor`):
            :math:`N\times C\times H\times W` image tensor.
        sigma (float): standard deviation of the Gaussian kernel.
        stride (int, optional): subsampling factor. Default: ``1``.
        padding (int, optional): extra padding. Default: ``0``.
        padding_mode (str, optional): ``'constant'``, ``'reflect'`` or
            ``'replicate'``. Default: ``'constant'``.
        padding_value (float, optional): constant value for the `constant`
            padding mode; ignored by the other modes. Default: ``0``.

    Returns:
        :class:`torch.Tensor`: tensor with the smoothed images; it has size
        :math:`N\times C\times H\times W` for the default stride and
        padding.
    """
    assert sigma >= 0
    width = math.ceil(4 * sigma)
    # The epsilon keeps the division finite when sigma == 0, in which case
    # the kernel degenerates to a single tap (identity filter).
    filt = (torch.arange(-width,
                         width + 1,
                         dtype=torch.float32,
                         device=tensor.device) /
            (SQRT_TWO_SINGLE * sigma + EPSILON_SINGLE))
    filt = torch.exp(-filt * filt)
    filt /= torch.sum(filt)
    if tensor.is_floating_point():
        # conv2d requires the kernel and the input to share a dtype.
        filt = filt.to(tensor.dtype)
    num_channels = tensor.shape[1]
    width = width + padding
    if padding_mode == 'constant' and padding_value == 0:
        # Zero padding can be delegated to conv2d itself.
        other_padding = width
        x = tensor
    else:
        # pad: (before, after) pairs starting from last dimension backward
        pad = (width, width, width, width)
        if padding_mode == 'constant':
            x = F.pad(tensor, pad, mode=padding_mode, value=padding_value)
        else:
            # Non-constant modes do not take a fill value.
            x = F.pad(tensor, pad, mode=padding_mode)
        other_padding = 0
    padding = 0
    # Vertical pass: one (2W+1) x 1 kernel per channel (depthwise).
    x = F.conv2d(x,
                 filt.reshape((1, 1, -1, 1)).expand(num_channels, -1, -1, -1),
                 padding=(other_padding, padding),
                 stride=(stride, 1),
                 groups=num_channels)
    # Horizontal pass: one 1 x (2W+1) kernel per channel (depthwise).
    x = F.conv2d(x,
                 filt.reshape((1, 1, 1, -1)).expand(num_channels, -1, -1, -1),
                 padding=(padding, other_padding),
                 stride=(1, stride),
                 groups=num_channels)
    return x
def imarraysc(tiles,
              spacing=0,
              quiet=False,
              lim=None,
              interpolation='lanczos'):
    r"""Display or arrange an image or tensor batch as a mosaic.

    The function displays the tensor `tiles` as a set of tiles. `tiles` has
    shape :math:`K\times C\times H\times W` and the generated mosaic
    is a *new* tensor with shape :math:`C\times (MH) \times (NW)` where
    :math:`MN \geq K` (plus the optional spacing between tiles).

    Missing tiles are filled with zeros.

    Each tile is rescaled individually with :func:`imsc` (using
    :attr:`lim`), and the assembled mosaic is then passed through
    :func:`imsc` once more.

    Args:
        tiles (:class:`torch.Tensor`): tensor to display or rearrange.
        spacing (int, optional): thickness of the border (infilled
            with zeros) between tiles. Default: ``0``.
        quiet (bool, optional): If ``True``, do not display the mosaic.
            Default: ``False``.
        lim (list, optional): minimum and maximum intensity value for
            rescaling each tile. Default: ``None``.
        interpolation (str, optional): interpolation to use with
            :func:`matplotlib.pyplot.imshow`. Default: ``'lanczos'``.

    Returns:
        tuple: the value returned by :func:`imsc` for the mosaic, i.e. the
        rearranged tensor and the display handle.
    """
    num_tiles = tiles.shape[0]
    num_channels = tiles.shape[1]
    tile_height = tiles.shape[2]
    tile_width = tiles.shape[3]

    # Near-square layout: as many columns as needed, then enough rows.
    num_cols = math.ceil(math.sqrt(num_tiles))
    num_rows = (num_tiles + num_cols - 1) // num_cols

    mosaic = torch.zeros(
        num_channels,
        tile_height * num_rows + spacing * (num_rows - 1),
        tile_width * num_cols + spacing * (num_cols - 1))

    row_step = tile_height + spacing
    col_step = tile_width + spacing
    for index in range(num_tiles):
        row, col = divmod(index, num_cols)
        top = row * row_step
        left = col * col_step
        scaled_tile = imsc(tiles[index], quiet=True, lim=lim)[0]
        mosaic[0:num_channels,
               top:top + tile_height,
               left:left + tile_width] = scaled_tile

    return imsc(mosaic, quiet=quiet, interpolation=interpolation)