# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
r"""Utility functions."""
import json
import math
import os
from urllib.parse import urlparse
import urllib.request
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F
EPSILON_DOUBLE = torch.tensor(2.220446049250313e-16, dtype=torch.float64)
EPSILON_SINGLE = torch.tensor(1.19209290E-07, dtype=torch.float32)
SQRT_TWO_DOUBLE = torch.tensor(math.sqrt(2), dtype=torch.float64)
SQRT_TWO_SINGLE = SQRT_TWO_DOUBLE.to(torch.float32)
_DEFAULT_CONFIG = {
'mongo': {
'server': 'mongod',
'hostname': 'localhost',
'port': 27017,
'database': './data/db'
},
'benchmark': {
'voc_dir': './data/datasets/voc',
'coco_dir': './data/datasets/coco',
'coco_anno_dir': './data/datasets/coco/annotations',
'imagenet_dir': './data/datasets/imagenet',
'models_dir': './data/models',
'experiments_dir': './data'
}
}
_config_read = False
def get_config():
"""Read the TorchRay config file.
Read the config file from the current directory or the user's home
directory and return the configuration.
Returns:
dict: configuration.
"""
global _config_read
config = _DEFAULT_CONFIG
if _config_read:
return config
def _update(source, delta):
if isinstance(source, dict):
assert isinstance(delta, dict)
for k in source.keys():
if k in delta:
source[k] = _update(source[k], delta[k])
for k in delta.keys():
# Catch name errors in config file.
assert k in source
else:
source = delta
return source
config = _DEFAULT_CONFIG
for curr_dir in os.curdir, os.path.expanduser('~'):
path = os.path.join(curr_dir, '.torchrayrc')
if os.path.exists(path):
with open(path, 'r') as file:
config_ = json.load(file)
_update(config, config_)
break
_config_read = True
return config
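# Illustrative usage (a sketch, not part of the original module): a
# ``.torchrayrc`` file placed in the current or home directory is a JSON
# document that may override a subset of the default keys, for instance
#
#   {
#       "benchmark": {
#           "imagenet_dir": "/datasets/imagenet",
#           "models_dir": "/datasets/models"
#       }
#   }
#
# after which ``get_config()['benchmark']['imagenet_dir']`` returns the
# overridden path. Keys not present in the defaults trigger an assertion.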
def get_device(gpu=0):
r"""Get the :class:`torch.device` to use; specify device with :attr:`gpu`.
Args:
gpu (int, optional): Index of the GPU device; specify ``None`` to
force CPU. Default: ``0``.
Returns:
:class:`torch.device`: device to use.
"""
device = torch.device(
f'cuda:{gpu}'
if torch.cuda.is_available() and gpu is not None
else 'cpu')
return device
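# Illustrative usage (a sketch, not part of the original module):
#
#   device = get_device()          # 'cuda:0' if CUDA is available, else 'cpu'
#   cpu_device = get_device(None)  # always 'cpu'
#   x = torch.zeros(3, 224, 224, device=device)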
def xmkdir(path):
r"""Create a directory path recursively.
The function creates :attr:`path` if the directory does not exist.
Args:
path (str): path to create.
"""
if path is not None and not os.path.exists(path):
try:
os.makedirs(path)
except FileExistsError:
# Race condition in multi-processing.
pass
def is_url(obj):
r"""Check if an object is a URL.
Args:
obj (object): object to test.
Returns:
bool: ``True`` if :attr:`obj` is a URL string; otherwise ``False``.
"""
try:
result = urlparse(obj)
return all([result.scheme, result.netloc, result.path])
except Exception:
return False
def tensor_to_im(tensor):
r"""Reshape a tensor as a grayscale image stack.
The function reshapes the tensor :attr:`tensor` of size
:math:`N\times K\times H\times W`
to have shape :math:`(NK)\times 1\times H\times W`.
Args:
tensor (:class:`torch.Tensor`): tensor to rearrange.
Returns:
:class:`torch.Tensor`: Reshaped tensor.
"""
return tensor.reshape(-1, *tensor.shape[2:])[:, None, :, :]
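# Illustrative usage (a sketch, not part of the original module): turn an
# N x K x H x W activation tensor into a stack of N*K grayscale images.
#
#   activations = torch.rand(2, 16, 7, 7)
#   stack = tensor_to_im(activations)  # shape (32, 1, 7, 7)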
def pil_to_tensor(pil_image):
r"""Convert a PIL image to a tensor.
Args:
pil_image (:class:`PIL.Image`): PIL image.
Returns:
:class:`torch.Tensor`: the image as a :math:`C\times H\times W` tensor
in the [0, 1] range, where :math:`C` is 3 for RGB images and 1 for
grayscale ones.
"""
pil_image = np.array(pil_image)
if len(pil_image.shape) == 2:
pil_image = pil_image[:, :, None]
return torch.tensor(pil_image, dtype=torch.float32).permute(2, 0, 1) / 255
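# Illustrative usage (a sketch, not part of the original module); the file
# name is hypothetical.
#
#   pil_image = Image.open('example.jpg')
#   x = pil_to_tensor(pil_image)  # (3, H, W) float tensor in [0, 1] for RGB input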
def im_to_numpy(tensor):
r"""Convert a tensor image to a NumPy image.
The function converts the :math:`K\times H\times W` tensor :attr:`tensor`
to a corresponding :math:`H\times W\times K` NumPy array.
Args:
tensor (:class:`torch.Tensor`): input tensor.
Returns:
:class:`numpy.ndarray`: NumPy array.
"""
tensor_reshaped = tensor.expand(3, *tensor.shape[1:]).permute(1, 2, 0)
return tensor_reshaped.detach().cpu().numpy()
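# Illustrative usage (a sketch, not part of the original module): convert a
# C x H x W tensor into an H x W x C array suitable for matplotlib.
#
#   x = torch.rand(3, 32, 32)
#   plt.imshow(im_to_numpy(x))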
def imread(file, as_pil=False, resize=None, to_rgb=False):
r"""
Read an image as a tensor.
The function reads the image :attr:`file` as a PyTorch tensor.
:attr:`file` can also be a URL.
To resize the image, use the option :attr:`resize`, passing the desired
shape ``(W, H)`` as a tuple. Passing an integer instead sets the shortest
side to that length while preserving the aspect ratio.
Args:
file (str): Path or URL to the image.
resize (float, int, tuple or list): Resize the image to this size.
as_pil (bool): If ``True``, returns the PIL image instead of converting
to a tensor.
to_rgb (bool, optional): If ``True``, convert the PIL image to RGB.
Default: ``False``.
Returns:
:class:`torch.Tensor`:
The image read as a :math:`3\times H\times W` tensor in
the [0, 1] range.
"""
# Read the image, fetching it from a URL if necessary.
if is_url(file):
hdr = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 '
'(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.'
'11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*'
'/*;q=0.8',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding': 'none',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive'
}
req = urllib.request.Request(file, headers=hdr)
file = urllib.request.urlopen(req)
img = Image.open(file)
if to_rgb:
img = img.convert('RGB')
if resize is not None:
if not isinstance(resize, tuple) and not isinstance(resize, list):
scale = float(resize) / float(min(img.size[0], img.size[1]))
resize = [round(scale * h) for h in img.size]
if resize != img.size:
img = img.resize(resize, Image.ANTIALIAS)
if as_pil:
return img
return pil_to_tensor(img)
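# Illustrative usage (a sketch, not part of the original module); the path
# and URL are hypothetical.
#
#   x = imread('example.jpg', resize=224, to_rgb=True)   # shortest side -> 224
#   y = imread('https://example.com/image.png', resize=(224, 224))
#   pil = imread('example.jpg', as_pil=True)              # keep the PIL image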
def imsc(img, *args, quiet=False, lim=None, interpolation='lanczos', **kwargs):
r"""Rescale and display an image represented as a tensor.
The function scales the image :attr:`img` to the [0, 1] range.
The image is assumed to have shape :math:`3\times H\times W` (RGB) or
:math:`1\times H\times W` (grayscale).
Args:
img (:class:`torch.Tensor` or :class:`PIL.Image`): image.
quiet (bool, optional): if ``True``, do not display the image.
Default: ``False``.
lim (list, optional): maximum and minimum intensity value for
rescaling. Default: ``None``.
interpolation (str, optional): The interpolation mode to use with
:func:`matplotlib.pyplot.imshow` (e.g. ``'lanczos'`` or
``'nearest'``). Default: ``'lanczos'``.
Returns:
tuple: the rescaled image as a :class:`torch.Tensor`, together with the
handle returned by :func:`matplotlib.pyplot.imshow` (``None`` when
:attr:`quiet` is ``True``).
"""
if isinstance(img, Image.Image):
img = pil_to_tensor(img)
handle = None
with torch.no_grad():
if not lim:
lim = [img.min(), img.max()]
img = img - lim[0] # also makes a copy
img.mul_(1 / (lim[1] - lim[0]))
img = torch.clamp(img, min=0, max=1)
if not quiet:
bitmap = img.expand(3,
*img.shape[1:]).permute(1, 2, 0).cpu().numpy()
handle = plt.imshow(
bitmap, *args, interpolation=interpolation, **kwargs)
curr_ax = plt.gca()
curr_ax.axis('off')
return img, handle
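# Illustrative usage (a sketch, not part of the original module): display an
# image tensor rescaled to [0, 1]; the path is hypothetical.
#
#   x = imread('example.jpg')
#   scaled, handle = imsc(x)            # show the image and keep the handle
#   scaled, _ = imsc(x, quiet=True)     # rescale only, no matplotlib figure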
def resample(source, target_size, transform):
r"""Spatially resample a tensor.
The function resamples the :attr:`source` tensor, generating a target
tensor of size :attr:`target_size`. Resampling uses the
transform :attr:`transform`, specified as a :math:`2\times 2` matrix in the
form
.. math::
\begin{bmatrix}
s_u & t_u\\
s_v & t_v
\end{bmatrix}
where :math:`s_u` is the scaling factor along the horizontal spatial
direction, :math:`t_u` the horizontal offset, and :math:`s_v, t_v` the
corresponding quantity for the vertical direction.
Internally, the function uses :func:`torch.nn.functional.grid_sample` with
bilinear interpolation and zero padding.
The transformation defines the forward
mapping, so that a pixel :math:`(u,v)` in the source tensor is mapped to
pixel :math:`(u', v') = (s_u u + t_u, s_v v + t_v)`.
The reference frames are defined as follows. Pixels are unit squares, so
that a :math:`H\times W` tensor maps to the rectangle :math:`[0, W) \times
[0, H)`. Hence element :math:`x_{ncij}` of a tensor :math:`x` maps
to a unit square whose center is :math:`(u,v) = (j + 1/2, i + 1/2)`.
Example:
In order to stretch an :math:`H \times W` source tensor to a target
:math:`H' \times W'` tensor, one would use the transformation matrix
.. math::
\begin{bmatrix}
W'/W & 0\\
H'/H & 0\\
\end{bmatrix}
Args:
source (:class:`torch.Tensor`): :math:`N\times C\times H\times W`
tensor.
target_size (tuple of int): target size ``(H, W)``.
transform (:class:`torch.Tensor`): :math:`2\times 2` transformation
tensor.
Returns:
:class:`torch.Tensor`: resampled tensor.
"""
dtype = source.dtype
dev = source.device
height_, width_ = target_size
ur_ = torch.arange(width_, dtype=dtype, device=dev) + 0.5
vr_ = torch.arange(height_, dtype=dtype, device=dev) + 0.5
height, width = source.shape[2:]
ur = 2 * ((ur_ + transform[0, 1]) / transform[0, 0]) / width - 1
vr = 2 * ((vr_ + transform[1, 1]) / transform[1, 0]) / height - 1
v, u = torch.meshgrid(vr, ur)
v = v.unsqueeze(2)
u = u.unsqueeze(2)
grid = torch.cat((u, v), dim=2)
grid = grid.unsqueeze(0).expand(len(source), -1, -1, -1)
return torch.nn.functional.grid_sample(source, grid)
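# Illustrative usage (a sketch, not part of the original module): stretch an
# 8 x 8 batch to 16 x 16 with the transform layout [[s_u, t_u], [s_v, t_v]]
# described in the docstring.
#
#   x = torch.rand(1, 3, 8, 8)
#   transform = torch.tensor([[2., 0.], [2., 0.]])  # s_u = W'/W, s_v = H'/H
#   y = resample(x, (16, 16), transform)            # shape (1, 3, 16, 16)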
def imsmooth(tensor,
sigma,
stride=1,
padding=0,
padding_mode='constant',
padding_value=0):
r"""Apply a 2D Gaussian filter to a tensor.
The 2D filter itself is implemented by separating the 2D convolution into
two 1D convolutions, first along the vertical direction and then along
the horizontal one. Each 1D Gaussian kernel is given by:
.. math::
f_i \propto \exp\left(-\frac{1}{2} \frac{i^2}{\sigma^2} \right),
~~~ i \in \{-W,\dots,W\},
~~~ W = \lceil 4\sigma \rceil.
The kernel is normalized to sum to one exactly. The function then calls
:func:`torch.nn.functional.conv2d` to perform the actual convolution; the
padding parameters and the stride are passed through to the latter.
Args:
tensor (:class:`torch.Tensor`): :math:`N\times C\times H\times W`
image tensor.
sigma (float): standard deviation of the Gaussian kernel.
stride (int, optional): subsampling factor. Default: ``1``.
padding (int, optional): extra padding. Default: ``0``.
padding_mode (str, optional): ``'constant'``, ``'reflect'`` or
``'replicate'``. Default: ``'constant'``.
padding_value (float, optional): constant value for the `constant`
padding mode. Default: ``0``.
Returns:
:class:`torch.Tensor`: :math:`N\times C\times H\times W` tensor with
the smoothed images.
"""
assert sigma >= 0
width = math.ceil(4 * sigma)
filt = (torch.arange(-width,
width + 1,
dtype=torch.float32,
device=tensor.device) /
(SQRT_TWO_SINGLE * sigma + EPSILON_SINGLE))
filt = torch.exp(-filt * filt)
filt /= torch.sum(filt)
num_channels = tensor.shape[1]
width = width + padding
if padding_mode == 'constant' and padding_value == 0:
other_padding = width
x = tensor
else:
# pad: (before, after) pairs starting from last dimension backward
x = F.pad(tensor,
(width, width, width, width),
mode=padding_mode,
value=padding_value)
other_padding = 0
padding = 0
x = F.conv2d(x,
filt.reshape((1, 1, -1, 1)).expand(num_channels, -1, -1, -1),
padding=(other_padding, padding),
stride=(stride, 1),
groups=num_channels)
x = F.conv2d(x,
filt.reshape((1, 1, 1, -1)).expand(num_channels, -1, -1, -1),
padding=(padding, other_padding),
stride=(1, stride),
groups=num_channels)
return x
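# Illustrative usage (a sketch, not part of the original module): smooth a
# batch of images with a Gaussian of standard deviation 2 pixels.
#
#   x = torch.rand(8, 3, 64, 64)
#   y = imsmooth(x, sigma=2)             # same spatial size as x
#   z = imsmooth(x, sigma=2, stride=2)   # additionally subsampled by 2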
def imarraysc(tiles,
spacing=0,
quiet=False,
lim=None,
interpolation='lanczos'):
r"""Display or arrange an image or tensor batch as a mosaic.
The function displays the tensor `tiles` as a set of tiles. `tiles` has
shape :math:`K\times C\times H\times W` and the generated mosaic
is a *new* tensor with shape :math:`C\times (MH) \times (NW)` where
:math:`MN \geq K`.
Missing tiles are filled with zeros.
The intensity range of each tile is individually rescaled to [0, 1].
Args:
tiles (:class:`torch.Tensor`): tensor to display or rearrange.
spacing (int, optional): thickness of the border (infilled
with zeros) around each tile. Default: ``0``.
quiet (bool, optional): If ``True``, do not display the mosaic.
Default: ``False``.
lim (list, optional): maximum and minimum intensity value for
rescaling. Default: ``None``.
interpolation (str, optional): interpolation to use with
:func:`matplotlib.pyplot.imshow`. Default: ``'lanczos'``.
Returns:
tuple: the mosaic as a :class:`torch.Tensor`, together with the handle
returned by :func:`matplotlib.pyplot.imshow` (``None`` when :attr:`quiet`
is ``True``).
"""
num = tiles.shape[0]
num_cols = math.ceil(math.sqrt(num))
num_rows = (num + num_cols - 1) // num_cols
num_channels = tiles.shape[1]
height = tiles.shape[2]
width = tiles.shape[3]
mosaic = torch.zeros(num_channels,
height * num_rows + spacing * (num_rows - 1),
width * num_cols + spacing * (num_cols - 1))
for t in range(num):
u = t % num_cols
v = t // num_cols
mosaic[0:num_channels,
v*(height+spacing):v*(height+spacing)+height,
u*(width+spacing):u*(width+spacing)+width] = imsc(tiles[t],
quiet=True,
lim=lim)[0]
return imsc(mosaic, quiet=quiet, interpolation=interpolation)
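# Illustrative usage (a sketch, not part of the original module): arrange a
# batch of 12 RGB tiles into a mosaic and display it.
#
#   batch = torch.rand(12, 3, 32, 32)
#   mosaic, handle = imarraysc(batch, spacing=2)
#   mosaic, _ = imarraysc(batch, quiet=True)   # build the mosaic only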