Module 3_mxrcnn.lib.mx-rcnn.symdata.image

Expand source code
import numpy as np
import cv2


def get_image(roi_rec, short, max_size, mean, std):
    """
    Read, resize and normalize one image described by roi_rec.

    roi_rec must provide keys: ["image", "boxes", "gt_classes", "flipped"].
    Coordinate convention: x runs along the width (second dim of the image
    array), y along the height (first dim).

    :param roi_rec: roidb record for one image
    :param short: target length of the image's shorter side
    :param max_size: upper bound on the longer side after resizing
    :param mean: [RGB pixel mean]
    :param std: [RGB pixel std var]
    :return: (im_tensor, im_info, gt_boxes); gt_boxes rows are
             (x1, y1, x2, y2, cls), already scaled by the resize factor
    """
    im = imdecode(roi_rec['image'])
    if roi_rec["flipped"]:
        im = im[:, ::-1, :]  # horizontal flip along the width axis
    im, im_scale = resize(im, short, max_size)
    h, w = im.shape[:2]
    im_info = np.array([h, w, im_scale], dtype=np.float32)
    im_tensor = transform(im, mean, std)

    if roi_rec['gt_classes'].size > 0:
        keep = np.where(roi_rec['gt_classes'] != 0)[0]  # drop background boxes
        coords = roi_rec['boxes'][keep, :].astype(np.float32) * im_scale
        labels = roi_rec['gt_classes'][keep].astype(np.float32)[:, np.newaxis]
        gt_boxes = np.hstack((coords, labels))
    else:
        gt_boxes = np.empty((0, 5), dtype=np.float32)

    return im_tensor, im_info, gt_boxes


def imdecode(image_path):
    """
    Read an image from disk with OpenCV.

    :param image_path: path to the image file
    :return: image as an ndarray in BGR channel order
    :raises AssertionError: if the file is missing or cannot be decoded
    """
    import os
    assert os.path.exists(image_path), image_path + ' not found'
    im = cv2.imread(image_path)
    # cv2.imread signals failure by returning None (no exception), e.g. for
    # a corrupt or unsupported file; fail loudly instead of propagating None.
    assert im is not None, image_path + ' could not be decoded'
    return im


def resize(im, short, max_size):
    """
    Scale an image so its short side equals `short`, capping the long side
    at `max_size`, and return the scale that was applied.

    :param im: BGR image input by opencv
    :param short: one dimensional size (the short side)
    :param max_size: one dimensional max size (the long side)
    :return: resized image (NDArray) and scale (float)
    """
    height_width = im.shape[:2]
    shorter = float(min(height_width))
    longer = float(max(height_width))
    scale = float(short) / shorter
    # shrink the scale if it would push the long side past max_size
    if round(scale * longer) > max_size:
        scale = float(max_size) / longer
    resized = cv2.resize(im, None, None, fx=scale, fy=scale,
                         interpolation=cv2.INTER_LINEAR)
    return resized, scale


def transform(im, mean, std):
    """
    Convert a BGR HxWxC image into a normalized CxHxW float tensor in
    RGB channel order.

    Note: the returned tensor has NO batch axis; batching is done later
    (e.g. by tensor_vstack).

    :param im: [height, width, channel] in BGR (OpenCV layout)
    :param mean: [RGB pixel mean]
    :param std: [RGB pixel std var]
    :return: float array of shape [channel, height, width], channels RGB
    """
    # Reverse the channel axis (BGR -> RGB), normalize per channel, then
    # move the channel axis to the front — one vectorized expression
    # instead of a per-channel Python loop.
    rgb = im[:, :, ::-1].astype(np.float64)
    normalized = (rgb - np.asarray(mean)) / np.asarray(std)
    return normalized.transpose((2, 0, 1))


def transform_inverse(im_tensor, mean, std):
    """
    Undo transform(): recover an ordinary RGB uint8 image.

    Operates on a single image (no batch axis), as the shape assert shows.

    :param im_tensor: [channel, height, width], channels in RGB order
    :param mean: [RGB pixel mean]
    :param std: [RGB pixel std var]
    :return: im [height, width, channel(RGB)] as uint8
    """
    assert im_tensor.shape[0] == 3
    im = im_tensor.transpose((1, 2, 0))
    im = im * std + mean
    # Clip before the uint8 cast: casting out-of-range floats would
    # otherwise wrap around (e.g. 300 -> 44) instead of saturating.
    im = np.clip(im, 0, 255).astype(np.uint8)
    return im


def tensor_vstack(tensor_list, pad=0):
    """
    Stack tensors along a new leading batch axis, padding each tensor up
    to the per-dimension maximum shape with `pad`.

    Works for any ndim (the original implementation only handled 1-3 dims
    and raised for anything larger).

    :param tensor_list: list of ndarrays with equal ndim (shapes may differ)
    :param pad: value used to fill the padded region
    :return: ndarray of shape (len(tensor_list), *per-dim max shape)
    """
    if len(tensor_list) == 1:
        # single tensor: no padding needed, just add the batch axis
        return tensor_list[0][np.newaxis, :]

    ndim = tensor_list[0].ndim
    # per-dimension maximum across the batch
    max_shape = tuple(max(t.shape[d] for t in tensor_list) for d in range(ndim))
    # np.full subsumes the zeros/ones/full special cases for pad == 0 / 1
    all_tensor = np.full((len(tensor_list),) + max_shape, pad,
                         dtype=tensor_list[0].dtype)
    for ind, tensor in enumerate(tensor_list):
        # write each tensor into the leading corner of its batch slot;
        # the slice tuple generalizes the old per-ndim branches
        region = (ind,) + tuple(slice(0, s) for s in tensor.shape)
        all_tensor[region] = tensor
    return all_tensor

Functions

def get_image(roi_rec, short, max_size, mean, std)

read, resize, transform image, return im_tensor, im_info, gt_boxes roi_rec should have keys: ["image", "boxes", "gt_classes", "flipped"] 0 — x (width, second dim of im) | y (height, first dim of im)

Expand source code
def get_image(roi_rec, short, max_size, mean, std):
    """
    read, resize, transform image, return im_tensor, im_info, gt_boxes
    roi_rec should have keys: ["image", "boxes", "gt_classes", "flipped"]
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    im = imdecode(roi_rec['image'])
    if roi_rec["flipped"]:
        im = im[:, ::-1, :]
    im, im_scale = resize(im, short, max_size)
    height, width = im.shape[:2]
    im_info = np.array([height, width, im_scale], dtype=np.float32)
    im_tensor = transform(im, mean, std)

    # gt boxes: (x1, y1, x2, y2, cls)
    if roi_rec['gt_classes'].size > 0:
        gt_inds = np.where(roi_rec['gt_classes'] != 0)[0]
        gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
        gt_boxes[:, 0:4] = roi_rec['boxes'][gt_inds, :]
        gt_boxes[:, 4] = roi_rec['gt_classes'][gt_inds]
        # scale gt_boxes
        gt_boxes[:, 0:4] *= im_scale
    else:
        gt_boxes = np.empty((0, 5), dtype=np.float32)

    return im_tensor, im_info, gt_boxes
def imdecode(image_path)

Return BGR image read by opencv

Expand source code
def imdecode(image_path):
    """Return BGR image read by opencv"""
    import os
    assert os.path.exists(image_path), image_path + ' not found'
    im = cv2.imread(image_path)
    return im
def resize(im, short, max_size)

only resize input image to target size and return scale :param im: BGR image input by opencv :param short: one dimensional size (the short side) :param max_size: one dimensional max size (the long side) :return: resized image (NDArray) and scale (float)

Expand source code
def resize(im, short, max_size):
    """
    only resize input image to target size and return scale
    :param im: BGR image input by opencv
    :param short: one dimensional size (the short side)
    :param max_size: one dimensional max size (the long side)
    :return: resized image (NDArray) and scale (float)
    """
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(short) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    return im, im_scale
def tensor_vstack(tensor_list, pad=0)

vertically stack tensors by adding a new axis expand dims if only 1 tensor :param tensor_list: list of tensor to be stacked vertically :param pad: label to pad with :return: tensor with max shape

Expand source code
def tensor_vstack(tensor_list, pad=0):
    """
    vertically stack tensors by adding a new axis
    expand dims if only 1 tensor
    :param tensor_list: list of tensor to be stacked vertically
    :param pad: label to pad with
    :return: tensor with max shape
    """
    if len(tensor_list) == 1:
        return tensor_list[0][np.newaxis, :]

    ndim = len(tensor_list[0].shape)
    dimensions = [len(tensor_list)]  # first dim is batch size
    for dim in range(ndim):
        dimensions.append(max([tensor.shape[dim] for tensor in tensor_list]))

    dtype = tensor_list[0].dtype
    if pad == 0:
        all_tensor = np.zeros(tuple(dimensions), dtype=dtype)
    elif pad == 1:
        all_tensor = np.ones(tuple(dimensions), dtype=dtype)
    else:
        all_tensor = np.full(tuple(dimensions), pad, dtype=dtype)
    if ndim == 1:
        for ind, tensor in enumerate(tensor_list):
            all_tensor[ind, :tensor.shape[0]] = tensor
    elif ndim == 2:
        for ind, tensor in enumerate(tensor_list):
            all_tensor[ind, :tensor.shape[0], :tensor.shape[1]] = tensor
    elif ndim == 3:
        for ind, tensor in enumerate(tensor_list):
            all_tensor[ind, :tensor.shape[0], :tensor.shape[1], :tensor.shape[2]] = tensor
    else:
        raise Exception('Sorry, unimplemented.')
    return all_tensor
def transform(im, mean, std)

transform into mxnet tensor, subtract pixel mean and transform to correct format :param im: [height, width, channel] in BGR :param mean: [RGB pixel mean] :param std: [RGB pixel std var] :return: [channel, height, width]

Expand source code
def transform(im, mean, std):
    """
    transform into mxnet tensor,
    subtract pixel size and transform to correct format
    :param im: [height, width, channel] in BGR
    :param mean: [RGB pixel mean]
    :param std: [RGB pixel std var]
    :return: [batch, channel, height, width]
    """
    im_tensor = np.zeros((3, im.shape[0], im.shape[1]))
    for i in range(3):
        im_tensor[i, :, :] = (im[:, :, 2 - i] - mean[i]) / std[i]
    return im_tensor
def transform_inverse(im_tensor, mean, std)

transform from mxnet im_tensor to ordinary RGB image; im_tensor is limited to one image :param im_tensor: [channel, height, width] :param mean: [RGB pixel mean] :param std: [RGB pixel std var] :return: im [height, width, channel(RGB)]

Expand source code
def transform_inverse(im_tensor, mean, std):
    """
    transform from mxnet im_tensor to ordinary RGB image
    im_tensor is limited to one image
    :param im_tensor: [batch, channel, height, width]
    :param mean: [RGB pixel mean]
    :param std: [RGB pixel std var]
    :return: im [height, width, channel(RGB)]
    """
    assert im_tensor.shape[0] == 3
    im = im_tensor.transpose((1, 2, 0))
    im = im * std + mean
    im = im.astype(np.uint8)
    return im