Module 3_mxrcnn.lib.mx-rcnn.symdata.bbox
Expand source code
import numpy as np
def bbox_flip(bbox, width, flip_x=False):
    """
    Mirror boxes horizontally, in place.
    Both x columns are rewritten, so the mirrored values are computed
    before either column is assigned.
    :param bbox: [n][x1, y1, x2, y2]; modified in place when flip_x is set
    :param width: value the x coordinates are mirrored about (image width)
    :param flip_x: when true, x1 becomes width - x2 and x2 becomes width - x1
    :return: the same bbox object that was passed in
    """
    if not flip_x:
        return bbox
    # width - column yields fresh arrays, so the assignments below cannot
    # clobber values that are still needed.
    mirrored_left = width - bbox[:, 2]
    mirrored_right = width - bbox[:, 0]
    bbox[:, 0] = mirrored_left
    bbox[:, 2] = mirrored_right
    return bbox
def bbox_overlaps(boxes, query_boxes):
    """
    Compute the intersection-over-union of every box with every query box.
    Extents are measured as x2 - x1 + 1 and y2 - y1 + 1.
    :param boxes: n * 4 bounding boxes [x1, y1, x2, y2]
    :param query_boxes: k * 4 bounding boxes [x1, y1, x2, y2]
    :return: overlaps: n * k float64 array; entries stay 0 where the
        intersection width or height is not positive
    """
    n_ = boxes.shape[0]
    k_ = query_boxes.shape[0]
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 is the dtype it used to resolve to.
    overlaps = np.zeros((n_, k_), dtype=np.float64)
    for k in range(k_):
        query_box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1)
            * (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(n_):
            # Intersection width; only continue if the boxes overlap in x.
            iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1
            if iw > 0:
                # Intersection height; only continue if they overlap in y.
                ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1
                if ih > 0:
                    box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1)
                    # Union area = both areas minus the doubly counted intersection.
                    all_area = float(box_area + query_box_area - iw * ih)
                    overlaps[n, k] = iw * ih / all_area
    return overlaps
def clip_boxes(boxes, im_shape):
    """
    Clamp box coordinates to the image, in place.
    x columns are limited to [0, im_shape[1] - 1] and y columns to
    [0, im_shape[0] - 1].
    :param boxes: [N, 4 * num_classes], laid out as repeating x1, y1, x2, y2
    :param im_shape: tuple of 2, indexed as (height, width)
    :return: the same boxes array, [N, 4 * num_classes]
    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1
    # Column offsets 0..3 within each group of four are x1, y1, x2, y2.
    for offset, upper in enumerate((x_limit, y_limit, x_limit, y_limit)):
        capped = np.minimum(boxes[:, offset::4], upper)
        boxes[:, offset::4] = np.maximum(capped, 0)
    return boxes
def bbox_transform(ex_rois, gt_rois, box_stds):
    """
    Compute bounding box regression targets that map ex_rois onto gt_rois.
    Centre offsets are divided by the ex_roi size, size ratios are taken
    in log space, and each of the four targets is divided by its std.
    :param ex_rois: [N, 4] as x1, y1, x2, y2
    :param gt_rois: [N, 4] as x1, y1, x2, y2
    :param box_stds: indexable with 4 entries, divisors for dx, dy, dw, dh
    :return: [N, 4] targets ordered dx, dy, dw, dh
    """
    assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'

    def _extent_and_center(rois):
        # Width/height use the +1 convention; centres are derived from them.
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        cx = rois[:, 0] + 0.5 * (w - 1.0)
        cy = rois[:, 1] + 0.5 * (h - 1.0)
        return w, h, cx, cy

    src_w, src_h, src_cx, src_cy = _extent_and_center(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _extent_and_center(gt_rois)

    # The small epsilon is applied to the centre-offset divisions only.
    shift_x = (dst_cx - src_cx) / (src_w + 1e-14) / box_stds[0]
    shift_y = (dst_cy - src_cy) / (src_h + 1e-14) / box_stds[1]
    log_w = np.log(dst_w / src_w) / box_stds[2]
    log_h = np.log(dst_h / src_h) / box_stds[3]

    # Stack as rows, then flip to one row per roi.
    return np.vstack((shift_x, shift_y, log_w, log_h)).T
def bbox_pred(boxes, box_deltas, box_stds):
    """
    Apply predicted offsets (box_deltas) to class-agnostic boxes, producing
    one box per class.
    :param boxes: !important [N, 4] as x1, y1, x2, y2
    :param box_deltas: [N, 4 * num_classes], repeating dx, dy, dw, dh
    :param box_stds: indexable with 4 entries, multipliers for dx, dy, dw, dh
    :return: [N, 4 * num_classes], repeating x1, y1, x2, y2; an empty
        (0, box_deltas.shape[1]) array when there are no boxes
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, box_deltas.shape[1]))

    box_w = boxes[:, 2] - boxes[:, 0] + 1.0
    box_h = boxes[:, 3] - boxes[:, 1] + 1.0
    # Column vectors so per-box values broadcast across the class axis.
    col_w = box_w[:, np.newaxis]
    col_h = box_h[:, np.newaxis]
    col_cx = (boxes[:, 0] + 0.5 * (box_w - 1.0))[:, np.newaxis]
    col_cy = (boxes[:, 1] + 0.5 * (box_h - 1.0))[:, np.newaxis]

    # Undo the std normalisation, then shift centres and rescale sizes.
    new_cx = box_deltas[:, 0::4] * box_stds[0] * col_w + col_cx
    new_cy = box_deltas[:, 1::4] * box_stds[1] * col_h + col_cy
    new_w = np.exp(box_deltas[:, 2::4] * box_stds[2]) * col_w
    new_h = np.exp(box_deltas[:, 3::4] * box_stds[3]) * col_h

    half_w = 0.5 * (new_w - 1.0)
    half_h = 0.5 * (new_h - 1.0)

    result = np.zeros(box_deltas.shape)
    result[:, 0::4] = new_cx - half_w  # x1
    result[:, 1::4] = new_cy - half_h  # y1
    result[:, 2::4] = new_cx + half_w  # x2
    result[:, 3::4] = new_cy + half_h  # y2
    return result
def nms(dets, thresh):
    """
    Greedy non-maximum suppression.
    Repeatedly takes the highest scoring remaining box and discards every
    other remaining box whose overlap with it is greater than thresh.
    :param dets: [[x1, y1, x2, y2, score]]
    :param thresh: boxes with overlap <= thresh are retained
    :return: indexes to keep, in descending score order
    """
    left, top, right, bottom, conf = (dets[:, c] for c in range(5))
    box_areas = (right - left + 1) * (bottom - top + 1)

    remaining = conf.argsort()[::-1]
    kept = []
    while remaining.size > 0:
        best = remaining[0]
        others = remaining[1:]
        kept.append(best)

        # Intersection of the chosen box with each of the other candidates.
        inter_w = np.maximum(
            0.0,
            np.minimum(right[best], right[others]) - np.maximum(left[best], left[others]) + 1,
        )
        inter_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[others]) - np.maximum(top[best], top[others]) + 1,
        )
        overlap_area = inter_w * inter_h
        iou = overlap_area / (box_areas[best] + box_areas[others] - overlap_area)

        # Only candidates that do not overlap too much go to the next round.
        survivors = np.where(iou <= thresh)[0]
        remaining = others[survivors]
    return kept
def im_detect(rois, scores, bbox_deltas, im_info,
              bbox_stds, nms_thresh, conf_thresh):
    """
    Turn network outputs into per-class detections.
    Inputs must expose ``asnumpy()``; after conversion:
    rois (nroi, 4), scores (nrois, nclasses),
    bbox_deltas (nrois, 4 * nclasses), im_info (3) as height, width, scale.
    :param bbox_stds: passed through to bbox_pred
    :param nms_thresh: overlap threshold passed to nms
    :param conf_thresh: only scores strictly above this value are kept
    :return: [M, 6] array with rows [class id, score, x1, y1, x2, y2];
        an empty (0, 6) array when scores has no class column beyond index 0
    """
    rois = rois.asnumpy()
    scores = scores.asnumpy()
    bbox_deltas = bbox_deltas.asnumpy()
    im_info = im_info.asnumpy()
    height, width, scale = im_info

    # post processing
    pred_boxes = bbox_pred(rois, bbox_deltas, bbox_stds)
    pred_boxes = clip_boxes(pred_boxes, (height, width))

    # we used scaled image & roi to train, so it is necessary to transform them back
    pred_boxes = pred_boxes / scale

    # convert to per class detection results
    # class index 0 is skipped (presumably the background class)
    det = []
    for j in range(1, scores.shape[-1]):
        indexes = np.where(scores[:, j] > conf_thresh)[0]
        cls_scores = scores[indexes, j, np.newaxis]
        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = nms(cls_dets, thresh=nms_thresh)

        cls_id = np.ones_like(cls_scores) * j
        det.append(np.hstack((cls_id, cls_scores, cls_boxes))[keep, :])

    # np.concatenate raises ValueError on an empty list, which happens when
    # the loop above never runs; return an empty result instead.
    if not det:
        return np.zeros((0, 6))

    # assemble all classes
    return np.concatenate(det, axis=0)
Functions
def bbox_flip(bbox, width, flip_x=False)
-
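Flip boxes horizontally. A wrong flip leads to invalid values in bbox_transform (no overlap). Note that columns 0 and 2 are both rewritten, so the new values must be computed and saved before either column is assigned. :param bbox: [n][x1, y1, x2, y2] :param width: image width, as in the cv2 shape (height, width, channel) :param flip_x: if true, x1 and x2 are flipped :return: flipped box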
Expand source code
def bbox_flip(bbox, width, flip_x=False):
    """
    Mirror boxes horizontally, in place.
    Both x columns are rewritten, so the mirrored values are computed
    before either column is assigned.
    :param bbox: [n][x1, y1, x2, y2]; modified in place when flip_x is set
    :param width: value the x coordinates are mirrored about (image width)
    :param flip_x: when true, x1 becomes width - x2 and x2 becomes width - x1
    :return: the same bbox object that was passed in
    """
    if not flip_x:
        return bbox
    # width - column yields fresh arrays, so the assignments below cannot
    # clobber values that are still needed.
    mirrored_left = width - bbox[:, 2]
    mirrored_right = width - bbox[:, 0]
    bbox[:, 0] = mirrored_left
    bbox[:, 2] = mirrored_right
    return bbox
def bbox_overlaps(boxes, query_boxes)
-
determine overlaps between boxes and query_boxes :param boxes: n * 4 bounding boxes :param query_boxes: k * 4 bounding boxes :return: overlaps: n * k overlaps
Expand source code
def bbox_overlaps(boxes, query_boxes):
    """
    Compute the intersection-over-union of every box with every query box.
    Extents are measured as x2 - x1 + 1 and y2 - y1 + 1.
    :param boxes: n * 4 bounding boxes [x1, y1, x2, y2]
    :param query_boxes: k * 4 bounding boxes [x1, y1, x2, y2]
    :return: overlaps: n * k float64 array; entries stay 0 where the
        intersection width or height is not positive
    """
    n_ = boxes.shape[0]
    k_ = query_boxes.shape[0]
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 is the dtype it used to resolve to.
    overlaps = np.zeros((n_, k_), dtype=np.float64)
    for k in range(k_):
        query_box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1)
            * (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(n_):
            # Intersection width; only continue if the boxes overlap in x.
            iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1
            if iw > 0:
                # Intersection height; only continue if they overlap in y.
                ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1
                if ih > 0:
                    box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1)
                    # Union area = both areas minus the doubly counted intersection.
                    all_area = float(box_area + query_box_area - iw * ih)
                    overlaps[n, k] = iw * ih / all_area
    return overlaps
def bbox_pred(boxes, box_deltas, box_stds)
-
Transform the set of class-agnostic boxes into class-specific boxes by applying the predicted offsets (box_deltas) :param boxes: !important [N 4] :param box_deltas: [N, 4 * num_classes] :return: [N 4 * num_classes]
Expand source code
def bbox_pred(boxes, box_deltas, box_stds):
    """
    Apply predicted offsets (box_deltas) to class-agnostic boxes, producing
    one box per class.
    :param boxes: !important [N, 4] as x1, y1, x2, y2
    :param box_deltas: [N, 4 * num_classes], repeating dx, dy, dw, dh
    :param box_stds: indexable with 4 entries, multipliers for dx, dy, dw, dh
    :return: [N, 4 * num_classes], repeating x1, y1, x2, y2; an empty
        (0, box_deltas.shape[1]) array when there are no boxes
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, box_deltas.shape[1]))

    box_w = boxes[:, 2] - boxes[:, 0] + 1.0
    box_h = boxes[:, 3] - boxes[:, 1] + 1.0
    # Column vectors so per-box values broadcast across the class axis.
    col_w = box_w[:, np.newaxis]
    col_h = box_h[:, np.newaxis]
    col_cx = (boxes[:, 0] + 0.5 * (box_w - 1.0))[:, np.newaxis]
    col_cy = (boxes[:, 1] + 0.5 * (box_h - 1.0))[:, np.newaxis]

    # Undo the std normalisation, then shift centres and rescale sizes.
    new_cx = box_deltas[:, 0::4] * box_stds[0] * col_w + col_cx
    new_cy = box_deltas[:, 1::4] * box_stds[1] * col_h + col_cy
    new_w = np.exp(box_deltas[:, 2::4] * box_stds[2]) * col_w
    new_h = np.exp(box_deltas[:, 3::4] * box_stds[3]) * col_h

    half_w = 0.5 * (new_w - 1.0)
    half_h = 0.5 * (new_h - 1.0)

    result = np.zeros(box_deltas.shape)
    result[:, 0::4] = new_cx - half_w  # x1
    result[:, 1::4] = new_cy - half_h  # y1
    result[:, 2::4] = new_cx + half_w  # x2
    result[:, 3::4] = new_cy + half_h  # y2
    return result
def bbox_transform(ex_rois, gt_rois, box_stds)
-
compute bounding box regression targets from ex_rois to gt_rois :param ex_rois: [N, 4] :param gt_rois: [N, 4] :return: [N, 4]
Expand source code
def bbox_transform(ex_rois, gt_rois, box_stds):
    """
    Compute bounding box regression targets that map ex_rois onto gt_rois.
    Centre offsets are divided by the ex_roi size, size ratios are taken
    in log space, and each of the four targets is divided by its std.
    :param ex_rois: [N, 4] as x1, y1, x2, y2
    :param gt_rois: [N, 4] as x1, y1, x2, y2
    :param box_stds: indexable with 4 entries, divisors for dx, dy, dw, dh
    :return: [N, 4] targets ordered dx, dy, dw, dh
    """
    assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'

    def _extent_and_center(rois):
        # Width/height use the +1 convention; centres are derived from them.
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        cx = rois[:, 0] + 0.5 * (w - 1.0)
        cy = rois[:, 1] + 0.5 * (h - 1.0)
        return w, h, cx, cy

    src_w, src_h, src_cx, src_cy = _extent_and_center(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _extent_and_center(gt_rois)

    # The small epsilon is applied to the centre-offset divisions only.
    shift_x = (dst_cx - src_cx) / (src_w + 1e-14) / box_stds[0]
    shift_y = (dst_cy - src_cy) / (src_h + 1e-14) / box_stds[1]
    log_w = np.log(dst_w / src_w) / box_stds[2]
    log_h = np.log(dst_h / src_h) / box_stds[3]

    # Stack as rows, then flip to one row per roi.
    return np.vstack((shift_x, shift_y, log_w, log_h)).T
def clip_boxes(boxes, im_shape)
-
Clip boxes to image boundaries. :param boxes: [N, 4 * num_classes] :param im_shape: tuple of 2 :return: [N, 4 * num_classes]
Expand source code
def clip_boxes(boxes, im_shape):
    """
    Clamp box coordinates to the image, in place.
    x columns are limited to [0, im_shape[1] - 1] and y columns to
    [0, im_shape[0] - 1].
    :param boxes: [N, 4 * num_classes], laid out as repeating x1, y1, x2, y2
    :param im_shape: tuple of 2, indexed as (height, width)
    :return: the same boxes array, [N, 4 * num_classes]
    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1
    # Column offsets 0..3 within each group of four are x1, y1, x2, y2.
    for offset, upper in enumerate((x_limit, y_limit, x_limit, y_limit)):
        capped = np.minimum(boxes[:, offset::4], upper)
        boxes[:, offset::4] = np.maximum(capped, 0)
    return boxes
def im_detect(rois, scores, bbox_deltas, im_info, bbox_stds, nms_thresh, conf_thresh)
-
rois (nroi, 4), scores (nrois, nclasses), bbox_deltas (nrois, 4 * nclasses), im_info (3)
Expand source code
def im_detect(rois, scores, bbox_deltas, im_info,
              bbox_stds, nms_thresh, conf_thresh):
    """
    Turn network outputs into per-class detections.
    Inputs must expose ``asnumpy()``; after conversion:
    rois (nroi, 4), scores (nrois, nclasses),
    bbox_deltas (nrois, 4 * nclasses), im_info (3) as height, width, scale.
    :param bbox_stds: passed through to bbox_pred
    :param nms_thresh: overlap threshold passed to nms
    :param conf_thresh: only scores strictly above this value are kept
    :return: [M, 6] array with rows [class id, score, x1, y1, x2, y2];
        an empty (0, 6) array when scores has no class column beyond index 0
    """
    rois = rois.asnumpy()
    scores = scores.asnumpy()
    bbox_deltas = bbox_deltas.asnumpy()
    im_info = im_info.asnumpy()
    height, width, scale = im_info

    # post processing
    pred_boxes = bbox_pred(rois, bbox_deltas, bbox_stds)
    pred_boxes = clip_boxes(pred_boxes, (height, width))

    # we used scaled image & roi to train, so it is necessary to transform them back
    pred_boxes = pred_boxes / scale

    # convert to per class detection results
    # class index 0 is skipped (presumably the background class)
    det = []
    for j in range(1, scores.shape[-1]):
        indexes = np.where(scores[:, j] > conf_thresh)[0]
        cls_scores = scores[indexes, j, np.newaxis]
        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = nms(cls_dets, thresh=nms_thresh)

        cls_id = np.ones_like(cls_scores) * j
        det.append(np.hstack((cls_id, cls_scores, cls_boxes))[keep, :])

    # np.concatenate raises ValueError on an empty list, which happens when
    # the loop above never runs; return an empty result instead.
    if not det:
        return np.zeros((0, 6))

    # assemble all classes
    return np.concatenate(det, axis=0)
def nms(dets, thresh)
-
Greedily select boxes in descending score order, keeping a box only if its overlap with every previously selected box is <= thresh; boxes with overlap > thresh are ruled out. :param dets: [[x1, y1, x2, y2, score]] :param thresh: retain overlap <= thresh :return: indexes to keep
Expand source code
def nms(dets, thresh):
    """
    Greedy non-maximum suppression.
    Repeatedly takes the highest scoring remaining box and discards every
    other remaining box whose overlap with it is greater than thresh.
    :param dets: [[x1, y1, x2, y2, score]]
    :param thresh: boxes with overlap <= thresh are retained
    :return: indexes to keep, in descending score order
    """
    left, top, right, bottom, conf = (dets[:, c] for c in range(5))
    box_areas = (right - left + 1) * (bottom - top + 1)

    remaining = conf.argsort()[::-1]
    kept = []
    while remaining.size > 0:
        best = remaining[0]
        others = remaining[1:]
        kept.append(best)

        # Intersection of the chosen box with each of the other candidates.
        inter_w = np.maximum(
            0.0,
            np.minimum(right[best], right[others]) - np.maximum(left[best], left[others]) + 1,
        )
        inter_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[others]) - np.maximum(top[best], top[others]) + 1,
        )
        overlap_area = inter_w * inter_h
        iou = overlap_area / (box_areas[best] + box_areas[others] - overlap_area)

        # Only candidates that do not overlap too much go to the next round.
        survivors = np.where(iou <= thresh)[0]
        remaining = others[survivors]
    return kept