Module 3_mxrcnn.lib.mx-rcnn.symnet.symbol_vgg
Expand source code
import mxnet as mx
from . import proposal_target
def get_vgg_feature(data):
    """Stack the VGG-style convolutional trunk (conv1_1 .. relu5_3) on ``data``.

    The trunk is five groups of 3x3 convolutions (padding 1, workspace 2048),
    each convolution followed by a ReLU. Groups 1-4 are closed by a 2x2,
    stride-2 max pooling; group 5 is not pooled.

    Args:
        data: input symbol fed to ``conv1_1``.

    Returns:
        The ``relu5_3`` symbol.
    """
    # (group number, convolutions in the group, filters per convolution)
    group_specs = (
        (1, 2, 64),
        (2, 2, 128),
        (3, 3, 256),
        (4, 3, 512),
        (5, 3, 512),
    )
    final_group = group_specs[-1][0]

    net = data
    for group_no, conv_count, filters in group_specs:
        for conv_no in range(1, conv_count + 1):
            tag = "%d_%d" % (group_no, conv_no)
            net = mx.symbol.Convolution(
                data=net, kernel=(3, 3), pad=(1, 1), num_filter=filters,
                workspace=2048, name="conv" + tag)
            net = mx.symbol.Activation(data=net, act_type="relu", name="relu" + tag)
        # Only groups 1-4 are pooled; the last group's ReLU is the output.
        if group_no != final_group:
            net = mx.symbol.Pooling(
                data=net, pool_type="max", kernel=(2, 2), stride=(2, 2),
                name="pool%d" % group_no)
    return net
def get_vgg_top_feature(data):
    """Stack the fully connected head (flatten, fc6, fc7) on ``data``.

    ``data`` is flattened, then passed through two identical stages, each a
    4096-unit fully connected layer, a ReLU and a dropout with p=0.5.

    Args:
        data: input symbol to flatten.

    Returns:
        The ``drop7`` symbol.
    """
    net = mx.symbol.Flatten(data=data, name="flatten")
    for stage in (6, 7):
        net = mx.symbol.FullyConnected(data=net, num_hidden=4096, name="fc%d" % stage)
        net = mx.symbol.Activation(data=net, act_type="relu", name="relu%d" % stage)
        net = mx.symbol.Dropout(data=net, p=0.5, name="drop%d" % stage)
    return net
def get_vgg_train(anchor_scales, anchor_ratios, rpn_feature_stride,
                  rpn_pre_topk, rpn_post_topk, rpn_nms_thresh, rpn_min_size, rpn_batch_rois,
                  num_classes, rcnn_feature_stride, rcnn_pooled_size, rcnn_batch_size,
                  rcnn_batch_rois, rcnn_fg_fraction, rcnn_fg_overlap, rcnn_bbox_stds):
    """Assemble the training symbol: VGG trunk, RPN branch and RCNN head.

    Args:
        anchor_scales, anchor_ratios: forwarded to ``MultiProposal`` as
            ``scales`` / ``ratios``; the product of their lengths is the
            number of anchors per position.
        rpn_feature_stride: ``feature_stride`` of ``MultiProposal``.
        rpn_pre_topk, rpn_post_topk: ``rpn_pre_nms_top_n`` /
            ``rpn_post_nms_top_n`` of ``MultiProposal``.
        rpn_nms_thresh: ``threshold`` of ``MultiProposal``.
        rpn_min_size: ``rpn_min_size`` of ``MultiProposal``.
        rpn_batch_rois: the RPN box loss gradient is scaled by its reciprocal.
        num_classes: width of ``cls_score``; ``bbox_pred`` is four times wider.
        rcnn_feature_stride: ROI pooling uses ``1.0 / rcnn_feature_stride``
            as its spatial scale.
        rcnn_pooled_size: ``pooled_size`` of the ROI pooling.
        rcnn_batch_size: passed to the ``proposal_target`` op as
            ``batch_images`` and used as the leading dimension of the
            reshaped outputs.
        rcnn_batch_rois: passed to ``proposal_target`` as ``batch_rois``; the
            RCNN box loss gradient is scaled by its reciprocal.
        rcnn_fg_fraction, rcnn_fg_overlap, rcnn_bbox_stds: passed to
            ``proposal_target`` as ``fg_fraction``, ``fg_overlap``, ``box_stds``.

    Returns:
        A symbol group of, in order: RPN class probability, RPN box loss,
        reshaped RCNN class probability, reshaped RCNN box loss, and the
        gradient-blocked reshaped RCNN label.
    """
    sym = mx.symbol
    anchors_per_pos = len(anchor_scales) * len(anchor_ratios)

    # graph inputs
    image = sym.Variable(name="data")
    image_info = sym.Variable(name="im_info")
    ground_truth = sym.Variable(name="gt_boxes")
    rpn_label = sym.Variable(name='label')
    rpn_bbox_target = sym.Variable(name='bbox_target')
    rpn_bbox_weight = sym.Variable(name='bbox_weight')

    # convolutional trunk consumed by both the RPN and the ROI pooling
    shared_feat = get_vgg_feature(image)

    # RPN hidden layer
    rpn_hidden = sym.Convolution(
        data=shared_feat, kernel=(3, 3), pad=(1, 1), num_filter=512, name="rpn_conv_3x3")
    rpn_hidden = sym.Activation(data=rpn_hidden, act_type="relu", name="rpn_relu")

    # RPN classification: one reshaped score map feeds both the training
    # loss (SoftmaxOutput) and the plain softmax used by the proposal op
    rpn_scores = sym.Convolution(
        data=rpn_hidden, kernel=(1, 1), pad=(0, 0),
        num_filter=2 * anchors_per_pos, name="rpn_cls_score")
    rpn_scores_two_way = sym.Reshape(
        data=rpn_scores, shape=(0, 2, -1, 0), name="rpn_cls_score_reshape")
    rpn_cls_prob = sym.SoftmaxOutput(
        data=rpn_scores_two_way, label=rpn_label, multi_output=True,
        normalization='valid', use_ignore=True, ignore_label=-1, name="rpn_cls_prob")
    rpn_activation = sym.softmax(
        data=rpn_scores_two_way, axis=1, name="rpn_cls_act")
    rpn_activation = sym.Reshape(
        data=rpn_activation, shape=(0, 2 * anchors_per_pos, -1, 0),
        name='rpn_cls_act_reshape')

    # RPN box regression and its weighted smooth-L1 loss
    rpn_deltas = sym.Convolution(
        data=rpn_hidden, kernel=(1, 1), pad=(0, 0),
        num_filter=4 * anchors_per_pos, name="rpn_bbox_pred")
    rpn_residual = rpn_deltas - rpn_bbox_target
    rpn_smooth = sym.smooth_l1(name='rpn_bbox_loss_', scalar=3.0, data=rpn_residual)
    rpn_weighted = rpn_bbox_weight * rpn_smooth
    rpn_bbox_loss = sym.MakeLoss(
        name='rpn_bbox_loss', data=rpn_weighted, grad_scale=1.0 / rpn_batch_rois)

    # region proposals
    proposals = sym.contrib.MultiProposal(
        cls_prob=rpn_activation, bbox_pred=rpn_deltas, im_info=image_info, name='rois',
        feature_stride=rpn_feature_stride, scales=anchor_scales, ratios=anchor_ratios,
        rpn_pre_nms_top_n=rpn_pre_topk, rpn_post_nms_top_n=rpn_post_topk,
        threshold=rpn_nms_thresh, rpn_min_size=rpn_min_size)

    # custom 'proposal_target' op; its first four outputs are read below
    sampled = sym.Custom(
        rois=proposals, gt_boxes=ground_truth, op_type='proposal_target',
        num_classes=num_classes, batch_images=rcnn_batch_size,
        batch_rois=rcnn_batch_rois, fg_fraction=rcnn_fg_fraction,
        fg_overlap=rcnn_fg_overlap, box_stds=rcnn_bbox_stds)
    sampled_rois, rcnn_label, rcnn_bbox_target, rcnn_bbox_weight = (
        sampled[0], sampled[1], sampled[2], sampled[3])

    # ROI pooling over the shared trunk followed by the fully connected head
    pooled = sym.ROIPooling(
        name='roi_pool', data=shared_feat, rois=sampled_rois,
        pooled_size=rcnn_pooled_size, spatial_scale=1.0 / rcnn_feature_stride)
    head_feat = get_vgg_top_feature(pooled)

    # RCNN classification
    rcnn_scores = sym.FullyConnected(name='cls_score', data=head_feat, num_hidden=num_classes)
    rcnn_cls_prob = sym.SoftmaxOutput(
        name='cls_prob', data=rcnn_scores, label=rcnn_label, normalization='batch')

    # RCNN box regression and its weighted smooth-L1 loss
    rcnn_deltas = sym.FullyConnected(
        name='bbox_pred', data=head_feat, num_hidden=num_classes * 4)
    rcnn_residual = rcnn_deltas - rcnn_bbox_target
    rcnn_smooth = sym.smooth_l1(name='bbox_loss_', scalar=1.0, data=rcnn_residual)
    rcnn_weighted = rcnn_bbox_weight * rcnn_smooth
    rcnn_bbox_loss = sym.MakeLoss(
        name='bbox_loss', data=rcnn_weighted, grad_scale=1.0 / rcnn_batch_rois)

    # reshape the RCNN outputs so their leading dimension is rcnn_batch_size
    rcnn_label = sym.Reshape(
        data=rcnn_label, shape=(rcnn_batch_size, -1), name='label_reshape')
    rcnn_cls_prob = sym.Reshape(
        data=rcnn_cls_prob, shape=(rcnn_batch_size, -1, num_classes),
        name='cls_prob_reshape')
    rcnn_bbox_loss = sym.Reshape(
        data=rcnn_bbox_loss, shape=(rcnn_batch_size, -1, 4 * num_classes),
        name='bbox_loss_reshape')

    outputs = [rpn_cls_prob, rpn_bbox_loss, rcnn_cls_prob, rcnn_bbox_loss,
               sym.BlockGrad(rcnn_label)]
    return sym.Group(outputs)
def get_vgg_test(anchor_scales, anchor_ratios, rpn_feature_stride,
                 rpn_pre_topk, rpn_post_topk, rpn_nms_thresh, rpn_min_size,
                 num_classes, rcnn_feature_stride, rcnn_pooled_size, rcnn_batch_size):
    """Assemble the inference symbol: VGG trunk, RPN proposals and RCNN head.

    Args:
        anchor_scales, anchor_ratios: forwarded to ``MultiProposal`` as
            ``scales`` / ``ratios``; the product of their lengths is the
            number of anchors per position.
        rpn_feature_stride: ``feature_stride`` of ``MultiProposal``.
        rpn_pre_topk, rpn_post_topk: ``rpn_pre_nms_top_n`` /
            ``rpn_post_nms_top_n`` of ``MultiProposal``.
        rpn_nms_thresh: ``threshold`` of ``MultiProposal``.
        rpn_min_size: ``rpn_min_size`` of ``MultiProposal``.
        num_classes: width of ``cls_score``; ``bbox_pred`` is four times wider.
        rcnn_feature_stride: ROI pooling uses ``1.0 / rcnn_feature_stride``
            as its spatial scale.
        rcnn_pooled_size: ``pooled_size`` of the ROI pooling.
        rcnn_batch_size: leading dimension of the reshaped outputs.

    Returns:
        A symbol group of, in order: the proposals, the reshaped class
        probabilities and the reshaped box predictions.
    """
    sym = mx.symbol
    anchors_per_pos = len(anchor_scales) * len(anchor_ratios)

    # graph inputs
    image = sym.Variable(name="data")
    image_info = sym.Variable(name="im_info")

    # convolutional trunk consumed by both the RPN and the ROI pooling
    shared_feat = get_vgg_feature(image)

    # RPN hidden layer
    rpn_hidden = sym.Convolution(
        data=shared_feat, kernel=(3, 3), pad=(1, 1), num_filter=512, name="rpn_conv_3x3")
    rpn_hidden = sym.Activation(data=rpn_hidden, act_type="relu", name="rpn_relu")

    # RPN classification: reshape, softmax over axis 1, reshape back
    rpn_scores = sym.Convolution(
        data=rpn_hidden, kernel=(1, 1), pad=(0, 0),
        num_filter=2 * anchors_per_pos, name="rpn_cls_score")
    rpn_scores_two_way = sym.Reshape(
        data=rpn_scores, shape=(0, 2, -1, 0), name="rpn_cls_score_reshape")
    rpn_activation = sym.softmax(
        data=rpn_scores_two_way, axis=1, name="rpn_cls_act")
    rpn_activation = sym.Reshape(
        data=rpn_activation, shape=(0, 2 * anchors_per_pos, -1, 0),
        name='rpn_cls_act_reshape')

    # RPN box regression
    rpn_deltas = sym.Convolution(
        data=rpn_hidden, kernel=(1, 1), pad=(0, 0),
        num_filter=4 * anchors_per_pos, name="rpn_bbox_pred")

    # region proposals
    proposals = sym.contrib.MultiProposal(
        cls_prob=rpn_activation, bbox_pred=rpn_deltas, im_info=image_info, name='rois',
        feature_stride=rpn_feature_stride, scales=anchor_scales, ratios=anchor_ratios,
        rpn_pre_nms_top_n=rpn_pre_topk, rpn_post_nms_top_n=rpn_post_topk,
        threshold=rpn_nms_thresh, rpn_min_size=rpn_min_size)

    # ROI pooling over the shared trunk followed by the fully connected head
    pooled = sym.ROIPooling(
        name='roi_pool', data=shared_feat, rois=proposals,
        pooled_size=rcnn_pooled_size, spatial_scale=1.0 / rcnn_feature_stride)
    head_feat = get_vgg_top_feature(pooled)

    # RCNN classification and box regression
    rcnn_scores = sym.FullyConnected(name='cls_score', data=head_feat, num_hidden=num_classes)
    rcnn_cls_prob = sym.softmax(name='cls_prob', data=rcnn_scores)
    rcnn_deltas = sym.FullyConnected(
        name='bbox_pred', data=head_feat, num_hidden=num_classes * 4)

    # reshape the RCNN outputs so their leading dimension is rcnn_batch_size
    rcnn_cls_prob = sym.Reshape(
        data=rcnn_cls_prob, shape=(rcnn_batch_size, -1, num_classes),
        name='cls_prob_reshape')
    rcnn_deltas = sym.Reshape(
        data=rcnn_deltas, shape=(rcnn_batch_size, -1, 4 * num_classes),
        name='bbox_pred_reshape')

    return sym.Group([proposals, rcnn_cls_prob, rcnn_deltas])
Functions
def get_vgg_feature(data)
-
Expand source code
def get_vgg_feature(data):
    """Build conv1_1 .. relu5_3 of the VGG-style trunk on top of ``data``.

    Every convolution is 3x3 with padding 1 and workspace 2048 and is followed
    by a ReLU. Groups 1-4 end with a 2x2, stride-2 max pooling; group 5 does not.

    Args:
        data: input symbol fed to ``conv1_1``.

    Returns:
        The ``relu5_3`` symbol.
    """
    def conv_relu(inp, filters, tag):
        # one 3x3 convolution plus its ReLU, named conv<tag> / relu<tag>
        conv = mx.symbol.Convolution(
            data=inp, kernel=(3, 3), pad=(1, 1), num_filter=filters,
            workspace=2048, name="conv" + tag)
        return mx.symbol.Activation(data=conv, act_type="relu", name="relu" + tag)

    def max_pool(inp, tag):
        # 2x2 max pooling with stride 2, named pool<tag>
        return mx.symbol.Pooling(
            data=inp, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool" + tag)

    # group 1
    x = conv_relu(data, 64, "1_1")
    x = conv_relu(x, 64, "1_2")
    x = max_pool(x, "1")
    # group 2
    x = conv_relu(x, 128, "2_1")
    x = conv_relu(x, 128, "2_2")
    x = max_pool(x, "2")
    # group 3
    x = conv_relu(x, 256, "3_1")
    x = conv_relu(x, 256, "3_2")
    x = conv_relu(x, 256, "3_3")
    x = max_pool(x, "3")
    # group 4
    x = conv_relu(x, 512, "4_1")
    x = conv_relu(x, 512, "4_2")
    x = conv_relu(x, 512, "4_3")
    x = max_pool(x, "4")
    # group 5 (no pooling)
    x = conv_relu(x, 512, "5_1")
    x = conv_relu(x, 512, "5_2")
    x = conv_relu(x, 512, "5_3")
    return x
def get_vgg_test(anchor_scales, anchor_ratios, rpn_feature_stride, rpn_pre_topk, rpn_post_topk, rpn_nms_thresh, rpn_min_size, num_classes, rcnn_feature_stride, rcnn_pooled_size, rcnn_batch_size)
-
Expand source code
def get_vgg_test(anchor_scales, anchor_ratios, rpn_feature_stride,
                 rpn_pre_topk, rpn_post_topk, rpn_nms_thresh, rpn_min_size,
                 num_classes, rcnn_feature_stride, rcnn_pooled_size, rcnn_batch_size):
    """Build the inference graph and return its grouped outputs.

    The anchor and ``rpn_*`` arguments configure the ``MultiProposal`` op,
    ``rcnn_feature_stride`` and ``rcnn_pooled_size`` configure the ROI pooling,
    ``num_classes`` sizes the two output layers, and ``rcnn_batch_size`` is the
    leading dimension of the reshaped outputs.

    Returns:
        A symbol group ``[rois, cls_prob, bbox_pred]``.
    """
    S = mx.symbol
    n_anchor = len(anchor_scales) * len(anchor_ratios)

    data = S.Variable(name="data")
    im_info = S.Variable(name="im_info")

    # shared convolutional layers
    trunk = get_vgg_feature(data)

    # rpn feature
    rpn_feat = S.Activation(
        data=S.Convolution(
            data=trunk, kernel=(3, 3), pad=(1, 1), num_filter=512, name="rpn_conv_3x3"),
        act_type="relu", name="rpn_relu")

    # both 1x1 RPN output convolutions share these settings
    one_by_one = dict(data=rpn_feat, kernel=(1, 1), pad=(0, 0))

    # rpn classification
    cls_map = S.Convolution(num_filter=2 * n_anchor, name="rpn_cls_score", **one_by_one)
    cls_map = S.Reshape(data=cls_map, shape=(0, 2, -1, 0), name="rpn_cls_score_reshape")
    cls_act = S.softmax(data=cls_map, axis=1, name="rpn_cls_act")
    cls_act = S.Reshape(
        data=cls_act, shape=(0, 2 * n_anchor, -1, 0), name='rpn_cls_act_reshape')

    # rpn bbox regression
    box_map = S.Convolution(num_filter=4 * n_anchor, name="rpn_bbox_pred", **one_by_one)

    # rpn proposal
    proposal_cfg = dict(
        feature_stride=rpn_feature_stride,
        scales=anchor_scales,
        ratios=anchor_ratios,
        rpn_pre_nms_top_n=rpn_pre_topk,
        rpn_post_nms_top_n=rpn_post_topk,
        threshold=rpn_nms_thresh,
        rpn_min_size=rpn_min_size,
    )
    rois = S.contrib.MultiProposal(
        cls_prob=cls_act, bbox_pred=box_map, im_info=im_info, name='rois', **proposal_cfg)

    # rcnn roi pool + top feature
    top = get_vgg_top_feature(
        S.ROIPooling(
            name='roi_pool', data=trunk, rois=rois, pooled_size=rcnn_pooled_size,
            spatial_scale=1.0 / rcnn_feature_stride))

    # rcnn classification
    scores = S.FullyConnected(name='cls_score', data=top, num_hidden=num_classes)
    probs = S.softmax(name='cls_prob', data=scores)

    # rcnn bbox regression
    boxes = S.FullyConnected(name='bbox_pred', data=top, num_hidden=num_classes * 4)

    # reshape output
    probs = S.Reshape(
        data=probs, shape=(rcnn_batch_size, -1, num_classes), name='cls_prob_reshape')
    boxes = S.Reshape(
        data=boxes, shape=(rcnn_batch_size, -1, 4 * num_classes), name='bbox_pred_reshape')

    # group output
    return S.Group([rois, probs, boxes])
def get_vgg_top_feature(data)
-
Expand source code
def get_vgg_top_feature(data):
    """Apply flatten, fc6 and fc7 to ``data`` and return the ``drop7`` symbol.

    fc6 and fc7 are 4096-unit fully connected layers, each followed by a ReLU
    and a dropout with p=0.5.
    """
    flat = mx.symbol.Flatten(data=data, name="flatten")

    # group 6
    stage6 = mx.symbol.Dropout(
        data=mx.symbol.Activation(
            data=mx.symbol.FullyConnected(data=flat, num_hidden=4096, name="fc6"),
            act_type="relu", name="relu6"),
        p=0.5, name="drop6")

    # group 7
    stage7 = mx.symbol.Dropout(
        data=mx.symbol.Activation(
            data=mx.symbol.FullyConnected(data=stage6, num_hidden=4096, name="fc7"),
            act_type="relu", name="relu7"),
        p=0.5, name="drop7")
    return stage7
def get_vgg_train(anchor_scales, anchor_ratios, rpn_feature_stride, rpn_pre_topk, rpn_post_topk, rpn_nms_thresh, rpn_min_size, rpn_batch_rois, num_classes, rcnn_feature_stride, rcnn_pooled_size, rcnn_batch_size, rcnn_batch_rois, rcnn_fg_fraction, rcnn_fg_overlap, rcnn_bbox_stds)
-
Expand source code
def get_vgg_train(anchor_scales, anchor_ratios, rpn_feature_stride,
                  rpn_pre_topk, rpn_post_topk, rpn_nms_thresh, rpn_min_size, rpn_batch_rois,
                  num_classes, rcnn_feature_stride, rcnn_pooled_size, rcnn_batch_size,
                  rcnn_batch_rois, rcnn_fg_fraction, rcnn_fg_overlap, rcnn_bbox_stds):
    """Build the training graph and return its grouped outputs.

    The anchor and ``rpn_*`` arguments configure the ``MultiProposal`` op
    (``rpn_batch_rois`` scales the RPN box loss gradient by its reciprocal).
    The ``rcnn_*`` arguments configure the ``proposal_target`` custom op, the
    ROI pooling and the RCNN box loss gradient scale. ``num_classes`` sizes
    the two output layers, and ``rcnn_batch_size`` is the leading dimension of
    the reshaped outputs.

    Returns:
        A symbol group ``[rpn_cls_prob, rpn_bbox_loss, cls_prob, bbox_loss,
        BlockGrad(label)]``.
    """
    S = mx.symbol
    n_anchor = len(anchor_scales) * len(anchor_ratios)

    data = S.Variable(name="data")
    im_info = S.Variable(name="im_info")
    gt_boxes = S.Variable(name="gt_boxes")
    rpn_label = S.Variable(name='label')
    rpn_bbox_target = S.Variable(name='bbox_target')
    rpn_bbox_weight = S.Variable(name='bbox_weight')

    # shared convolutional layers
    trunk = get_vgg_feature(data)

    # RPN layers
    rpn_feat = S.Activation(
        data=S.Convolution(
            data=trunk, kernel=(3, 3), pad=(1, 1), num_filter=512, name="rpn_conv_3x3"),
        act_type="relu", name="rpn_relu")

    # both 1x1 RPN output convolutions share these settings
    one_by_one = dict(data=rpn_feat, kernel=(1, 1), pad=(0, 0))

    # rpn classification
    cls_map = S.Convolution(num_filter=2 * n_anchor, name="rpn_cls_score", **one_by_one)
    cls_map = S.Reshape(data=cls_map, shape=(0, 2, -1, 0), name="rpn_cls_score_reshape")
    rpn_cls_prob = S.SoftmaxOutput(
        data=cls_map, label=rpn_label, multi_output=True,
        normalization='valid', use_ignore=True, ignore_label=-1, name="rpn_cls_prob")
    cls_act = S.softmax(data=cls_map, axis=1, name="rpn_cls_act")
    cls_act = S.Reshape(
        data=cls_act, shape=(0, 2 * n_anchor, -1, 0), name='rpn_cls_act_reshape')

    # rpn bbox regression
    box_map = S.Convolution(num_filter=4 * n_anchor, name="rpn_bbox_pred", **one_by_one)
    rpn_l1 = S.smooth_l1(
        name='rpn_bbox_loss_', scalar=3.0, data=(box_map - rpn_bbox_target))
    rpn_bbox_loss = S.MakeLoss(
        name='rpn_bbox_loss', data=rpn_bbox_weight * rpn_l1,
        grad_scale=1.0 / rpn_batch_rois)

    # rpn proposal
    proposal_cfg = dict(
        feature_stride=rpn_feature_stride,
        scales=anchor_scales,
        ratios=anchor_ratios,
        rpn_pre_nms_top_n=rpn_pre_topk,
        rpn_post_nms_top_n=rpn_post_topk,
        threshold=rpn_nms_thresh,
        rpn_min_size=rpn_min_size,
    )
    proposals = S.contrib.MultiProposal(
        cls_prob=cls_act, bbox_pred=box_map, im_info=im_info, name='rois', **proposal_cfg)

    # rcnn roi proposal target
    targets = S.Custom(
        rois=proposals, gt_boxes=gt_boxes, op_type='proposal_target',
        num_classes=num_classes, batch_images=rcnn_batch_size,
        batch_rois=rcnn_batch_rois, fg_fraction=rcnn_fg_fraction,
        fg_overlap=rcnn_fg_overlap, box_stds=rcnn_bbox_stds)
    rois = targets[0]
    label = targets[1]
    bbox_target = targets[2]
    bbox_weight = targets[3]

    # rcnn roi pool + top feature
    top = get_vgg_top_feature(
        S.ROIPooling(
            name='roi_pool', data=trunk, rois=rois, pooled_size=rcnn_pooled_size,
            spatial_scale=1.0 / rcnn_feature_stride))

    # rcnn classification
    scores = S.FullyConnected(name='cls_score', data=top, num_hidden=num_classes)
    probs = S.SoftmaxOutput(name='cls_prob', data=scores, label=label, normalization='batch')

    # rcnn bbox regression
    boxes = S.FullyConnected(name='bbox_pred', data=top, num_hidden=num_classes * 4)
    box_l1 = S.smooth_l1(name='bbox_loss_', scalar=1.0, data=(boxes - bbox_target))
    box_loss = S.MakeLoss(
        name='bbox_loss', data=bbox_weight * box_l1, grad_scale=1.0 / rcnn_batch_rois)

    # reshape output
    label = S.Reshape(data=label, shape=(rcnn_batch_size, -1), name='label_reshape')
    probs = S.Reshape(
        data=probs, shape=(rcnn_batch_size, -1, num_classes), name='cls_prob_reshape')
    box_loss = S.Reshape(
        data=box_loss, shape=(rcnn_batch_size, -1, 4 * num_classes),
        name='bbox_loss_reshape')

    # group output
    return S.Group([rpn_cls_prob, rpn_bbox_loss, probs, box_loss, S.BlockGrad(label)])