Module 3_mxrcnn.lib.mx-rcnn.symimdb.pascal_voc
import os
import numpy as np
from symnet.logger import logger
from .imdb import IMDB
class PascalVOC(IMDB):
    classes = ['__background__',  # always index 0
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']
    def __init__(self, image_set, root_path, devkit_path):
        """
        fill basic information to initialize imdb
        :param image_set: 2007_trainval, 2007_test, etc
        :param root_path: 'data', will write 'cache'
        :param devkit_path: 'data/VOCdevkit', load data and write results
        """
        super(PascalVOC, self).__init__('voc_' + image_set, root_path)
        year, image_set = image_set.split('_')
        self._config = {'comp_id': 'comp4',
                        'use_diff': False,
                        'min_size': 2}
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._image_index_file = os.path.join(devkit_path, 'VOC' + year, 'ImageSets', 'Main', image_set + '.txt')
        self._image_file_tmpl = os.path.join(devkit_path, 'VOC' + year, 'JPEGImages', '{}.jpg')
        self._image_anno_tmpl = os.path.join(devkit_path, 'VOC' + year, 'Annotations', '{}.xml')
        # results
        result_folder = os.path.join(devkit_path, 'results', 'VOC' + year, 'Main')
        if not os.path.exists(result_folder):
            os.makedirs(result_folder)
        self._result_file_tmpl = os.path.join(result_folder, 'comp4_det_' + image_set + '_{}.txt')
        # get roidb
        self._roidb = self._get_cached('roidb', self._load_gt_roidb)
        logger.info('%s num_images %d' % (self.name, self.num_images))
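    # Minimal usage sketch (hypothetical paths, assuming the standard
    # VOCdevkit layout described in the docstring above):
    #   imdb = PascalVOC('2007_trainval', 'data', 'data/VOCdevkit')
    #   roidb = imdb.roidb  # per-image dicts built by _load_annotation below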
    def _load_gt_roidb(self):
        image_index = self._load_image_index()
        gt_roidb = [self._load_annotation(index) for index in image_index]
        return gt_roidb
    def _load_image_index(self):
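        # Each line of e.g. VOC2007/ImageSets/Main/trainval.txt is a bare
        # image id such as '000005'; the same id keys JPEGImages/{id}.jpg
        # and Annotations/{id}.xml via the templates set up in __init__.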
        with open(self._image_index_file) as f:
            image_set_index = [x.strip() for x in f.readlines()]
        return image_set_index
    def _load_annotation(self, index):
        # store original annotation as orig_objs
        height, width, orig_objs = self._parse_voc_anno(self._image_anno_tmpl.format(index))
        # filter difficult objects
        if not self._config['use_diff']:
            non_diff_objs = [obj for obj in orig_objs if obj['difficult'] == 0]
            objs = non_diff_objs
        else:
            objs = orig_objs
        num_objs = len(objs)
        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs,), dtype=np.int32)
        # Load object bounding boxes and class labels into the arrays above.
        for ix, obj in enumerate(objs):
            # Make pixel indexes 0-based
            x1 = obj['bbox'][0] - 1
            y1 = obj['bbox'][1] - 1
            x2 = obj['bbox'][2] - 1
            y2 = obj['bbox'][3] - 1
            cls = self._class_to_ind[obj['name'].lower().strip()]
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
        roi_rec = {'index': index,
                   'objs': orig_objs,
                   'image': self._image_file_tmpl.format(index),
                   'height': height,
                   'width': width,
                   'boxes': boxes,
                   'gt_classes': gt_classes,
                   'flipped': False}
        return roi_rec
    @staticmethod
    def _parse_voc_anno(filename):
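        # An abridged example of the standard VOC annotation XML this parses:
        #   <annotation>
        #     <size><width>500</width><height>375</height></size>
        #     <object>
        #       <name>dog</name>
        #       <difficult>0</difficult>
        #       <bndbox><xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax></bndbox>
        #     </object>
        #   </annotation>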
        import xml.etree.ElementTree as ET
        tree = ET.parse(filename)
        height = int(tree.find('size').find('height').text)
        width = int(tree.find('size').find('width').text)
        objects = []
        for obj in tree.findall('object'):
            obj_dict = dict()
            obj_dict['name'] = obj.find('name').text
            obj_dict['difficult'] = int(obj.find('difficult').text)
            bbox = obj.find('bndbox')
            obj_dict['bbox'] = [int(float(bbox.find('xmin').text)),
                                int(float(bbox.find('ymin').text)),
                                int(float(bbox.find('xmax').text)),
                                int(float(bbox.find('ymax').text))]
            objects.append(obj_dict)
        return height, width, objects
    def _evaluate_detections(self, detections, use_07_metric=True, **kwargs):
        self._write_pascal_results(detections)
        self._do_python_eval(detections, use_07_metric)
    def _write_pascal_results(self, all_boxes):
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            logger.info('Writing %s VOC results file' % cls)
            filename = self._result_file_tmpl.format(cls)
            with open(filename, 'wt') as f:
                for im_ind, roi_rec in enumerate(self.roidb):
                    index = roi_rec['index']
                    dets = all_boxes[cls_ind][im_ind]
                    if len(dets) == 0:
                        continue
                    # the VOCdevkit expects 1-based indices
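                    # each line: '<image_id> <score> <xmin> <ymin> <xmax> <ymax>',
                    # e.g. '000004 0.702 89.0 48.0 350.0 207.0' (illustrative values)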
                    for k in range(dets.shape[0]):
                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                format(index, dets[k, -1],
                                       dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1))
    def _do_python_eval(self, all_boxes, use_07_metric):
        aps = []
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            # class_anno maps image_index -> {'bbox', 'difficult', 'det'}
            class_anno = {}
            npos = 0
            for roi_rec in self.roidb:
                index = roi_rec['index']
                objects = [obj for obj in roi_rec['objs'] if obj['name'] == cls]
                bbox = np.array([x['bbox'] for x in objects])
                difficult = np.array([x['difficult'] for x in objects]).astype(bool)
                det = [False] * len(objects)  # whether each gt box has been matched
                npos = npos + sum(~difficult)
                class_anno[index] = {'bbox': bbox,
                                     'difficult': difficult,
                                     'det': det}
            # gather all detections of this class; each bbox row corresponds
            # to the image_ids entry at the same position
            image_ids = []
            bbox = []
            confidence = []
            for im_ind, dets in enumerate(all_boxes[cls_ind]):
                for k in range(dets.shape[0]):
                    image_ids.append(self.roidb[im_ind]['index'])
                    bbox.append([dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1])
                    confidence.append(dets[k, -1])
            bbox = np.array(bbox)
            confidence = np.array(confidence)
            rec, prec, ap = self.voc_eval(class_anno, npos, image_ids, bbox, confidence,
                                          ovthresh=0.5, use_07_metric=use_07_metric)
            aps.append(ap)
            logger.info('AP for {} = {:.4f}'.format(cls, ap))
        logger.info('Mean AP = {:.4f}'.format(np.mean(aps)))
    @staticmethod
    def voc_eval(class_anno, npos, image_ids, bbox, confidence, ovthresh=0.5, use_07_metric=False):
        # sort by confidence
        if bbox.shape[0] > 0:
            sorted_inds = np.argsort(-confidence)
            bbox = bbox[sorted_inds, :]
            image_ids = [image_ids[x] for x in sorted_inds]
        # go down detections and mark true positives and false positives
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        for d in range(nd):
            r = class_anno[image_ids[d]]
            bb = bbox[d, :].astype(float)
            ovmax = -np.inf
            bbgt = r['bbox'].astype(float)
            if bbgt.size > 0:
                # compute overlaps
                # intersection
                ixmin = np.maximum(bbgt[:, 0], bb[0])
                iymin = np.maximum(bbgt[:, 1], bb[1])
                ixmax = np.minimum(bbgt[:, 2], bb[2])
                iymax = np.minimum(bbgt[:, 3], bb[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih
                # union
                uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                       (bbgt[:, 2] - bbgt[:, 0] + 1.) *
                       (bbgt[:, 3] - bbgt[:, 1] + 1.) - inters)
                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)
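                # worked example (inclusive coordinates): bb = [0, 0, 9, 9] and a
                # single bbgt row [5, 5, 14, 14] intersect in a 5x5 patch, so
                # IoU = 25 / (100 + 100 - 25) = 1/7, below the 0.5 threshold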
            if ovmax > ovthresh:
                if not r['difficult'][jmax]:
                    if not r['det'][jmax]:
                        tp[d] = 1.
                        r['det'][jmax] = 1
                    else:
                        fp[d] = 1.
            else:
                fp[d] = 1.
        # compute precision and recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # avoid division by zero in case the first detection matches a difficult ground truth
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = PascalVOC.voc_ap(rec, prec, use_07_metric)
        return rec, prec, ap
    @staticmethod
    def voc_ap(rec, prec, use_07_metric=False):
        if use_07_metric:
            ap = 0.
            for t in np.arange(0., 1.1, 0.1):
                if np.sum(rec >= t) == 0:
                    p = 0
                else:
                    p = np.max(prec[rec >= t])
                ap += p / 11.
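            # worked example: rec = [0.5], prec = [1.0] yields p = 1.0 for the
            # six thresholds t in {0.0, ..., 0.5} and p = 0 above, so ap = 6/11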
        else:
            # append sentinel values at both ends
            mrec = np.concatenate(([0.], rec, [1.]))
            mpre = np.concatenate(([0.], prec, [0.]))
            # compute the monotonically non-increasing precision envelope
            for i in range(mpre.size - 1, 0, -1):
                mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
            # look for recall value changes
            i = np.where(mrec[1:] != mrec[:-1])[0]
            # sum (\delta recall) * prec
            ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
        return ap

Classes
- class PascalVOC (image_set, root_path, devkit_path)
  Fill basic information to initialize the imdb.

  :param image_set: 2007_trainval, 2007_test, etc.
  :param root_path: 'data', will write 'cache'
  :param devkit_path: 'data/VOCdevkit', load data and write results

Ancestors
- IMDB

Class variables
- var classes
Static methods
- def voc_ap(rec, prec, use_07_metric=False)
- def voc_eval(class_anno, npos, image_ids, bbox, confidence, ovthresh=0.5, use_07_metric=False)
 Inherited members
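Usage example

A minimal end-to-end sketch, assuming the mx-rcnn layout puts symimdb on the
Python path and that a trained detector fills all_boxes (the fill step is
elided; only PascalVOC comes from this module):

    import numpy as np
    from symimdb.pascal_voc import PascalVOC

    imdb = PascalVOC('2007_test', 'data', 'data/VOCdevkit')

    # all_boxes[cls][img] is an (n, 5) array of [x1, y1, x2, y2, score]
    all_boxes = [[np.empty((0, 5)) for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]
    # ... fill all_boxes with detections from a trained model ...

    # writes comp4_det_test_{cls}.txt files and logs per-class AP and mAP
    imdb._evaluate_detections(all_boxes)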