Faster RCNN pascal_voc.py
2016-05-15 19:39
302 查看
主要定义了一个pascal_voc类,在类的内部定义了它的一些属性和方法。
def __init__(self, image_set, year, devkit_path=None) 构造器方法
def gt_roidb(self) 以'gt'方法生成roidb。其中会调用_load_pascal_annotation方法,从下载的数据文件annotation中载入图像的ground-truth信息。
def _load_pascal_annotation(self, index) 从XML文件载入图像信息,而且是ground-truth信息,比如boxes
def rpn_roidb(self): 以'rpn'方法生成roidb。先调用gt_roidb生成gt_roidb,然后调用_load_rpn_roidb载入rpn_roidb,最后调用其父类的静态方法imdb.merge_roidbs将两者合并,即最后生成的roidb中,每一张图像既包含gt_roidb中的box等信息,也包含rpn_roidb中的box等信息。
def _load_rpn_roidb(self, gt_roidb) 调用父类方法create_roidb_from_box_list 从box_list 中读取每张图像的boxes
def create_roidb_from_box_list(self, box_list, gt_roidb): 从box_list 中读取每张图像的boxes
def __init__(self, image_set, year, devkit_path=None) 构造器方法
def __init__(self, image_set, year, devkit_path=None):
    """Set up a PASCAL VOC image database for one year / image-set pair.

    Args:
        image_set: Name of the split; it is appended to the dataset name
            and stored as ``_image_set``.
        year: VOC year as a string (it is concatenated into the dataset
            name and the ``VOC<year>`` data directory).
        devkit_path: Root of the VOCdevkit. When None, the value returned
            by ``self._get_default_path()`` is used instead.

    Raises:
        AssertionError: If the devkit path or the per-year data path does
            not exist on disk.
    """
    imdb.__init__(self, 'voc_' + year + '_' + image_set)
    self._year = year
    self._image_set = image_set

    # Fall back to the default devkit location only when none was supplied.
    if devkit_path is None:
        devkit_path = self._get_default_path()
    self._devkit_path = devkit_path
    self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)

    self._classes = (
        '__background__',  # always index 0
        'aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor',
    )
    # Map each class name to its integer index.
    self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
    self._image_ext = '.jpg'
    self._image_index = self._load_image_set_index()

    # Default roidb handler: the bound method itself (not its result) is
    # stored, so it can be invoked later.
    self._roidb_handler = self.selective_search_roidb
    self._salt = str(uuid.uuid4())
    self._comp_id = 'comp4'

    # PASCAL specific config options
    self.config = {
        'cleanup': True,
        'use_salt': True,
        'use_diff': False,
        'matlab_eval': False,
        'rpn_file': None,
        'min_size': 2,
    }

    assert os.path.exists(self._devkit_path), \
        'VOCdevkit path does not exist: {}'.format(self._devkit_path)
    assert os.path.exists(self._data_path), \
        'Path does not exist: {}'.format(self._data_path)
def gt_roidb(self) 以'gt'方法生成roidb。其中会调用_load_pascal_annotation方法,从下载的数据文件annotation中载入图像的ground-truth信息。
def gt_roidb(self):
    """Return the ground-truth roidb, using an on-disk pickle as a cache.

    When ``<cache_path>/<name>_gt_roidb.pkl`` already exists, its contents
    are unpickled and returned. Otherwise one entry per image index is
    built with ``_load_pascal_annotation``, the list is pickled to that
    file, and the list is returned.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')

    if not os.path.exists(cache_file):
        # Cache miss: parse every annotation, then persist the result.
        entries = []
        for image_id in self.image_index:
            entries.append(self._load_pascal_annotation(image_id))
        with open(cache_file, 'wb') as sink:
            cPickle.dump(entries, sink, cPickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))
        return entries

    # Cache hit: reuse the previously pickled roidb.
    with open(cache_file, 'rb') as source:
        cached = cPickle.load(source)
    print('{} gt roidb loaded from {}'.format(self.name, cache_file))
    return cached
def _load_pascal_annotation(self, index) 从XML文件载入图像信息,而且是ground-truth信息,比如boxes
def _load_pascal_annotation(self, index):
    """Load bounding boxes and labels for one image from its VOC XML file.

    The file read is ``<_data_path>/Annotations/<index>.xml``. Unless
    ``config['use_diff']`` is true, objects flagged as difficult are
    dropped. An object with no ``<difficult>`` tag (or an empty one) is
    treated as not difficult instead of raising.

    Args:
        index: Image identifier; ``.xml`` is appended to form the file name.

    Returns:
        dict with keys:
            'boxes': (num_objs, 4) uint16 array of 0-based
                [x1, y1, x2, y2] coordinates.
            'gt_classes': (num_objs,) int32 array of class indices.
            'gt_overlaps': scipy CSR matrix of shape
                (num_objs, num_classes), 1.0 at each object's own class.
            'flipped': always False.
            'seg_areas': (num_objs,) float32 array of box areas.

    Raises:
        KeyError: If an object's name is not in ``_class_to_ind``.
    """
    def is_difficult(obj):
        # A missing or empty <difficult> tag counts as "not difficult".
        node = obj.find('difficult')
        if node is None or node.text is None:
            return False
        return int(node.text) != 0

    filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
    tree = ET.parse(filename)
    objs = tree.findall('object')
    if not self.config['use_diff']:
        # Exclude the samples labeled as difficult
        objs = [obj for obj in objs if not is_difficult(obj)]
    num_objs = len(objs)

    # NOTE(review): uint16 cannot hold negative values, so an annotation
    # coordinate of 0 becomes -1 after the 0-basing below and would wrap.
    # The dtype is kept unchanged here so downstream consumers see the
    # same array type as before.
    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros(num_objs, dtype=np.int32)
    # One row per box in this image, one column per class.
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    # "Seg" area for pascal is just the box area
    seg_areas = np.zeros(num_objs, dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        bbox = obj.find('bndbox')
        # Make pixel indexes 0-based
        x1 = float(bbox.find('xmin').text) - 1
        y1 = float(bbox.find('ymin').text) - 1
        x2 = float(bbox.find('xmax').text) - 1
        y2 = float(bbox.find('ymax').text) - 1
        cls = self._class_to_ind[obj.find('name').text.lower().strip()]
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        # These boxes are the ground truth themselves, so the overlap with
        # their own class is set to 1 directly.
        overlaps[ix, cls] = 1.0
        seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

    overlaps = scipy.sparse.csr_matrix(overlaps)

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': seg_areas}
def rpn_roidb(self): 以'rpn'方法生成roidb。先调用gt_roidb生成gt_roidb,然后调用_load_rpn_roidb载入rpn_roidb,最后调用其父类的静态方法imdb.merge_roidbs将两者合并,即最后生成的roidb中,每一张图像既包含gt_roidb中的box等信息,也包含rpn_roidb中的box等信息。
def rpn_roidb(self):
    """Build the roidb from RPN proposals, merged with ground truth if used.

    For year 2007, or for any image set other than 'test', the
    ground-truth roidb is loaded, passed to ``_load_rpn_roidb``, and the
    two are combined with ``imdb.merge_roidbs``. Otherwise only the RPN
    proposals are loaded, with no ground truth.
    """
    proposals_only = int(self._year) != 2007 and self._image_set == 'test'
    if proposals_only:
        return self._load_rpn_roidb(None)

    ground_truth = self.gt_roidb()
    # The RPN roidb is computed against the ground truth, so it needs it
    # as an argument.
    proposals = self._load_rpn_roidb(ground_truth)
    return imdb.merge_roidbs(ground_truth, proposals)
def _load_rpn_roidb(self, gt_roidb) 调用父类方法create_roidb_from_box_list 从box_list 中读取每张图像的boxes
def _load_rpn_roidb(self, gt_roidb):
    """Unpickle the RPN proposal file and turn it into a roidb.

    The file path comes from ``config['rpn_file']``. The unpickled object
    is handed to ``create_roidb_from_box_list`` together with ``gt_roidb``.

    Args:
        gt_roidb: Ground-truth roidb forwarded unchanged; may be None.

    Raises:
        AssertionError: If the configured file does not exist.
    """
    rpn_path = self.config['rpn_file']
    print('loading {}'.format(rpn_path))
    assert os.path.exists(rpn_path), \
        'rpn data not found at: {}'.format(rpn_path)

    # The pickle holds the box list: one element per image, so its length
    # is expected to match gt_roidb.
    # NOTE(review): unpickling executes arbitrary code; only point
    # 'rpn_file' at trusted files.
    with open(rpn_path, 'rb') as stream:
        proposals = cPickle.load(stream)
    return self.create_roidb_from_box_list(proposals, gt_roidb)
def create_roidb_from_box_list(self, box_list, gt_roidb): 从box_list 中读取每张图像的boxes
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image proposal boxes.

    Args:
        box_list: Sequence with one box array per image; each array's
            first dimension is the number of boxes for that image. Its
            length must equal ``self.num_images``.
        gt_roidb: Ground-truth roidb aligned image-by-image with
            ``box_list``, or None to skip overlap computation.

    Returns:
        list of dicts, one per image, with keys 'boxes' (the input array),
        'gt_classes' (all zeros, int32), 'gt_overlaps' (scipy CSR matrix of
        shape num_boxes x num_classes), 'flipped' (False) and 'seg_areas'
        (all zeros, float32).

    Raises:
        AssertionError: If ``len(box_list) != self.num_images``.
    """
    # box_list must have exactly one entry per image, in the same order.
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i, boxes in enumerate(box_list):
        num_boxes = boxes.shape[0]
        # overlaps is always num_boxes x num_classes.
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # Overlap between each proposal and each ground-truth box;
            # the max / argmax below are taken per proposal (axis=1).
            # Builtin float replaces the np.float alias, which NumPy has
            # removed; the alias referred to the same type.
            gt_overlaps = bbox_overlaps(boxes.astype(float),
                                        gt_boxes.astype(float))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            # Only proposals that overlap some ground-truth box get an
            # entry, placed in the column of the best-matching box's class.
            I = np.where(maxes > 0)[0]
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            # Proposals get an all-zero label vector here.
            # NOTE(review): the rationale is not stated in this code.
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            # Unlike the ground-truth roidb, whose overlaps are all 1.0,
            # these hold the proposal-vs-ground-truth overlaps.
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
相关文章推荐
- Java之多线程异步和同步
- 机房重构——报表
- Faster RCNN roidb.py
- 走进Android天才少年-1999年的Lody(罗迪)
- Spring 关键点
- Spring MVC学习笔记——POJO
- Faster RCNN imdb.py
- 337_house_robber_III
- 测试
- Sql 编辑器 大小写转换快捷键
- Faster RCNN train_faster_rcnn_alt_opt.py
- iOS MVVM 框架设计 和 MVC 框架
- 微软面试100题-74
- redis 文章
- Fragment之间Bundle传值
- findViewById空指针
- BZOJ1004: [HNOI2008]Cards
- 十万个为什么之数学
- javascript事件编程
- 第十一周学习进度表