Faster-RCNN_TF代码解读15:roi_data_layer/minibatch.py
2017-09-19 09:32
519 查看
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Compute minibatch blobs for training a Fast R-CNN network."""

import numpy as np
import numpy.random as npr
import cv2
from fast_rcnn.config import cfg
from utils.blob import prep_im_for_blob, im_list_to_blob


def get_minibatch(roidb, num_classes):
    """Given a roidb, construct a minibatch sampled from it.

    Args:
        roidb: list with one dict per image of this minibatch.
        num_classes: number of classes; sizes the 4 * num_classes wide
            bbox regression blobs.

    Returns:
        dict of blobs. Always holds 'data'. With cfg.TRAIN.HAS_RPN it also
        holds 'gt_boxes' and 'im_info'; otherwise 'rois', 'labels' and, if
        cfg.TRAIN.BBOX_REG is set, 'bbox_targets', 'bbox_inside_weights'
        and 'bbox_outside_weights'.

    Raises:
        AssertionError: if num_images does not divide cfg.TRAIN.BATCH_SIZE,
            or if HAS_RPN is set and the batch is not a single image.
    """
    # Number of images in this minibatch.
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch: one index
    # into cfg.TRAIN.SCALES per image, i.e. an array of shape (num_images,)
    # with values in [0, len(cfg.TRAIN.SCALES)).
    # NOTE(review): the original annotation claims cfg.TRAIN.SCALES is
    # (0.25, 0.5, 1.0, 2.0, 3.0) -- verify against fast_rcnn/config.py.
    random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                                    size=num_images)
    assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)
    # Floor division keeps the count an int on Python 3 as well; a float
    # here would later be rejected by npr.choice(size=...).
    # NOTE(review): the original annotation quotes BATCH_SIZE = 128 and
    # FG_FRACTION = 0.25 (so 64 RoIs / 16 foreground RoIs per image for a
    # two-image batch) -- verify against the active config.
    rois_per_image = cfg.TRAIN.BATCH_SIZE // num_images
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Get the input image blob, formatted for caffe. im_blob is the array of
    # preprocessed images and im_scales the list of per-image scale factors
    # (see _get_image_blob).
    im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)

    blobs = {'data': im_blob}

    # NOTE(review): the original annotation marks HAS_RPN as False --
    # verify against the active config.
    if cfg.TRAIN.HAS_RPN:
        assert len(im_scales) == 1, "Single batch only"
        assert len(roidb) == 1, "Single batch only"
        # gt boxes: (x1, y1, x2, y2, cls)
        # Keep only entries with a non-zero class and scale their boxes by
        # the image scale factor.
        gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
        gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
        gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
        gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
        blobs['gt_boxes'] = gt_boxes
        # One row: (im_blob.shape[1], im_blob.shape[2], scale factor).
        # Presumably (height, width, scale) for an N x H x W x C blob --
        # confirm against utils.blob.im_list_to_blob.
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)
    else:  # not using RPN
        # Now, build the region of interest and label blobs
        rois_blob = np.zeros((0, 5), dtype=np.float32)
        labels_blob = np.zeros((0), dtype=np.float32)
        bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32)
        bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
        # all_overlaps = []
        for im_i in range(num_images):
            labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \
                = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image,
                               num_classes)

            # Add to RoIs blob: each row is (batch index, scaled RoI coords)
            rois = _project_im_rois(im_rois, im_scales[im_i])
            batch_ind = im_i * np.ones((rois.shape[0], 1))
            rois_blob_this_image = np.hstack((batch_ind, rois))
            rois_blob = np.vstack((rois_blob, rois_blob_this_image))

            # Add to labels, bbox targets, and bbox loss blobs
            labels_blob = np.hstack((labels_blob, labels))
            bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets))
            bbox_inside_blob = np.vstack((bbox_inside_blob,
                                          bbox_inside_weights))
            # all_overlaps = np.hstack((all_overlaps, overlaps))

        # For debug visualizations
        # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps)

        blobs['rois'] = rois_blob
        blobs['labels'] = labels_blob

        if cfg.TRAIN.BBOX_REG:
            blobs['bbox_targets'] = bbox_targets_blob
            blobs['bbox_inside_weights'] = bbox_inside_blob
            # Outside weights are 1.0 wherever the inside weight is positive
            blobs['bbox_outside_weights'] = \
                np.array(bbox_inside_blob > 0).astype(np.float32)

    return blobs


def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        roidb: dict for a single image; 'max_classes', 'max_overlaps',
            'boxes' and 'bbox_targets' are read.
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: upper bound on the total number of RoIs.
        num_classes: number of classes, forwarded to
            _get_bbox_regression_labels.

    Returns:
        (labels, overlaps, rois, bbox_targets, bbox_inside_weights) for the
        sampled RoIs, foreground entries first.
    """
    # label = class RoI has max overlap with
    labels = roidb['max_classes']
    overlaps = roidb['max_overlaps']
    rois = roidb['boxes']

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(np.minimum(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(
            fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired). Cast to int so that
    # npr.choice accepts the size even if a caller passed a float count.
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(np.minimum(bg_rois_per_this_image,
                                            bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(
            bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    overlaps = overlaps[keep_inds]
    rois = rois[keep_inds]

    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
        roidb['bbox_targets'][keep_inds, :],
        num_classes)

    return labels, overlaps, rois, bbox_targets, bbox_inside_weights


def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Args:
        roidb: list of per-image dicts; 'image' (path) and 'flipped' are read.
        scale_inds: per-image indices into cfg.TRAIN.SCALES.

    Returns:
        (blob, im_scales): the blob built by im_list_to_blob from the
        processed images, and the list of per-image scale factors returned
        by prep_im_for_blob.

    Raises:
        IOError: if an image file cannot be read.
    """
    # Number of images in this batch; each roidb entry is one dict.
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        if im is None:
            # cv2.imread signals failure by returning None rather than
            # raising; fail here with the offending path instead of later
            # with an obscure error.
            raise IOError(
                'Failed to read image: {}'.format(roidb[i]['image']))
        if roidb[i]['flipped']:
            # Horizontally flipped entries carry the 'flipped' flag, but
            # 'image' still points at the unflipped file, so flip here.
            im = im[:, ::-1, :]
        # target_size is the entry of cfg.TRAIN.SCALES selected by the
        # randomly drawn index for this image.
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # Preprocess/rescale the image; returns the processed image and the
        # scale factor that was applied.
        # NOTE(review): the original annotation quotes cfg.PIXEL_MEANS =
        # [[[102.9801, 115.9465, 122.7717]]] and cfg.TRAIN.MAX_SIZE = 1000
        # -- verify against fast_rcnn/config.py.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images.
    # NOTE(review): per the original annotation the blob holds the
    # mean-subtracted, rescaled images, zero-padded on the right/bottom --
    # confirm against utils.blob.
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales


def _project_im_rois(im_rois, im_scale_factor):
    """Project image RoIs into the rescaled training image."""
    rois = im_rois * im_scale_factor
    return rois


def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Bounding-box regression targets are stored in a compact form in the
    roidb.

    This function expands those targets into the 4-of-4*K representation used
    by the network (i.e. only one class has non-zero targets). The loss weights
    are similarly expanded.

    Args:
        bbox_target_data: N x 5 array; column 0 is the class index, columns
            1: are the four regression targets.
        num_classes: K, the number of classes.

    Returns:
        bbox_targets (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    clss = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True)
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    # Only rows with a non-zero class receive targets and weights.
    inds = np.where(clss > 0)[0]
    for ind in inds:
        # Plain int so the column arithmetic is not done in uint16.
        cls = int(clss[ind])
        start = 4 * cls
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
    return bbox_targets, bbox_inside_weights


def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):
    """Visualize a mini-batch for debugging.

    Shows, one RoI at a time, the source image with the RoI drawn as a red
    rectangle and prints its class label and overlap.
    """
    import matplotlib.pyplot as plt
    for i in range(rois_blob.shape[0]):
        rois = rois_blob[i, :]
        # Column 0 holds the batch index as a float; NumPy requires an
        # integer to index the image blob.
        im_ind = int(rois[0])
        roi = rois[1:]
        # NOTE(review): this transpose assumes a channel-first image, while
        # get_minibatch reads shape[1]/shape[2] for im_info -- confirm the
        # blob layout produced by utils.blob before relying on this.
        im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
        im += cfg.PIXEL_MEANS
        # Reverse the channel order for display.
        im = im[:, :, (2, 1, 0)]
        im = im.astype(np.uint8)
        cls = labels_blob[i]
        plt.imshow(im)
        # Single-argument call form works on both Python 2 and Python 3.
        print('class:  {}  overlap:  {}'.format(cls, overlaps[i]))
        plt.gca().add_patch(
            plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
                          roi[3] - roi[1], fill=False,
                          edgecolor='r', linewidth=3)
            )
        plt.show()
相关文章推荐
- Faster-RCNN_TF代码解读16:roi_data_layer/roidb.py
- Faster-RCNN_TF代码解读14:roi_data_layer/layer.py
- 7. anchor_target_layer_tf.py ( Faster-RCNN_TF代码解读)
- 9. proposal_target_layer_tf.py ( Faster-RCNN_TF代码解读)
- Faster-RCNN_TF代码解读17:anchor_target_layer_tf.py
- Faster-RCNN_TF代码解读9:proposal_target_layer_tf.py
- Faster-RCNN_TF代码解读10:proposal_layer_tf.py
- Faster-RCNN_TF代码解读4:config.py
- 2. factory.py ( Faster-RCNN_TF代码解读)
- 6. network.py ( Faster-RCNN_TF代码解读)
- Faster-RCNN_TF代码解读5:networks/factory.py
- Faster-RCNN_TF代码解读6:pascal_voc.py
- Faster-RCNN_TF代码解读7:VGGnet_train.py
- 3. train.py ( Faster-RCNN_TF代码解读)
- Faster-RCNN_TF代码解读11:imdb.py
- Faster-RCNN_TF代码解读18:generate_anchors.py
- Faster-RCNN_TF代码解读8:networks.py
- Faster-RCNN_TF代码解读12:bbox_transform.py
- Faster-RCNN_TF代码解读20:blob.py
- Faster-RCNN_TF代码解读13:nms_wrapper.py