7. anchor_target_layer_tf.py (Faster-RCNN_TF代码解读)

7. /lib/rpn_msr/anchor_target_layer_tf.py

本文件将anchors分配给gt targets,产生anchor classification labels和bounding-box regression targets.


# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import os
import yaml
from fast_rcnn.config import cfg
import numpy as np
import numpy.random as npr
from generate_anchors import generate_anchors
from utils.cython_bbox import bbox_overlaps
from fast_rcnn.bbox_transform import bbox_transform
import pdb

# Module-level debug switch (off by default); presumably meant to gate the
# diagnostic prints in anchor_target_layer.
DEBUG = False
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [16,], anchor_scales = [4 ,8, 16, 32]):
    """Assign anchors to ground-truth targets.

    Produces anchor classification labels and bounding-box regression
    targets for the RPN.

    Args:
        rpn_cls_score: output of the rpn_cls_score layer. Only its shape is
            used: axis 0 must be 1 (single-image batch) and axes 1 and 2 are
            read as the feature-map height and width.
        gt_boxes: ground-truth boxes, one per row. _compute_targets asserts
            5 columns and uses the first four as box coordinates.
        im_info: image information; im_info[0] is read as
            (image height, image width, scale).
        data: the network input blob; not used by this function.
        _feat_stride: pixel distance between neighbouring feature-map cells.
        anchor_scales: scales handed to generate_anchors. NOTE(review): the
            original annotation mentions [8, 16, 32] and "9 anchors", but the
            default here has four scales - confirm which is intended.

    Returns:
        (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights). With A anchors per cell and an H x W
        feature map, rpn_labels has shape (1, 1, A * H, W) holding 1
        (foreground), 0 (background) or -1 (ignored); the other three have
        shape (1, A * 4, H, W).

    Raises:
        AssertionError: if the batch holds more than one item, or if
            cfg.TRAIN.RPN_POSITIVE_WEIGHT is non-negative but not in (0, 1).
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        # Accumulators for the debug statistics printed further down. They
        # are locals, so they only ever describe the current call.
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate the base anchor boxes centered on cell i
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height {} width {}'.format(height, width))
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
        print('rpn: gt_boxes {}'.format(gt_boxes))

    # 1. Generate proposals from bbox deltas and shifted anchors
    # One x offset per feature-map column and one y offset per row, spaced
    # _feat_stride pixels apart. Per the original author's note, the stride
    # of 16 comes from the four 2x max-pool layers of the VGG backbone.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # Flatten the grids into one (x, y, x, y) row per cell, giving a
    # (width * height, 4) array. For width=3, height=4, stride=16:
    # [[ 0  0  0  0]
    #  [16  0 16  0]
    #  [32  0 32  0]
    #  [ 0 16  0 16]
    #  [16 16 16 16]
    #  [32 16 32 16]
    #  [ 0 32  0 32]
    #  [16 32 16 32]
    #  [32 32 32 32]
    #  [ 0 48  0 48]
    #  [16 48 16 48]
    #  [32 48 32 48]]
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    # Broadcasting adds every cell shift to every base anchor.
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image; _allowed_border widens the accepted
    # rectangle by that many pixels on every side
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 is what the removed np.float alias used to resolve to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # Best gt box for every anchor, and the overlap achieved with it.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # Best anchor for every gt box, and the overlap achieved with it.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # argmax only reports the first best anchor per gt box; re-query so that
    # every anchor tying for a gt box's best overlap is included.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        # randomly pick the surplus background anchors and mark them ignored
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Regression targets (dx, dy, dw, dh) from each inside anchor to the gt
    # box it overlaps most; shape (len(inds_inside), 4).
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Only foreground anchors get non-zero inside weights.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
        print('rpn: num_positive {}'.format(np.sum(labels == 1)))
        print('rpn: num_negative {}'.format(np.sum(labels == 0)))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg {}'.format(_fg_sum / _count))
        print('rpn: num_negative avg {}'.format(_bg_sum / _count))

    # labels
    # The flat arrays are ordered cell by cell with the anchor index varying
    # fastest, so they reshape directly to (1, H, W, A). The transpose moves
    # the anchor axis forward to (1, A, H, W) and the second reshape stacks
    # the A maps along the height axis.
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels,rpn_bbox_targets,rpn_bbox_inside_weights,rpn_bbox_outside_weights

def _unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items.

    Args:
        data: array whose first axis enumerates the kept items.
        count: size of the original set, i.e. length of the result's first
            axis.
        inds: positions in the original set that the rows of data came from.
        fill: value written to every position not listed in inds.

    Returns:
        A float32 array of shape (count,) + data.shape[1:] holding data at
        inds and fill everywhere else.
    """
    # data.shape[1:] is empty for 1-D input, so one allocation covers both
    # the label vector and the (N, 4) target/weight arrays.
    ret = np.full((count, ) + data.shape[1:], fill, dtype=np.float32)
    ret[inds] = data
    return ret

def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: (N, 4) array of example boxes (the anchors).
        gt_rois: (N, 5) array; row i is the ground-truth box matched to
            ex_rois[i]. Only the first four columns are passed on.

    Returns:
        The result of bbox_transform(ex_rois, gt_rois[:, :4]) as float32.

    Raises:
        AssertionError: if the row counts differ, ex_rois does not have 4
            columns, or gt_rois does not have 5 columns.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    # Drop the fifth gt column before computing the deltas; copy=False avoids
    # a copy when the result is already float32.
    return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
