您的位置:首页 > 移动开发 > Objective-C

GOTURN(Generic Object Tracking Using Regression Networks ECCV2016)

2016-08-18 11:37 609 查看
This tracker is able to track objects at 100 fps. This real-time speed is due to two factors.

First, most previous neural network trackers are trained online. However, training neural networks is a slow process, leading to slow tracking. In contrast, our tracker is trained offline to learn a generic relationship between appearance and motion, so no online training is required.

Second, most trackers take a classification-based approach, classifying many image patches to find the target object. In contrast, this tracker uses a regression-based approach, requiring just a single feed-forward pass through the network to regress directly to the location of the target object.

The combination of offline training and one-pass regression leads to a significant speed-up compared to previous approaches and allows it to track objects at real-time speeds.

下面这个是train使用的网络,跟deploy有点不一样哦。





自己根据这个模型和网络实现的test过程,使用的是vot2014的数据库进行的测试。在序列检测的过程中,除第一帧使用ground truth外,其他帧都是使用前一帧的结果来获取target。在使用过程中发现,当相似的物体相遇之后,GOTURN的跟踪有时候会跳转到另外一个物体(分析:可能是在序列过程中误差的叠加造成forward时target出现一些偏差,导致跟踪错误);另外,对于可形变物体,跟踪效果也不好。(使用的时候要注意:输出的结果要除以10,得到的才是左上角和右下角相对于patch的位置。)

#include <string>
#include <vector>

#include "boost/algorithm/string.hpp"
#include "google/protobuf/text_format.h"
#include <opencv2/opencv.hpp>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include <stdio.h>
#include <malloc.h>
#include <fstream>
#include <boost/progress.hpp>
#include <fstream>
#include <stdio.h>

#include <boost/math/special_functions/next.hpp>
#include <boost/random.hpp>

#include <limits>

#include "caffe/common.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

//#include "caffe/util/math_functions.hpp"

using caffe::Blob;
using caffe::Caffe;
using caffe::Datum;
using caffe::Net;
using caffe::Layer;
using std::string;
namespace db = caffe::db;

void loaddata_target(boost::shared_ptr<Net<float> >& net, std::string image_path);
void loaddata_image(boost::shared_ptr<Net<float> >& net, std::string image_path);

void loaddata_target(boost::shared_ptr<Net<float> >& net, cv::Mat image);
void loaddata_image(boost::shared_ptr<Net<float> >& net, cv::Mat image);

//int lap(int x1_min,int x1_max,int x2_min,int x2_max);
int main(int argc, char** argv){
Caffe::set_mode(Caffe::GPU);
boost::shared_ptr<Net<float> > net(new Net<float>(argv[1], caffe::TEST));
net->CopyTrainedLayersFromBinaryProto(argv[2]);

//argv[3]表示的是包含pictures的文件夹
cv::Mat target, image;
cv::Mat pre_image, cur_image;
cv::Point point1, point2;
float coord;
char* filepath = new char[100];
std::ifstream in;//;(string(string(argv[3])));
sprintf(filepath, "%s/groundtruth.txt",argv[3]);
in.open(filepath);
delete [] filepath;
in >> coord;
in >> coord;
in >> coord;
point1.x = int(coord);
in >> coord;
point1.y = int(coord);
in >> coord;
in >> coord;
in >> coord;
point2.x = int(coord);
in >> coord;
point2.y = int(coord);
in.close();

char* image_path = new char[100];
int count = 1;
int cols,rows;
int w,h;
while(1){
//std::cout << point1 << " " << point2 << std::endl;
sprintf(image_path,"%s/%.8d.jpg",argv[3],count++);
pre_image = cv::imread(image_path);
//std::cout << image_path << std::endl;

sprintf(image_path,"%s/%.8d.jpg",argv[3],count++);
cur_image = cv::imread(image_path);

cols = cur_image.cols;
rows = cur_image.rows;

if(cols == 0 || rows == 0) break;

w = point2.x - point1.x;
h = point2.y - point1.y;

//std::cout << w <<" h=" << h << std::endl;

point1.x = point1.x - w/2;
point1.y = point1.y - h/2;
point2.x = point2.x + w/2;
point2.y = point2.y + h/2;

//std::cout << point1 << " " << point2 << std::endl;

point1.x = point1.x < 0 ? 0 : point1.x;
point1.y = point1.y < 0 ? 0 : point1.y;
point2.x = point2.x > cols ? cols : point2.x;
point2.y = point2.y > rows ? rows : point2.y;

// std::cout << point1 << " " << point2 << std::endl;

target = pre_image(cv::Rect(point1,point2));
image  = cur_image(cv::Rect(point1,point2));

loaddata_target(net,target);
loaddata_image(net,image);
net->Forward();

Blob<float>* output_layer = net->output_blobs()[0];
std::vector<int> points;
points.push_back(int(image.cols*((output_layer->cpu_data()[0]/10))));
points.push_back(int(image.rows*((output_layer->cpu_data()[1]/10))));
points.push_back(int(image.cols*((output_layer->cpu_data()[2]/10))));
points.push_back(int(image.rows*((output_layer->cpu_data()[3]/10))));

point2.x = point1.x + points[2];
point2.y = point1.y+ points[3];
point1.x  += points[0];
point1.y  += points[1];

cv::rectangle(cur_image, cv::Rect(point1,point2),cv::Scalar(0,225,255),1);

cv::imshow("target",target);

cv::imshow("image",cur_image);
if(count == 3)
cv::waitKey(0);
cv::waitKey(33);

//std::cout << image_path << std::endl;
//target = pre_image();
// break;
}
delete [] image_path;
return 1;
}

/**
 * Reads an image file and copies it into the network's first input blob
 * (`input_blobs()[0]`; "target" in the deploy.prototxt that accompanies this
 * code).
 *
 * The image is resized to the blob's width x height and written as three
 * planes of raw pixel values (no mean subtraction). Plane 0 receives the
 * byte at offset +2 of each pixel, plane 1 offset +1, plane 2 offset +0.
 *
 * NOTE(review): unlike the cv::Mat overload, no per-channel mean is
 * subtracted here - confirm which preprocessing the model expects.
 *
 * @param net         network whose input blob is filled.
 * @param image_path  path of the image to load.
 * @throws cv::Exception if the file cannot be read or the blob does not
 *         have exactly 3 channels.
 */
void loaddata_target(boost::shared_ptr<Net<float> >& net, std::string image_path){
    Blob<float>* input_layer = net->input_blobs()[0];
    const int width = input_layer->width();
    const int height = input_layer->height();
    const int size = width * height;
    // The loop below writes exactly three planes.
    CV_Assert(input_layer->channels() == 3);

    // The default imread flag always yields 3-channel 8-bit data, which is
    // what the 3-bytes-per-pixel indexing below relies on.
    const cv::Mat image = cv::imread(image_path);
    CV_Assert(!image.empty());

    // cv::Size takes (width, height).
    cv::Mat image_resized;
    cv::resize(image, image_resized, cv::Size(width, height));

    float* input_data = input_layer->mutable_cpu_data();
    for (int row = 0; row < height; ++row) {
        const uchar* pixels = image_resized.ptr<uchar>(row);
        for (int col = 0; col < width; ++col) {
            const int src = 3 * col;
            const int dst = row * width + col;
            input_data[dst]            = pixels[src + 2];
            input_data[dst + size]     = pixels[src + 1];
            input_data[dst + 2 * size] = pixels[src + 0];
        }
    }
}

/**
 * Reads an image file and copies it into the network's second input blob
 * (`input_blobs()[1]`; "image" in the deploy.prototxt that accompanies this
 * code).
 *
 * The image is resized to the blob's width x height and written as three
 * planes of raw pixel values (no mean subtraction). Plane 0 receives the
 * byte at offset +2 of each pixel, plane 1 offset +1, plane 2 offset +0.
 *
 * NOTE(review): unlike the cv::Mat overload, no per-channel mean is
 * subtracted here - confirm which preprocessing the model expects.
 *
 * @param net         network whose input blob is filled.
 * @param image_path  path of the image to load.
 * @throws cv::Exception if the file cannot be read or the blob does not
 *         have exactly 3 channels.
 */
void loaddata_image(boost::shared_ptr<Net<float> >& net, std::string image_path){
    Blob<float>* input_layer = net->input_blobs()[1];
    const int width = input_layer->width();
    const int height = input_layer->height();
    const int size = width * height;
    // The loop below writes exactly three planes.
    CV_Assert(input_layer->channels() == 3);

    // The default imread flag always yields 3-channel 8-bit data, which is
    // what the 3-bytes-per-pixel indexing below relies on.
    const cv::Mat image = cv::imread(image_path);
    CV_Assert(!image.empty());

    // cv::Size takes (width, height).
    cv::Mat image_resized;
    cv::resize(image, image_resized, cv::Size(width, height));

    float* input_data = input_layer->mutable_cpu_data();
    for (int row = 0; row < height; ++row) {
        const uchar* pixels = image_resized.ptr<uchar>(row);
        for (int col = 0; col < width; ++col) {
            const int src = 3 * col;
            const int dst = row * width + col;
            input_data[dst]            = pixels[src + 2];
            input_data[dst + size]     = pixels[src + 1];
            input_data[dst + 2 * size] = pixels[src + 0];
        }
    }
}

/**
 * Copies an in-memory image into the network's first input blob
 * (`input_blobs()[0]`; "target" in the deploy.prototxt that accompanies this
 * code).
 *
 * The image is resized to the blob's width x height and written as three
 * planes. Plane 0 receives the byte at offset +2 of each pixel minus 104,
 * plane 1 offset +1 minus 117, plane 2 offset +0 minus 123.
 *
 * NOTE(review): for OpenCV's BGR layout offset +2 is red, so plane 0 is
 * "red - 104" - verify that this channel/mean pairing matches how the model
 * was trained.
 *
 * @param net    network whose input blob is filled.
 * @param image  8-bit, 3-channel image (may be an ROI view).
 * @throws cv::Exception if `image` is empty or not CV_8UC3, or if the blob
 *         does not have exactly 3 channels.
 */
void loaddata_target(boost::shared_ptr<Net<float> >& net, cv::Mat image){
    Blob<float>* input_layer = net->input_blobs()[0];
    const int width = input_layer->width();
    const int height = input_layer->height();
    const int size = width * height;
    // The loop below writes exactly three planes and reads 3 bytes per pixel.
    CV_Assert(input_layer->channels() == 3);
    CV_Assert(!image.empty() && image.type() == CV_8UC3);

    // cv::Size takes (width, height).
    cv::Mat image_resized;
    cv::resize(image, image_resized, cv::Size(width, height));

    float* input_data = input_layer->mutable_cpu_data();
    for (int row = 0; row < height; ++row) {
        const uchar* pixels = image_resized.ptr<uchar>(row);
        for (int col = 0; col < width; ++col) {
            const int src = 3 * col;
            const int dst = row * width + col;
            input_data[dst]            = pixels[src + 2] - 104;
            input_data[dst + size]     = pixels[src + 1] - 117;
            input_data[dst + 2 * size] = pixels[src + 0] - 123;
        }
    }
}

/**
 * Copies an in-memory image into the network's second input blob
 * (`input_blobs()[1]`; "image" in the deploy.prototxt that accompanies this
 * code).
 *
 * The image is resized to the blob's width x height and written as three
 * planes. Plane 0 receives the byte at offset +2 of each pixel minus 104,
 * plane 1 offset +1 minus 117, plane 2 offset +0 minus 123.
 *
 * NOTE(review): for OpenCV's BGR layout offset +2 is red, so plane 0 is
 * "red - 104" - verify that this channel/mean pairing matches how the model
 * was trained.
 *
 * @param net    network whose input blob is filled.
 * @param image  8-bit, 3-channel image (may be an ROI view).
 * @throws cv::Exception if `image` is empty or not CV_8UC3, or if the blob
 *         does not have exactly 3 channels.
 */
void loaddata_image(boost::shared_ptr<Net<float> >& net, cv::Mat image){
    Blob<float>* input_layer = net->input_blobs()[1];
    const int width = input_layer->width();
    const int height = input_layer->height();
    const int size = width * height;
    // The loop below writes exactly three planes and reads 3 bytes per pixel.
    CV_Assert(input_layer->channels() == 3);
    CV_Assert(!image.empty() && image.type() == CV_8UC3);

    // cv::Size takes (width, height).
    cv::Mat image_resized;
    cv::resize(image, image_resized, cv::Size(width, height));

    float* input_data = input_layer->mutable_cpu_data();
    for (int row = 0; row < height; ++row) {
        const uchar* pixels = image_resized.ptr<uchar>(row);
        for (int col = 0; col < width; ++col) {
            const int src = 3 * col;
            const int dst = row * width + col;
            input_data[dst]            = pixels[src + 2] - 104;
            input_data[dst + size]     = pixels[src + 1] - 117;
            input_data[dst + 2 * size] = pixels[src + 0] - 123;
        }
    }
}


deploy.prototxt

# Two-branch regression network: one convolutional branch per input, merged
# by a Concat layer and followed by fully connected layers.
name: "CaffeNet"

# Inputs, in blob-index order: index 0 = "target", index 1 = "image".
input: "target"
input: "image"

# Shape of "target": batch 1, 3 channels, 227 x 227.
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227

# Shape of "image": batch 1, 3 channels, 227 x 227.
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227

# ---- "target" branch, stage 1: conv1 -> ReLU -> max-pool -> LRN ----
# 11x11 convolution, stride 4, 96 filters (227x227 input -> 55x55).
# lr_mult: 0 on both weights and bias: these parameters would receive no
# updates if this definition were used for training.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "target"
  top: "conv1"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU (bottom and top are the same blob).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
# 3x3 max pooling, stride 2 (55x55 -> 27x27).
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Local response normalization over 5 neighbouring channels.
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# ---- "target" branch, stage 2: conv2 -> ReLU -> max-pool -> LRN ----
# 5x5 convolution, pad 2, 256 filters split into 2 groups (27x27 kept).
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU.
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
# 3x3 max pooling, stride 2 (27x27 -> 13x13).
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Local response normalization over 5 neighbouring channels.
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# ---- "target" branch, stages 3-5: three 3x3 convolutions, then pooling ----
# conv3: 384 filters, pad 1, no grouping (13x13 kept).
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU.
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# conv4: 384 filters, pad 1, 2 groups.
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU.
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# conv5: 256 filters, pad 1, 2 groups.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU.
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
# 3x3 max pooling, stride 2 (13x13 -> 6x6); output of the "target" branch.
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

# ---- "image" branch, stage 1: conv1_p -> ReLU -> max-pool -> LRN ----
# Same hyper-parameters as conv1, but a separately named layer fed by the
# "image" input. 11x11 convolution, stride 4, 96 filters (227x227 -> 55x55).
layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "image"
  top: "conv1_p"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU.
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "conv1_p"
  top: "conv1_p"
}
# 3x3 max pooling, stride 2 (55x55 -> 27x27).
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Local response normalization over 5 neighbouring channels.
layer {
  name: "norm1_p"
  type: "LRN"
  bottom: "pool1_p"
  top: "norm1_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# ---- "image" branch, stage 2: conv2_p -> ReLU -> max-pool -> LRN ----
# 5x5 convolution, pad 2, 256 filters split into 2 groups (27x27 kept).
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "norm1_p"
  top: "conv2_p"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU.
layer {
  name: "relu2_p"
  type: "ReLU"
  bottom: "conv2_p"
  top: "conv2_p"
}
# 3x3 max pooling, stride 2 (27x27 -> 13x13).
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Local response normalization over 5 neighbouring channels.
layer {
  name: "norm2_p"
  type: "LRN"
  bottom: "pool2_p"
  top: "norm2_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# ---- "image" branch, stages 3-5: three 3x3 convolutions, then pooling ----
# conv3_p: 384 filters, pad 1, no grouping (13x13 kept).
layer {
  name: "conv3_p"
  type: "Convolution"
  bottom: "norm2_p"
  top: "conv3_p"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU.
layer {
  name: "relu3_p"
  type: "ReLU"
  bottom: "conv3_p"
  top: "conv3_p"
}
# conv4_p: 384 filters, pad 1, 2 groups.
layer {
  name: "conv4_p"
  type: "Convolution"
  bottom: "conv3_p"
  top: "conv4_p"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU.
layer {
  name: "relu4_p"
  type: "ReLU"
  bottom: "conv4_p"
  top: "conv4_p"
}
# conv5_p: 256 filters, pad 1, 2 groups.
layer {
  name: "conv5_p"
  type: "Convolution"
  bottom: "conv4_p"
  top: "conv5_p"
  param {
    lr_mult: 0
    decay_mult: 1
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU.
layer {
  name: "relu5_p"
  type: "ReLU"
  bottom: "conv5_p"
  top: "conv5_p"
}
# 3x3 max pooling, stride 2 (13x13 -> 6x6); output of the "image" branch.
layer {
  name: "pool5_p"
  type: "Pooling"
  bottom: "conv5_p"
  top: "pool5_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

# Joins the two branch outputs along axis 1 (channels):
# pool5 (256 ch) + pool5_p (256 ch) -> pool5_concat (512 ch).
layer {
  name: "concat"
  type: "Concat"
  bottom: "pool5"
  bottom: "pool5_p"
  top: "pool5_concat"
  concat_param {
    axis: 1
  }
}

# ---- Fully connected head: three 4096-unit layers, each followed by an
# in-place ReLU and an in-place Dropout (ratio 0.5). ----
# Layer names differ from the top blob names ("fc6-new" -> "fc6", etc.).
# lr_mult 10 (weights) / 20 (bias): training-time learning-rate multipliers.
layer {
  name: "fc6-new"
  type: "InnerProduct"
  bottom: "pool5_concat"
  top: "fc6"
  param {
    lr_mult: 10
    decay_mult: 1
  }
  param {
    lr_mult: 20
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Second fully connected layer: fc6 -> fc7.
layer {
  name: "fc7-new"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 10
    decay_mult: 1
  }
  param {
    lr_mult: 20
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Third fully connected layer: fc7 -> fc7b.
layer {
  name: "fc7-newb"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc7b"
  param {
    lr_mult: 10
    decay_mult: 1
  }
  param {
    lr_mult: 20
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu7b"
  type: "ReLU"
  bottom: "fc7b"
  top: "fc7b"
}
layer {
  name: "drop7b"
  type: "Dropout"
  bottom: "fc7b"
  top: "fc7b"
  dropout_param {
    dropout_ratio: 0.5
  }
}

# Regression output: 4 values in blob "fc8", with no activation after it.
# The accompanying C++ driver reads them as (x1, y1, x2, y2) and divides
# each by 10 to get box corners relative to the cropped patch.
layer {
  name: "fc8-shapes"
  type: "InnerProduct"
  bottom: "fc7b"
  top: "fc8"
  param {
    lr_mult: 10
    decay_mult: 1
  }
  param {
    lr_mult: 20
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: