SSD中ssd_detect调用caffe解读
2017-03-25 09:02
281 查看
SSD目录下的examples/ssd/中有一个ssd_detect.cpp文件,编译链接后生成可直接调用的ssd_detect.bin。该文件主要改写自examples/cpp_classification/classification.cpp,展示了利用C++调用Caffe的主要步骤,下面对其加以解读。
// This is a demo code for using a SSD model to do detection. // The code is modified from examples/cpp_classification/classification.cpp. // Usage: // ssd_detect [FLAGS] model_file weights_file list_file // // where model_file is the .prototxt file defining the network architecture, and // weights_file is the .caffemodel file containing the network parameters, and // list_file contains a list of image files with the format as follows: // folder/img1.JPEG // folder/img2.JPEG // list_file can also contain a list of video files with the format as follows: // folder/video1.mp4 // folder/video2.mp4 // #include <caffe/caffe.hpp> #ifdef USE_OPENCV #include <opencv2/core/core.hpp> #include <opencv2/highgui/highgui.hpp> #include <opencv2/imgproc/imgproc.hpp> #endif // USE_OPENCV #include <algorithm> #include <iomanip> #include <iosfwd> #include <memory> #include <string> #include <utility> #include <vector> #ifdef USE_OPENCV using namespace caffe; // NOLINT(build/namespaces) //定义检测器 class Detector { public: Detector(const string& model_file, const string& weights_file, const string& mean_file, const string& mean_value); std::vector<vector<float> > Detect(const cv::Mat& img); private: void SetMean(const string& mean_file, const string& mean_value); //对mean_进行初始化 void WrapInputLayer(std::vector<cv::Mat>* input_channels); //将 input_channels与网络输入绑定 void Preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels); //一系列的预处理 private: shared_ptr<Net<float> > net_; //网络指针 cv::Size input_geometry_; //输入图片的size int num_channels_; //输入图片的channel cv::Mat mean_; //均值图片 }; //构造函数 Detector::Detector(const string& model_file, const string& weights_file, const string& mean_file, const string& mean_value) { #ifdef CPU_ONLY //定义工作模式CPU or GPU Caffe::set_mode(Caffe::CPU); #else Caffe::set_mode(Caffe::GPU); #endif /* Load the network. 
*/ net_.reset(new Net<float>(model_file, TEST)); //从model_file中读取网络结构,初始化网络 net_->CopyTrainedLayersFrom(weights_file); //从权值文件中读取网络参数,初始化net_ CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input."; CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output."; Blob<float>* input_layer = net_->input_blobs()[0]; num_channels_ = input_layer->channels(); //读取输入图片的channel CHECK(num_channels_ == 3 || num_channels_ == 1) << "Input layer should have 1 or 3 channels."; input_geometry_ = cv::Size(input_layer->width(), input_layer->height()); //读取输入图片的size /* Load the binaryproto mean file. */ SetMean(mean_file, mean_value); //初始化均值图片,至此所有成员变量都被初始化了 } std::vector<vector<float> > Detector::Detect(const cv::Mat& img) { //检测器,输入图片,返回结果,每个vector代表一个结果,
//存有位置及信任程度信息 Blob<float>* input_layer = net_->input_blobs()[0]; input_layer->Reshape(1, num_channels_, input_geometry_.height, input_geometry_.width); /* Forward dimension change to all layers. */ net_->Reshape(); //对网络进行reshape std::vector<cv::Mat> input_channels; WrapInputLayer(&input_channels); //这地方比较有意思,将网络输入与input_channels绑定 ,后面再提 Preprocess(img, &input_channels); //一些列预处理,后面详解 net_->Forward(); //网络的前向传播 /* Copy the output layer to a std::vector */ Blob<float>* result_blob = net_->output_blobs()[0]; const float* result = result_blob->cpu_data(); //读取输出信息 const int num_det = result_blob->height(); //由此可见,只用了最后最后两维存储信息(只有一张图片,到现在没搞清前两维存什么)
//height:检测到的数量。width:检测到的每个目标的信息 vector<vector<float> > detections; for (int k = 0; k < num_det; ++k) { if (result[0] == -1) { //-1代表是背景 // Skip invalid detection. result += 7; //由此可见。每个目标占7个位置,后面解释每个位置的意思 continue; } vector<float> detection(result, result + 7); //vector的构造方法之一 detections.push_back(detection); result += 7; } return detections; } /* Load the mean file in binaryproto format. */ void Detector::SetMean(const string& mean_file, const string& mean_value) { //设置均值文件,Caffe中有两种设置均值文件的方式,
//mean_file or mean_value,mean_file类似于用caffe做图像分类时需要
//提供的lmdb文件,是关于每个像素点的均值,也就是对所有图片关于像素点
//均值, mean_value只有三个值,分别代表三个通道的均值,此函数初始化了
//均值图片mean_这一成员变量,此处不再详解 cv::Scalar channel_mean; if (!mean_file.empty()) { CHECK(mean_value.empty()) << "Cannot specify mean_file and mean_value at the same time"; BlobProto blob_proto; ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); /* Convert from BlobProto to Blob<float> */ Blob<float> mean_blob; mean_blob.FromProto(blob_proto); CHECK_EQ(mean_blob.channels(), num_channels_) << "Number of channels of mean file doesn't match input layer."; /* The format of the mean file is planar 32-bit float BGR or grayscale. */ std::vector<cv::Mat> channels; float* data = mean_blob.mutable_cpu_data(); //data is a pointer that point the mean_blob for (int i = 0; i < num_channels_; ++i) { /* Extract an individual channel. */ cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data); // 注意这种构造方式,将Mat的头指针与data同指向,而不进行拷贝,可去
4000 //docs.opencv.org见详细解释 channels.push_back(channel); data += mean_blob.height() * mean_blob.width(); } /* Merge the separate channels into a single image. */ cv::Mat mean; cv::merge(channels, mean); /* Compute the global mean pixel value and create a mean image * filled with this value. */ channel_mean = cv::mean(mean); mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean); } if (!mean_value.empty()) { CHECK(mean_file.empty()) << "Cannot specify mean_file and mean_value at the same time"; stringstream ss(mean_value); vector<float> values; string item; while (getline(ss, item, ',')) { float value = std::atof(item.c_str()); values.push_back(value); } CHECK(values.size() == 1 || values.size() == num_channels_) << "Specify either 1 mean_value or as many as channels: " << num_channels_; std::vector<cv::Mat> channels; for (int i = 0; i < num_channels_; ++i) { /* Extract an individual channel. */ cv::Mat channel(input_geometry_.height, input_geometry_.width, CV_32FC1, cv::Scalar(values[i])); //利用scalar对Mat进行初始化的方式很好,
//例如Mat(height,width,CV_32F2,Scalar(1,2)),2个channel的图片,一层为1,
//一层为2 channels.push_back(channel); } cv::merge(channels, mean_); } } /* Wrap the input layer of the network in separate cv::Mat objects * (one per channel). This way we save one memcpy operation and we * don't need to rely on cudaMemcpy2D. The last preprocessing * operation will write the separate channels directly to the input //bang ding * layer. */ void Detector::WrapInputLayer(std::vector<cv::Mat>* input_channels) { //这里将输入input_channels与网络的输入绑定(wrap) Blob<float>* input_layer = net_->input_blobs()[0]; int width = input_layer->width(); int height = input_layer->height(); float* input_data = input_layer->mutable_cpu_data(); for (int i = 0; i < input_layer->channels(); ++i) { cv::Mat channel(height, width, CV_32FC1, input_data); //就是这一步完成了绑定,将Mat的头指针与input_data指向相同,也就意味着,
//向Mat里写东西,就等同于向网络的输入写数据 input_channels->push_back(channel); input_data += width * height; } } void Detector::Preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels) { /* Convert the input image to the input image format of the network. */ //各种初始化 cv::Mat sample; //输入图片的channel与网络规定的channel不同,怎么办? if (img.channels() == 3 && num_channels_ == 1) cv::cvtColor(img, sample, cv::COLOR_BGR2GRAY); else if (img.channels() == 4 && num_channels_ == 1) cv::cvtColor(img, sample, cv::COLOR_BGRA2GRAY); else if (img.channels() == 4 && num_channels_ == 3) cv::cvtColor(img, sample, cv::COLOR_BGRA2BGR); else if (img.channels() == 1 && num_channels_ == 3) cv::cvtColor(img, sample, cv::COLOR_GRAY2BGR); else sample = img; cv::Mat sample_resized; //输入图片的size,与网络规定不同,怎么办 if (sample.size() != input_geometry_) cv::resize(sample, sample_resized, input_geometry_); else sample_resized = sample; cv::Mat sample_float; //将像素值转化为float if (num_channels_ == 3) sample_resized.convertTo(sample_float, CV_32FC3); else sample_resized.convertTo(sample_float, CV_32FC1); cv::Mat sample_normalized; cv::subtract(sample_float, mean_, sample_normalized); //减去均值图像,0均值化 /* This operation will write the separate BGR planes directly to the * input layer of the network because it is wrapped by the cv::Mat * objects in input_channels. */ cv::split(sample_normalized, *input_channels); //这一步完成数据输入,感觉放在外面更容易理解,结合Detect方法看,上面提到
//input_channel已经和网络输入绑定,即指向相同,所以将数据写入input_channel的
//同时,就写入了网络输入 CHECK(reinterpret_cast<float*>(input_channels->at(0).data) == net_->input_blobs()[0]->cpu_data()) << "Input channels are not wrapping the input layer of the network."; } //一堆定义命令行输入的指令,配置Caffe时用到的一个包,好像是gflags实现的,第一个参数是名称,第二个是默认值,第三个是解释
//例如在命令行通过 -mean_value=" "就可以对mean_value进行赋值 DEFINE_string(mean_file, "", "The mean file used to subtract from the input image."); DEFINE_string(mean_value, "104,117,123", "If specified, can be one value or can be same as image channels" " - would subtract from the corresponding channel). Separated by ','." "Either mean_file or mean_value should be provided, not both."); DEFINE_string(file_type, "image", "The file type in the list_file. Currently support image and video."); DEFINE_string(out_file, "", "If provided, store the detection results in the out_file."); DEFINE_double(confidence_threshold, 0.01, "Only store detections with score higher than the threshold."); DEFINE_string(detect_type, "trace", "Do detection:detect Do tracing :trace"); int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); // Print output to stderr (while still logging) FLAGS_alsologtostderr = 1; #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif gflags::SetUsageMessage("Do detection using SSD mode.\n" "Usage:\n" " ssd_detect [FLAGS] model_file weights_file list_file\n"); gflags::ParseCommandLineFlags(&argc, &argv, true); if (argc < 4) { gflags::ShowUsageWithFlagsRestrict(argv[0], "examples/ssd/ssd_detect"); return 1; } //这里不再细讲,都是常规的操作 const string& model_file = argv[1]; const string& weights_file = argv[2]; const string& mean_file = FLAGS_mean_file; //通过这种方式,读入上一步注释的赋值 const string& mean_value = FLAGS_mean_value; const string& file_type = FLAGS_file_type; const string& out_file = FLAGS_out_file; const float confidence_threshold = FLAGS_confidence_threshold; const string& detect_type=FLAGS_detect_type; // Initialize the network. Detector detector(model_file, weights_file, mean_file, mean_value); // Set the output mode. std::streambuf* buf = std::cout.rdbuf(); std::ofstream outfile; if (!out_file.empty()) { outfile.open(out_file.c_str()); if (outfile.good()) { buf = outfile.rdbuf(); } } std::ostream out(buf); // Process image one by one. 
std::ifstream infile(argv[3]); std::string file; while (infile >> file) { if (file_type == "image") { cv::Mat img = cv::imread(file, -1); CHECK(!img.empty()) << "Unable to decode image " << file; std::vector<vector<float> > detections = detector.Detect(img); /* Print the detection results. */ for (int i = 0; i < detections.size(); ++i) { const vector<float>& d = detections[i]; // Detection format: [image_id, label, score, xmin, ymin, xmax, ymax].这里指明了那七个值 CHECK_EQ(d.size(), 7); const float score = d[2]; //第三个是confidence if (score >= confidence_threshold) { out << file << " "; out << static_cast<int>(d[1]) << " "; out << score << " "; out << static_cast<int>(d[3] * img.cols) << " "; out << static_cast<int>(d[4] * img.rows) << " "; out << static_cast<int>(d[5] * img.cols) << " "; out << static_cast<int>(d[6] * img.rows) << std::endl; } } } else if (file_type == "video" && detect_type=="trace") { cv::VideoCapture cap(file); if (!cap.isOpened()) { LOG(FATAL) << "Failed to open video: " << file; } cv::Mat img; int frame_count = 0; while (true) { bool success = cap.read(img); if (!success) { LOG(INFO) << "Process " << frame_count << " frames from " << file; break; } CHECK(!img.empty()) << "Error when read frame"; std::vector<vector<float> > detections = detector.Detect(img); /* Print the detection results. */ for (int i = 0; i < detections.size(); ++i) { const vector<float>& d = detections[i]; // Detection format: [image_id, label, score, xmin, ymin, xmax, ymax]. 
CHECK_EQ(d.size(), 7); const float score = d[2]; if (score >= confidence_threshold) { out << file << "_"; out << std::setfill('0') << std::setw(6) << frame_count << " "; out << static_cast<int>(d[1]) << " "; out << score << " "; out << static_cast<int>(d[3] * img.cols) << " "; out << static_cast<int>(d[4] * img.rows) << " "; out << static_cast<int>(d[5] * img.cols) << " "; out << static_cast<int>(d[6] * img.rows) << std::endl; } } ++frame_count; } if (cap.isOpened()) { cap.release(); } } else if(file_type=="video" && detect_type=="detect"){ cv::VideoCapture cap(file); bool detected=false; if (!cap.isOpened()) { LOG(FATAL) << "Failed to open video: " << file; } cv::Mat img; int frame_count = 0; while (true) { bool success = cap.read(img); if (!success) { LOG(INFO) << "Process " << frame_count << " frames from " << file; break; } CHECK(!img.empty()) << "Error when read frame"; std::vector<vector<float> > detections = detector.Detect(img); /* Print the detection results. */ for (int i = 0; i < detections.size(); ++i) { const vector<float>& d = detections[i]; // Detection format: [image_id, label, score, xmin, ymin, xmax, ymax]. 
CHECK_EQ(d.size(), 7); const float score = d[2]; if (score >= confidence_threshold) { out << file << "_"; out << std::setfill('0') << std::setw(6) << frame_count << " "; out << static_cast<int>(d[1]) << " "; out << score << " "; out << static_cast<int>(d[3] * img.cols) << " "; out << static_cast<int>(d[4] * img.rows) << " "; out << static_cast<int>(d[5] * img.cols) << " "; out << static_cast<int>(d[6] * img.rows) << std::endl; detected=true; break; } } ++frame_count; if(detected==true) break; } if (cap.isOpened()) { cap.release(); } }else { LOG(FATAL) << "Unknown file_type: " << file_type; } } return 0; } #else int main(int argc, char** argv) { LOG(FATAL) << "This example requires OpenCV; compile with USE_OPENCV."; } #endif // USE_OPENCV至此,解释完了ssd_detect.cpp的内容,类似的可以看与之大体相同的cpp_classification的内容,也在examples下面,通过这个函数,就可以实现对于训练好的网络的调用,后期将会对如何利用C++对网络进行训练详解
相关文章推荐
- 【转】SSD的caffe源码解读 -- 数据增强
- 计算机视觉caffe之路第五篇:关于ssd_detect.cpp的使用方法
- Ubuntu系统下用QT等IDE调用Caffe、SSD框架,编译Caffe实现分类检测等工程应用的方法
- SSD的caffe源码解读 -- 数据增强
- 如何将caffe中的所需.o文件编译成可调用的.so文件
- 深度学习(九)caffe预测、特征可视化python接口调用
- Caffe代码解读(四):solver_param
- 自己写程序调用caffe库(classify例子)
- Caffe源码解读(十二):自定义数据输入层
- Jetson TX1 开发教程(7)--TensorRT加速Caffe-SSD
- Caffe源码解读:pooling_layer的前向传播与反向传播
- Ubuntu16.04,CUDA8.0,Caffe-ssd,OpenCV3.1配置指南(二)
- Caffe代码解读1--从一段命令行开始
- Windows 7+VS2013将caffe编译成动态链接库并进行调用
- 关于Windows下caffe-ssd编译需要修改的地方
- SSD+caffe︱Single Shot MultiBox Detector 目标检测+fine-tuning(二)
- Android端调用Caffe模型实现CNN分类
- 深度学习之caffe入门一一配置SSD中遇到的问题
- caffe之SSD算法词袋解析
- JETSON TX2安装caffe-SSD、tensorflow