opencv HOG中detectMultiScale函数详解
2015-06-06 11:55
447 查看
函数作用:进行多尺度目标检测
973 //返回测试图片中水平方向和垂直方向共有多少个检测窗口,不能整除的话,多于的边界会不被计算在内?
974 Size HOGCache::windowsInImage(Size
imageSize, Size winStride) const
975 {
976 return Size((imageSize.width
- winSize.width)/winStride.width + 1,
977 (imageSize.height
- winSize.height)/winStride.height + 1);
978 }
979
980
981 //给定图片的大小,已经检测窗口滑动的大小和测试图片中的检测窗口的索引,得到该索引处
982 //检测窗口的尺寸,包括坐标信息
983 Rect HOGCache::getWindow(Size
imageSize, Size winStride, int idx) const
984 {
985 int nwindowsX
= (imageSize.width - winSize.width)/winStride.width + 1;
986 int y
= idx / nwindowsX;//商
987 int x
= idx - nwindowsX*y;//余数
988 return Rect(
x*winStride.width, y*winStride.height, winSize.width, winSize.height );
989 }
990
991
992 void HOGDescriptor::compute(const Mat&
img, vector<float>& descriptors,
993 Size
winStride, Size padding,
994 const vector<Point>&
locations) const
995 {
996 //Size()表示长和宽都是0
997 if(
winStride == Size() )
998 winStride
= cellSize;
999 //gcd为求最大公约数,如果采用默认值的话,则2者相同
1000 Size cacheStride(gcd(winStride.width,
blockStride.width),
1001 gcd(winStride.height,
blockStride.height));
1002 size_t nwindows =
locations.size();
1003 //alignSize(m,
n)返回n的倍数大于等于m的最小值
1004 padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);
1005 padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
1006 Size paddedImgSize(img.cols
+ padding.width*2, img.rows + padding.height*2);
1007
1008 HOGCache cache(this,
img, padding, padding, nwindows == 0, cacheStride);
1009
1010 if(
!nwindows )
1011 //Mat::area()表示为Mat的面积
1012 nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();
1013
1014 const HOGCache::BlockData*
blockData = &cache.blockData[0];
1015
1016 int nblocks
= cache.nblocks.area();
1017 int blockHistogramSize
= cache.blockHistogramSize;
1018 size_t dsize = getDescriptorSize();//一个hog的描述长度
1019 //resize()为改变矩阵的行数,如果减少矩阵的行数则只保留减少后的
1020 //那些行,如果是增加行数,则保留所有的行。
1021 //这里将描述子长度扩展到整幅图片
1022 descriptors.resize(dsize*nwindows);
1023
1024 for(
size_t i = 0; i < nwindows; i++ )
1025 {
1026 //descriptor为第i个检测窗口的描述子首位置。
1027 float*
descriptor = &descriptors[i*dsize];
1028
1029 Point pt0;
1030 //非空
1031 if(
!locations.empty() )
1032 {
1033 pt0 = locations[i];
1034 //非法的点
1035 if(
pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
1036 pt0.y
< -padding.height || pt0.y > img.rows + padding.height - winSize.height )
1037 continue;
1038 }
1039 //locations为空
1040 else
1041 {
1042 //pt0为没有扩充前图像对应的第i个检测窗口
1043 pt0 = cache.getWindow(paddedImgSize,
winStride, (int)i).tl() - Point(padding);
1044 CV_Assert(pt0.x
% cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
1045 }
1046
1047 for( int j
= 0; j < nblocks; j++ )
1048 {
1049 const HOGCache::BlockData&
bj = blockData[j];
1050 //pt为block的左上角相对检测图片的坐标
1051 Point pt =
pt0 + bj.imgOffset;
1052
1053 //dst为该block在整个测试图片的描述子的位置
1054 float*
dst = descriptor + bj.histOfs;
1055 const float*
src = cache.getBlock(pt, dst);
1056 if(
src != dst )
1057 #ifdef HAVE_IPP
1058 ippsCopy_32f(src,dst,blockHistogramSize);
1059 #else
1060 for( int k
= 0; k < blockHistogramSize; k++ )
1061 dst[k]
= src[k];
1062 #endif
1063 }
1064 }
1065 }
1066
1067
1068 void HOGDescriptor::detect(const Mat&
img,
1069 vector<Point>& hits,
vector<double>& weights, double hitThreshold,
1070 Size winStride, Size
padding, const vector<Point>& locations) const
1071 {
1072 //hits里面存的是符合检测到目标的窗口的左上角顶点坐标
1073 hits.clear();
1074 if(
svmDetector.empty() )//svm算子不能为空,因为这是HOGDescriptor类的成员函数,里面用了很多成员变量
1075 return;
1076
1077 if(
winStride == Size() )//如果窗口步长为0
,则将其设为cell的大小
1078 winStride = cellSize;
1079 Size cacheStride(gcd(winStride.width,
blockStride.width), //CacheStride为winStride和BlockStride的最大公约数
1080 gcd(winStride.height,
blockStride.height));
1081 size_t nwindows =
locations.size();//locations为预先传入的窗口子集,在这个子集中求目标,这个版本中没有用
1082 padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);//将padding改成大于等于padding
,但是可以被cacheStride整除的最小数
1083 padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
1084 Size paddedImgSize(img.cols
+ padding.width*2, img.rows + padding.height*2);//padding 以后的图片大小
1085 //这个结构的应该是应该是保存HOG描述子和其一些列参数的,构造函数会将一切数据都算好
1086 HOGCache cache(this,
img, padding, padding, nwindows == 0, cacheStride);
1087
1088 if(
!nwindows )
1089 nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();//图片包含的检测窗口的个数
1090 //BlockData结构体是对应的block数据的偏移量。histOfs和imgOffset.其中histOfs表示为该block对整个滑动窗口内hog描述算子的贡献那部分向量的起始位置;imgOffset为该block在滑动窗口图片中的坐标(左上角坐标)。
1091 const HOGCache::BlockData*
blockData = &cache.blockData[0];
1092
1093 int nblocks
= cache.nblocks.area();//每个检测窗口的block数量
1094 int blockHistogramSize
= cache.blockHistogramSize;//每个block直方图的维数
1095 size_t dsize = getDescriptorSize();
1096
1097 double rho
= svmDetector.size() > dsize ? svmDetector[dsize] : 0;//判断有没有加偏移量,rho
1098 vector<float>
blockHist(blockHistogramSize);
1099
1100 for(
size_t i = 0; i < nwindows; i++ )//遍历每一个window将其得分与hitThreshold看其是否是目标物
1101 {
1102 Point pt0;
1103 if(
!locations.empty() )
1104 {
1105 pt0 = locations[i];
1106 if(
pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
1107 pt0.y
< -padding.height || pt0.y > img.rows + padding.height - winSize.height )
1108 continue;
1109 }
1110 else
1111 { //给定padding后图片的大小,返回第i个滑动窗口在原图片中的坐标信息,得到该索引处
1112 pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl()
- Point(padding);
1113 CV_Assert(pt0.x
% cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
1114 }
1115 double s
= rho;
1116 //svmVec指向svmDetector最前面那个元素
1117 const float*
svmVec = &svmDetector[0];
1118 #ifdef HAVE_IPP
1119 int j;
1120 #else
1121 int j,
k;
1122 #endif
1123 for(
j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
1124 {
1125 const HOGCache::BlockData&
bj = blockData[j];//当前block在window中的偏移量
1126 Point pt =
pt0 + bj.imgOffset;//pt0为window在待检测图片中的偏移量,pt是当前block在图片中的偏移量
1127
1128 //vec为测试图片pt处的block贡献的描述子指针
1129 const float*
vec = cache.getBlock(pt, &blockHist[0]);//函数返回一个block描述子的指针
1130 #ifdef HAVE_IPP
1131 Ipp32f partSum;
1132 ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);
1133 s += (double)partSum;
1134 #else
1135 for(
k = 0; k <= blockHistogramSize - 4;
k += 4 ) //描述子与svm向量相乘
1136 //const
float* svmVec = &svmDetector[0];
1137 s += vec[k]*svmVec[k]
+ vec[k+1]*svmVec[k+1]
+
1138 vec[k+2]*svmVec[k+2]
+ vec[k+3]*svmVec[k+3];
1139 for(
; k < blockHistogramSize; k++ )
1140 s += vec[k]*svmVec[k];
1141 #endif
1142 }
1143 if(
s >= hitThreshold )//s是上一个for循环中每个block累加的结果,s即当前window的检测得分
1144 {
1145 hits.push_back(pt0);
1146 weights.push_back(s);
1147 }
1148 }
1149 }
1150
1151 //不用保留检测到目标的可信度,即权重
1152 void HOGDescriptor::detect(const Mat&
img, vector<Point>& hits, double hitThreshold,
1153 Size
winStride, Size padding, const vector<Point>& locations) const
1154 {
1155 vector<double>
weightsV;
1156 detect(img, hits,
weightsV, hitThreshold, winStride, padding, locations);
1157 }
1158
1159 struct HOGInvoker
1160 {
1161 HOGInvoker( const HOGDescriptor*
_hog, const Mat& _img,
1162 double _hitThreshold,
Size _winStride, Size _padding,
1163 const double*
_levelScale, ConcurrentRectVector* _vec,
1164 ConcurrentDoubleVector*
_weights=0, ConcurrentDoubleVector* _scales=0 )
1165 {
1166 hog = _hog;
1167 img = _img;
1168 hitThreshold =
_hitThreshold;
1169 winStride = _winStride;
1170 padding = _padding;
1171 levelScale = _levelScale;
1172 vec = _vec;
1173 weights = _weights;
1174 scales = _scales;
1175 }
1176
1177 void operator()( const BlockedRange&
range ) const
1178 {
1179 int i,
i1 = range.begin(), i2 = range.end();
1180 double minScale
= i1 > 0 ? levelScale[i1] : i2 > 1 ?
levelScale[i1+1] : std::max(img.cols, img.rows);//当i1=0,i2=1时 minScale取max(img.cols,
img.rows)
1181 //缩放的最大尺寸,缩放之后的图像不会达到这个尺寸
1182 Size maxSz(cvCeil(img.cols/minScale),
cvCeil(img.rows/minScale));
1183 Mat smallerImgBuf(maxSz,
img.type());//当i1==0时smallerImgBuf的大小为1*1,可能是因为i1==0时没有尺寸缩放,没有尺寸缩放时不需要smallerImgBuf来初始化
1184 vector<Point> locations;
1185 vector<double>
hitsWeights;
1186
1187 for(
i = i1; i < i2; i++ )
1188 {
1189 double scale
= levelScale[i];
1190 Size sz(cvRound(img.cols/scale),
cvRound(img.rows/scale));
1191 //smallerImg只是构造一个指针,并没有复制数据
1192 Mat smallerImg(sz,
img.type(), smallerImgBuf.data);
1193 //没有尺寸缩放
1194 if(
sz == img.size() )
1195 smallerImg
= Mat(sz, img.type(), img.data, img.step);
1196 //有尺寸缩放
1197 else
1198 resize(img,
smallerImg, sz);
1199 //检测的实际函数,该函数实际上是将返回的值存在locations和histWeights中
1200 //其中locations存的是目标区域的左上角坐标
1201 hog->detect(smallerImg,
locations, hitsWeights, hitThreshold, winStride, padding);
1202 Size scaledWinSize
= Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));//计算目标区域的大小
1203 for(
size_t j = 0; j < locations.size(); j++ )
1204 {
1205 //保存目标区域
1206 vec->push_back(Rect(cvRound(locations[j].x*scale),
1207 cvRound(locations[j].y*scale),
1208 scaledWinSize.width,
scaledWinSize.height));
1209 //保存缩放尺寸
1210 if (scales)
{
1211 scales->push_back(scale);
1212 }
1213 }
1214 //保存svm计算后的结果值,weight指针有效才保存
1215 if (weights
&& (!hitsWeights.empty()))
1216 {
1217 for (size_t
j = 0; j < locations.size(); j++)
1218 {
1219 weights->push_back(hitsWeights[j]);
1220 }
1221 }
1222 }
1223 }
1224
1225 const HOGDescriptor*
hog;
1226 Mat img;
1227 double hitThreshold;
1228 Size winStride;
1229 Size padding;
1230 const double*
levelScale;
1232 ConcurrentRectVector*
vec;
1234 ConcurrentDoubleVector*
weights;
1235 ConcurrentDoubleVector*
scales;
1236 };
1237
1238
1239 void HOGDescriptor::detectMultiScale(
1240 const Mat&
img, vector<Rect>& foundLocations, vector<double>& foundWeights,
1241 double hitThreshold,
Size winStride, Size padding,
1242 double scale0, double finalThreshold, bool useMeanshiftGrouping) const
1243 {
1244 double scale
= 1.;
1245 int levels
= 0;
1246
1247 vector<double>
levelScale;//保存图片将要缩放的尺度
1249 //nlevels默认的是64层 scale0是图像缩小参数
1250 for(
levels = 0; levels < nlevels; levels++ )
1251 {
1252 levelScale.push_back(scale);
1257 //只考虑测试图片尺寸比检测窗口尺寸大以及scale0>1的情况,
//不符合要求中断循环。所以nlevel大一点没关系(并不会特别影响速度),关键的参数其实是scale0
1253 if(
cvRound(img.cols/scale) < winSize.width ||
1254 cvRound(img.rows/scale)
< winSize.height ||
1255 scale0 <= 1 )
1256 break;
1258 scale *= scale0;
1259 }
1260 levels = std::max(levels, 1);
1261 levelScale.resize(levels);
1262
1263 ConcurrentRectVector
allCandidates;
1264 ConcurrentDoubleVector
tempScales;
1265 ConcurrentDoubleVector
tempWeights;
1266 vector<double>
foundScales;
1267
1268 //TBB并行计算,会将参数range 传到HOGInvoker结构体的()重载函数中,在这个里面对各个尺度的目标图片进行检测
1269 parallel_for(Range(0,
(int)levelScale.size()),
1270 HOGInvoker(this,
img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &tempWeights, &tempScales));
1271 //将tempScales中的内容复制到foundScales中;这个参数其实没有什么用,保存的是检测到目标的图像对应的尺度
1272 std::copy(tempScales.begin(),
tempScales.end(), back_inserter(foundScales));
1274 foundLocations.clear();
1275 //将候选目标窗口保存在foundLocations中
1276 std::copy(allCandidates.begin(),
allCandidates.end(), back_inserter(foundLocations));
1277 foundWeights.clear();
1278 //将候选目标可信度保存在foundWeights中
1279 std::copy(tempWeights.begin(),
tempWeights.end(), back_inserter(foundWeights));
1280 //对矩形框进行聚类
1281 if ( useMeanshiftGrouping )
1282 {
1283 groupRectangles_meanshift(foundLocations,
foundWeights, foundScales, finalThreshold, winSize);
1284 }
1285 else
1286 {
1288 groupRectangles(foundLocations,
(int)finalThreshold, 0.2);
1289 }
1290 }
1291
1292 //不考虑目标的置信度,通过调用包含置信度的版本
1293 void HOGDescriptor::detectMultiScale(const Mat&
img, vector<Rect>& foundLocations,
1294 double hitThreshold,
Size winStride, Size padding,
1295 double scale0, double finalThreshold, bool useMeanshiftGrouping) const
1296 {
1297 vector<double>
foundWeights;
1298 detectMultiScale(img,
foundLocations, foundWeights, hitThreshold, winStride,
1299 padding,
scale0, finalThreshold, useMeanshiftGrouping);
1300 }
973 //返回测试图片中水平方向和垂直方向共有多少个检测窗口,不能整除的话,多于的边界会不被计算在内?
974 Size HOGCache::windowsInImage(Size
imageSize, Size winStride) const
975 {
976 return Size((imageSize.width
- winSize.width)/winStride.width + 1,
977 (imageSize.height
- winSize.height)/winStride.height + 1);
978 }
979
980
981 //给定图片的大小,已经检测窗口滑动的大小和测试图片中的检测窗口的索引,得到该索引处
982 //检测窗口的尺寸,包括坐标信息
983 Rect HOGCache::getWindow(Size
imageSize, Size winStride, int idx) const
984 {
985 int nwindowsX
= (imageSize.width - winSize.width)/winStride.width + 1;
986 int y
= idx / nwindowsX;//商
987 int x
= idx - nwindowsX*y;//余数
988 return Rect(
x*winStride.width, y*winStride.height, winSize.width, winSize.height );
989 }
990
991
992 void HOGDescriptor::compute(const Mat&
img, vector<float>& descriptors,
993 Size
winStride, Size padding,
994 const vector<Point>&
locations) const
995 {
996 //Size()表示长和宽都是0
997 if(
winStride == Size() )
998 winStride
= cellSize;
999 //gcd为求最大公约数,如果采用默认值的话,则2者相同
1000 Size cacheStride(gcd(winStride.width,
blockStride.width),
1001 gcd(winStride.height,
blockStride.height));
1002 size_t nwindows =
locations.size();
1003 //alignSize(m,
n)返回n的倍数大于等于m的最小值
1004 padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);
1005 padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
1006 Size paddedImgSize(img.cols
+ padding.width*2, img.rows + padding.height*2);
1007
1008 HOGCache cache(this,
img, padding, padding, nwindows == 0, cacheStride);
1009
1010 if(
!nwindows )
1011 //Mat::area()表示为Mat的面积
1012 nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();
1013
1014 const HOGCache::BlockData*
blockData = &cache.blockData[0];
1015
1016 int nblocks
= cache.nblocks.area();
1017 int blockHistogramSize
= cache.blockHistogramSize;
1018 size_t dsize = getDescriptorSize();//一个hog的描述长度
1019 //resize()为改变矩阵的行数,如果减少矩阵的行数则只保留减少后的
1020 //那些行,如果是增加行数,则保留所有的行。
1021 //这里将描述子长度扩展到整幅图片
1022 descriptors.resize(dsize*nwindows);
1023
1024 for(
size_t i = 0; i < nwindows; i++ )
1025 {
1026 //descriptor为第i个检测窗口的描述子首位置。
1027 float*
descriptor = &descriptors[i*dsize];
1028
1029 Point pt0;
1030 //非空
1031 if(
!locations.empty() )
1032 {
1033 pt0 = locations[i];
1034 //非法的点
1035 if(
pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
1036 pt0.y
< -padding.height || pt0.y > img.rows + padding.height - winSize.height )
1037 continue;
1038 }
1039 //locations为空
1040 else
1041 {
1042 //pt0为没有扩充前图像对应的第i个检测窗口
1043 pt0 = cache.getWindow(paddedImgSize,
winStride, (int)i).tl() - Point(padding);
1044 CV_Assert(pt0.x
% cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
1045 }
1046
1047 for( int j
= 0; j < nblocks; j++ )
1048 {
1049 const HOGCache::BlockData&
bj = blockData[j];
1050 //pt为block的左上角相对检测图片的坐标
1051 Point pt =
pt0 + bj.imgOffset;
1052
1053 //dst为该block在整个测试图片的描述子的位置
1054 float*
dst = descriptor + bj.histOfs;
1055 const float*
src = cache.getBlock(pt, dst);
1056 if(
src != dst )
1057 #ifdef HAVE_IPP
1058 ippsCopy_32f(src,dst,blockHistogramSize);
1059 #else
1060 for( int k
= 0; k < blockHistogramSize; k++ )
1061 dst[k]
= src[k];
1062 #endif
1063 }
1064 }
1065 }
1066
1067
1068 void HOGDescriptor::detect(const Mat&
img,
1069 vector<Point>& hits,
vector<double>& weights, double hitThreshold,
1070 Size winStride, Size
padding, const vector<Point>& locations) const
1071 {
1072 //hits里面存的是符合检测到目标的窗口的左上角顶点坐标
1073 hits.clear();
1074 if(
svmDetector.empty() )//svm算子不能为空,因为这是HOGDescriptor类的成员函数,里面用了很多成员变量
1075 return;
1076
1077 if(
winStride == Size() )//如果窗口步长为0
,则将其设为cell的大小
1078 winStride = cellSize;
1079 Size cacheStride(gcd(winStride.width,
blockStride.width), //CacheStride为winStride和BlockStride的最大公约数
1080 gcd(winStride.height,
blockStride.height));
1081 size_t nwindows =
locations.size();//locations为预先传入的窗口子集,在这个子集中求目标,这个版本中没有用
1082 padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);//将padding改成大于等于padding
,但是可以被cacheStride整除的最小数
1083 padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
1084 Size paddedImgSize(img.cols
+ padding.width*2, img.rows + padding.height*2);//padding 以后的图片大小
1085 //这个结构的应该是应该是保存HOG描述子和其一些列参数的,构造函数会将一切数据都算好
1086 HOGCache cache(this,
img, padding, padding, nwindows == 0, cacheStride);
1087
1088 if(
!nwindows )
1089 nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();//图片包含的检测窗口的个数
1090 //BlockData结构体是对应的block数据的偏移量。histOfs和imgOffset.其中histOfs表示为该block对整个滑动窗口内hog描述算子的贡献那部分向量的起始位置;imgOffset为该block在滑动窗口图片中的坐标(左上角坐标)。
1091 const HOGCache::BlockData*
blockData = &cache.blockData[0];
1092
1093 int nblocks
= cache.nblocks.area();//每个检测窗口的block数量
1094 int blockHistogramSize
= cache.blockHistogramSize;//每个block直方图的维数
1095 size_t dsize = getDescriptorSize();
1096
1097 double rho
= svmDetector.size() > dsize ? svmDetector[dsize] : 0;//判断有没有加偏移量,rho
1098 vector<float>
blockHist(blockHistogramSize);
1099
1100 for(
size_t i = 0; i < nwindows; i++ )//遍历每一个window将其得分与hitThreshold看其是否是目标物
1101 {
1102 Point pt0;
1103 if(
!locations.empty() )
1104 {
1105 pt0 = locations[i];
1106 if(
pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
1107 pt0.y
< -padding.height || pt0.y > img.rows + padding.height - winSize.height )
1108 continue;
1109 }
1110 else
1111 { //给定padding后图片的大小,返回第i个滑动窗口在原图片中的坐标信息,得到该索引处
1112 pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl()
- Point(padding);
1113 CV_Assert(pt0.x
% cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
1114 }
1115 double s
= rho;
1116 //svmVec指向svmDetector最前面那个元素
1117 const float*
svmVec = &svmDetector[0];
1118 #ifdef HAVE_IPP
1119 int j;
1120 #else
1121 int j,
k;
1122 #endif
1123 for(
j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
1124 {
1125 const HOGCache::BlockData&
bj = blockData[j];//当前block在window中的偏移量
1126 Point pt =
pt0 + bj.imgOffset;//pt0为window在待检测图片中的偏移量,pt是当前block在图片中的偏移量
1127
1128 //vec为测试图片pt处的block贡献的描述子指针
1129 const float*
vec = cache.getBlock(pt, &blockHist[0]);//函数返回一个block描述子的指针
1130 #ifdef HAVE_IPP
1131 Ipp32f partSum;
1132 ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);
1133 s += (double)partSum;
1134 #else
1135 for(
k = 0; k <= blockHistogramSize - 4;
k += 4 ) //描述子与svm向量相乘
1136 //const
float* svmVec = &svmDetector[0];
1137 s += vec[k]*svmVec[k]
+ vec[k+1]*svmVec[k+1]
+
1138 vec[k+2]*svmVec[k+2]
+ vec[k+3]*svmVec[k+3];
1139 for(
; k < blockHistogramSize; k++ )
1140 s += vec[k]*svmVec[k];
1141 #endif
1142 }
1143 if(
s >= hitThreshold )//s是上一个for循环中每个block累加的结果,s即当前window的检测得分
1144 {
1145 hits.push_back(pt0);
1146 weights.push_back(s);
1147 }
1148 }
1149 }
1150
1151 //不用保留检测到目标的可信度,即权重
1152 void HOGDescriptor::detect(const Mat&
img, vector<Point>& hits, double hitThreshold,
1153 Size
winStride, Size padding, const vector<Point>& locations) const
1154 {
1155 vector<double>
weightsV;
1156 detect(img, hits,
weightsV, hitThreshold, winStride, padding, locations);
1157 }
1158
1159 struct HOGInvoker
1160 {
1161 HOGInvoker( const HOGDescriptor*
_hog, const Mat& _img,
1162 double _hitThreshold,
Size _winStride, Size _padding,
1163 const double*
_levelScale, ConcurrentRectVector* _vec,
1164 ConcurrentDoubleVector*
_weights=0, ConcurrentDoubleVector* _scales=0 )
1165 {
1166 hog = _hog;
1167 img = _img;
1168 hitThreshold =
_hitThreshold;
1169 winStride = _winStride;
1170 padding = _padding;
1171 levelScale = _levelScale;
1172 vec = _vec;
1173 weights = _weights;
1174 scales = _scales;
1175 }
1176
1177 void operator()( const BlockedRange&
range ) const
1178 {
1179 int i,
i1 = range.begin(), i2 = range.end();
1180 double minScale
= i1 > 0 ? levelScale[i1] : i2 > 1 ?
levelScale[i1+1] : std::max(img.cols, img.rows);//当i1=0,i2=1时 minScale取max(img.cols,
img.rows)
1181 //缩放的最大尺寸,缩放之后的图像不会达到这个尺寸
1182 Size maxSz(cvCeil(img.cols/minScale),
cvCeil(img.rows/minScale));
1183 Mat smallerImgBuf(maxSz,
img.type());//当i1==0时smallerImgBuf的大小为1*1,可能是因为i1==0时没有尺寸缩放,没有尺寸缩放时不需要smallerImgBuf来初始化
1184 vector<Point> locations;
1185 vector<double>
hitsWeights;
1186
1187 for(
i = i1; i < i2; i++ )
1188 {
1189 double scale
= levelScale[i];
1190 Size sz(cvRound(img.cols/scale),
cvRound(img.rows/scale));
1191 //smallerImg只是构造一个指针,并没有复制数据
1192 Mat smallerImg(sz,
img.type(), smallerImgBuf.data);
1193 //没有尺寸缩放
1194 if(
sz == img.size() )
1195 smallerImg
= Mat(sz, img.type(), img.data, img.step);
1196 //有尺寸缩放
1197 else
1198 resize(img,
smallerImg, sz);
1199 //检测的实际函数,该函数实际上是将返回的值存在locations和histWeights中
1200 //其中locations存的是目标区域的左上角坐标
1201 hog->detect(smallerImg,
locations, hitsWeights, hitThreshold, winStride, padding);
1202 Size scaledWinSize
= Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));//计算目标区域的大小
1203 for(
size_t j = 0; j < locations.size(); j++ )
1204 {
1205 //保存目标区域
1206 vec->push_back(Rect(cvRound(locations[j].x*scale),
1207 cvRound(locations[j].y*scale),
1208 scaledWinSize.width,
scaledWinSize.height));
1209 //保存缩放尺寸
1210 if (scales)
{
1211 scales->push_back(scale);
1212 }
1213 }
1214 //保存svm计算后的结果值,weight指针有效才保存
1215 if (weights
&& (!hitsWeights.empty()))
1216 {
1217 for (size_t
j = 0; j < locations.size(); j++)
1218 {
1219 weights->push_back(hitsWeights[j]);
1220 }
1221 }
1222 }
1223 }
1224
1225 const HOGDescriptor*
hog;
1226 Mat img;
1227 double hitThreshold;
1228 Size winStride;
1229 Size padding;
1230 const double*
levelScale;
1232 ConcurrentRectVector*
vec;
1234 ConcurrentDoubleVector*
weights;
1235 ConcurrentDoubleVector*
scales;
1236 };
1237
1238
1239 void HOGDescriptor::detectMultiScale(
1240 const Mat&
img, vector<Rect>& foundLocations, vector<double>& foundWeights,
1241 double hitThreshold,
Size winStride, Size padding,
1242 double scale0, double finalThreshold, bool useMeanshiftGrouping) const
1243 {
1244 double scale
= 1.;
1245 int levels
= 0;
1246
1247 vector<double>
levelScale;//保存图片将要缩放的尺度
1249 //nlevels默认的是64层 scale0是图像缩小参数
1250 for(
levels = 0; levels < nlevels; levels++ )
1251 {
1252 levelScale.push_back(scale);
1257 //只考虑测试图片尺寸比检测窗口尺寸大以及scale0>1的情况,
//不符合要求中断循环。所以nlevel大一点没关系(并不会特别影响速度),关键的参数其实是scale0
1253 if(
cvRound(img.cols/scale) < winSize.width ||
1254 cvRound(img.rows/scale)
< winSize.height ||
1255 scale0 <= 1 )
1256 break;
1258 scale *= scale0;
1259 }
1260 levels = std::max(levels, 1);
1261 levelScale.resize(levels);
1262
1263 ConcurrentRectVector
allCandidates;
1264 ConcurrentDoubleVector
tempScales;
1265 ConcurrentDoubleVector
tempWeights;
1266 vector<double>
foundScales;
1267
1268 //TBB并行计算,会将参数range 传到HOGInvoker结构体的()重载函数中,在这个里面对各个尺度的目标图片进行检测
1269 parallel_for(Range(0,
(int)levelScale.size()),
1270 HOGInvoker(this,
img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &tempWeights, &tempScales));
1271 //将tempScales中的内容复制到foundScales中;这个参数其实没有什么用,保存的是检测到目标的图像对应的尺度
1272 std::copy(tempScales.begin(),
tempScales.end(), back_inserter(foundScales));
1274 foundLocations.clear();
1275 //将候选目标窗口保存在foundLocations中
1276 std::copy(allCandidates.begin(),
allCandidates.end(), back_inserter(foundLocations));
1277 foundWeights.clear();
1278 //将候选目标可信度保存在foundWeights中
1279 std::copy(tempWeights.begin(),
tempWeights.end(), back_inserter(foundWeights));
1280 //对矩形框进行聚类
1281 if (
useMeanshiftGrouping )
1282 {
1283 groupRectangles_meanshift(foundLocations,
foundWeights, foundScales, finalThreshold, winSize);
1284 }
1285 else
1286 {
1288 groupRectangles(foundLocations,
(int)finalThreshold, 0.2);
1289 }
1290 }
1291
1292 //不考虑目标的置信度,通过调用包含置信度的版本
1293 void HOGDescriptor::detectMultiScale(const Mat&
img, vector<Rect>& foundLocations,
1294 double hitThreshold,
Size winStride, Size padding,
1295 double scale0, double finalThreshold, bool useMeanshiftGrouping) const
1296 {
1297 vector<double>
foundWeights;
1298 detectMultiScale(img,
foundLocations, foundWeights, hitThreshold, winStride,
1299 padding,
scale0, finalThreshold, useMeanshiftGrouping);
1300 }
函数接口 void HOGDescriptor::detectMultiScale( const Mat& img, vector<Rect>& foundLocations, vector<double>& foundWeights, double hitThreshold, Size winStride, Size padding, double scale0, double finalThreshold, bool useMeanshiftGrouping) const 参数注释<1>img:源图像。<2>foundLocations:检测出的目标的外接矩形框。<3>foundWeights:各检测窗口的得分。<4>hitThreshold:阈值,特征向量和SVM划分超平面的距离,大于等于这个值的才作为目标返回。<5>winStride:窗口步长,必须是block步长的整数倍。<6>padding:图片边缘补齐参数,gpu版本必须是(0,0)。<7>scale0:检测窗口增长参数,即相邻两层之间的缩放比例,小于等于1时只在原始尺度上检测。<8>finalThreshold:检测结果聚类参数。<9>useMeanshiftGrouping:聚类方式选择的参数。
代码注释:
973 //返回测试图片中水平方向和垂直方向共有多少个检测窗口,不能整除的话,多于的边界会不被计算在内?
974 Size HOGCache::windowsInImage(Size
imageSize, Size winStride) const
975 {
976 return Size((imageSize.width
- winSize.width)/winStride.width + 1,
977 (imageSize.height
- winSize.height)/winStride.height + 1);
978 }
979
980
981 //给定图片的大小,已经检测窗口滑动的大小和测试图片中的检测窗口的索引,得到该索引处
982 //检测窗口的尺寸,包括坐标信息
983 Rect HOGCache::getWindow(Size
imageSize, Size winStride, int idx) const
984 {
985 int nwindowsX
= (imageSize.width - winSize.width)/winStride.width + 1;
986 int y
= idx / nwindowsX;//商
987 int x
= idx - nwindowsX*y;//余数
988 return Rect(
x*winStride.width, y*winStride.height, winSize.width, winSize.height );
989 }
990
991
992 void HOGDescriptor::compute(const Mat&
img, vector<float>& descriptors,
993 Size
winStride, Size padding,
994 const vector<Point>&
locations) const
995 {
996 //Size()表示长和宽都是0
997 if(
winStride == Size() )
998 winStride
= cellSize;
999 //gcd为求最大公约数,如果采用默认值的话,则2者相同
1000 Size cacheStride(gcd(winStride.width,
blockStride.width),
1001 gcd(winStride.height,
blockStride.height));
1002 size_t nwindows =
locations.size();
1003 //alignSize(m,
n)返回n的倍数大于等于m的最小值
1004 padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);
1005 padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
1006 Size paddedImgSize(img.cols
+ padding.width*2, img.rows + padding.height*2);
1007
1008 HOGCache cache(this,
img, padding, padding, nwindows == 0, cacheStride);
1009
1010 if(
!nwindows )
1011 //Mat::area()表示为Mat的面积
1012 nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();
1013
1014 const HOGCache::BlockData*
blockData = &cache.blockData[0];
1015
1016 int nblocks
= cache.nblocks.area();
1017 int blockHistogramSize
= cache.blockHistogramSize;
1018 size_t dsize = getDescriptorSize();//一个hog的描述长度
1019 //resize()为改变矩阵的行数,如果减少矩阵的行数则只保留减少后的
1020 //那些行,如果是增加行数,则保留所有的行。
1021 //这里将描述子长度扩展到整幅图片
1022 descriptors.resize(dsize*nwindows);
1023
1024 for(
size_t i = 0; i < nwindows; i++ )
1025 {
1026 //descriptor为第i个检测窗口的描述子首位置。
1027 float*
descriptor = &descriptors[i*dsize];
1028
1029 Point pt0;
1030 //非空
1031 if(
!locations.empty() )
1032 {
1033 pt0 = locations[i];
1034 //非法的点
1035 if(
pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
1036 pt0.y
< -padding.height || pt0.y > img.rows + padding.height - winSize.height )
1037 continue;
1038 }
1039 //locations为空
1040 else
1041 {
1042 //pt0为没有扩充前图像对应的第i个检测窗口
1043 pt0 = cache.getWindow(paddedImgSize,
winStride, (int)i).tl() - Point(padding);
1044 CV_Assert(pt0.x
% cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
1045 }
1046
1047 for( int j
= 0; j < nblocks; j++ )
1048 {
1049 const HOGCache::BlockData&
bj = blockData[j];
1050 //pt为block的左上角相对检测图片的坐标
1051 Point pt =
pt0 + bj.imgOffset;
1052
1053 //dst为该block在整个测试图片的描述子的位置
1054 float*
dst = descriptor + bj.histOfs;
1055 const float*
src = cache.getBlock(pt, dst);
1056 if(
src != dst )
1057 #ifdef HAVE_IPP
1058 ippsCopy_32f(src,dst,blockHistogramSize);
1059 #else
1060 for( int k
= 0; k < blockHistogramSize; k++ )
1061 dst[k]
= src[k];
1062 #endif
1063 }
1064 }
1065 }
1066
1067
1068 void HOGDescriptor::detect(const Mat&
img,
1069 vector<Point>& hits,
vector<double>& weights, double hitThreshold,
1070 Size winStride, Size
padding, const vector<Point>& locations) const
1071 {
1072 //hits里面存的是符合检测到目标的窗口的左上角顶点坐标
1073 hits.clear();
1074 if(
svmDetector.empty() )//svm算子不能为空,因为这是HOGDescriptor类的成员函数,里面用了很多成员变量
1075 return;
1076
1077 if(
winStride == Size() )//如果窗口步长为0
,则将其设为cell的大小
1078 winStride = cellSize;
1079 Size cacheStride(gcd(winStride.width,
blockStride.width), //CacheStride为winStride和BlockStride的最大公约数
1080 gcd(winStride.height,
blockStride.height));
1081 size_t nwindows =
locations.size();//locations为预先传入的窗口子集,在这个子集中求目标,这个版本中没有用
1082 padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);//将padding改成大于等于padding
,但是可以被cacheStride整除的最小数
1083 padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
1084 Size paddedImgSize(img.cols
+ padding.width*2, img.rows + padding.height*2);//padding 以后的图片大小
1085 //这个结构的应该是应该是保存HOG描述子和其一些列参数的,构造函数会将一切数据都算好
1086 HOGCache cache(this,
img, padding, padding, nwindows == 0, cacheStride);
1087
1088 if(
!nwindows )
1089 nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();//图片包含的检测窗口的个数
1090 //BlockData结构体是对应的block数据的偏移量。histOfs和imgOffset.其中histOfs表示为该block对整个滑动窗口内hog描述算子的贡献那部分向量的起始位置;imgOffset为该block在滑动窗口图片中的坐标(左上角坐标)。
1091 const HOGCache::BlockData*
blockData = &cache.blockData[0];
1092
1093 int nblocks
= cache.nblocks.area();//每个检测窗口的block数量
1094 int blockHistogramSize
= cache.blockHistogramSize;//每个block直方图的维数
1095 size_t dsize = getDescriptorSize();
1096
1097 double rho
= svmDetector.size() > dsize ? svmDetector[dsize] : 0;//判断有没有加偏移量,rho
1098 vector<float>
blockHist(blockHistogramSize);
1099
1100 for(
size_t i = 0; i < nwindows; i++ )//遍历每一个window将其得分与hitThreshold看其是否是目标物
1101 {
1102 Point pt0;
1103 if(
!locations.empty() )
1104 {
1105 pt0 = locations[i];
1106 if(
pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
1107 pt0.y
< -padding.height || pt0.y > img.rows + padding.height - winSize.height )
1108 continue;
1109 }
1110 else
1111 { //给定padding后图片的大小,返回第i个滑动窗口在原图片中的坐标信息,得到该索引处
1112 pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl()
- Point(padding);
1113 CV_Assert(pt0.x
% cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
1114 }
1115 double s
= rho;
1116 //svmVec指向svmDetector最前面那个元素
1117 const float*
svmVec = &svmDetector[0];
1118 #ifdef HAVE_IPP
1119 int j;
1120 #else
1121 int j,
k;
1122 #endif
1123 for(
j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
1124 {
1125 const HOGCache::BlockData&
bj = blockData[j];//当前block在window中的偏移量
1126 Point pt =
pt0 + bj.imgOffset;//pt0为window在待检测图片中的偏移量,pt是当前block在图片中的偏移量
1127
1128 //vec为测试图片pt处的block贡献的描述子指针
1129 const float*
vec = cache.getBlock(pt, &blockHist[0]);//函数返回一个block描述子的指针
1130 #ifdef HAVE_IPP
1131 Ipp32f partSum;
1132 ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);
1133 s += (double)partSum;
1134 #else
1135 for(
k = 0; k <= blockHistogramSize - 4;
k += 4 ) //描述子与svm向量相乘
1136 //const
float* svmVec = &svmDetector[0];
1137 s += vec[k]*svmVec[k]
+ vec[k+1]*svmVec[k+1]
+
1138 vec[k+2]*svmVec[k+2]
+ vec[k+3]*svmVec[k+3];
1139 for(
; k < blockHistogramSize; k++ )
1140 s += vec[k]*svmVec[k];
1141 #endif
1142 }
1143 if(
s >= hitThreshold )//s是上一个for循环中每个block累加的结果,s即当前window的检测得分
1144 {
1145 hits.push_back(pt0);
1146 weights.push_back(s);
1147 }
1148 }
1149 }
1150
1151 //不用保留检测到目标的可信度,即权重
1152 void HOGDescriptor::detect(const Mat&
img, vector<Point>& hits, double hitThreshold,
1153 Size
winStride, Size padding, const vector<Point>& locations) const
1154 {
1155 vector<double>
weightsV;
1156 detect(img, hits,
weightsV, hitThreshold, winStride, padding, locations);
1157 }
1158
1159 struct HOGInvoker
1160 {
1161 HOGInvoker( const HOGDescriptor*
_hog, const Mat& _img,
1162 double _hitThreshold,
Size _winStride, Size _padding,
1163 const double*
_levelScale, ConcurrentRectVector* _vec,
1164 ConcurrentDoubleVector*
_weights=0, ConcurrentDoubleVector* _scales=0 )
1165 {
1166 hog = _hog;
1167 img = _img;
1168 hitThreshold =
_hitThreshold;
1169 winStride = _winStride;
1170 padding = _padding;
1171 levelScale = _levelScale;
1172 vec = _vec;
1173 weights = _weights;
1174 scales = _scales;
1175 }
1176
1177 void operator()( const BlockedRange&
range ) const
1178 {
1179 int i,
i1 = range.begin(), i2 = range.end();
1180 double minScale
= i1 > 0 ? levelScale[i1] : i2 > 1 ?
levelScale[i1+1] : std::max(img.cols, img.rows);//当i1=0,i2=1时 minScale取max(img.cols,
img.rows)
1181 //缩放的最大尺寸,缩放之后的图像不会达到这个尺寸
1182 Size maxSz(cvCeil(img.cols/minScale),
cvCeil(img.rows/minScale));
1183 Mat smallerImgBuf(maxSz,
img.type());//当i1==0时smallerImgBuf的大小为1*1,可能是因为i1==0时没有尺寸缩放,没有尺寸缩放时不需要smallerImgBuf来初始化
1184 vector<Point> locations;
1185 vector<double>
hitsWeights;
1186
1187 for(
i = i1; i < i2; i++ )
1188 {
1189 double scale
= levelScale[i];
1190 Size sz(cvRound(img.cols/scale),
cvRound(img.rows/scale));
1191 //smallerImg只是构造一个指针,并没有复制数据
1192 Mat smallerImg(sz,
img.type(), smallerImgBuf.data);
1193 //没有尺寸缩放
1194 if(
sz == img.size() )
1195 smallerImg
= Mat(sz, img.type(), img.data, img.step);
1196 //有尺寸缩放
1197 else
1198 resize(img,
smallerImg, sz);
1199 //检测的实际函数,该函数实际上是将返回的值存在locations和histWeights中
1200 //其中locations存的是目标区域的左上角坐标
1201 hog->detect(smallerImg,
locations, hitsWeights, hitThreshold, winStride, padding);
1202 Size scaledWinSize
= Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));//计算目标区域的大小
1203 for(
size_t j = 0; j < locations.size(); j++ )
1204 {
1205 //保存目标区域
1206 vec->push_back(Rect(cvRound(locations[j].x*scale),
1207 cvRound(locations[j].y*scale),
1208 scaledWinSize.width,
scaledWinSize.height));
1209 //保存缩放尺寸
1210 if (scales)
{
1211 scales->push_back(scale);
1212 }
1213 }
1214 //保存svm计算后的结果值,weight指针有效才保存
1215 if (weights
&& (!hitsWeights.empty()))
1216 {
1217 for (size_t
j = 0; j < locations.size(); j++)
1218 {
1219 weights->push_back(hitsWeights[j]);
1220 }
1221 }
1222 }
1223 }
1224
1225 const HOGDescriptor*
hog;
1226 Mat img;
1227 double hitThreshold;
1228 Size winStride;
1229 Size padding;
1230 const double*
levelScale;
1232 ConcurrentRectVector*
vec;
1234 ConcurrentDoubleVector*
weights;
1235 ConcurrentDoubleVector*
scales;
1236 };
1237
1238
1239 void HOGDescriptor::detectMultiScale(
1240 const Mat&
img, vector<Rect>& foundLocations, vector<double>& foundWeights,
1241 double hitThreshold,
Size winStride, Size padding,
1242 double scale0, double finalThreshold, bool useMeanshiftGrouping) const
1243 {
1244 double scale
= 1.;
1245 int levels
= 0;
1246
1247 vector<double>
levelScale;//保存图片将要缩放的尺度
1249 //nlevels默认的是64层 scale0是图像缩小参数
1250 for(
levels = 0; levels < nlevels; levels++ )
1251 {
1252 levelScale.push_back(scale);
1257 //只考虑测试图片尺寸比检测窗口尺寸大以及scale0>1的情况,
//不符合要求中断循环。所以nlevel大一点没关系(并不会特别影响速度),关键的参数其实是scale0
1253 if(
cvRound(img.cols/scale) < winSize.width ||
1254 cvRound(img.rows/scale)
< winSize.height ||
1255 scale0 <= 1 )
1256 break;
1258 scale *= scale0;
1259 }
1260 levels = std::max(levels, 1);
1261 levelScale.resize(levels);
1262
1263 ConcurrentRectVector
allCandidates;
1264 ConcurrentDoubleVector
tempScales;
1265 ConcurrentDoubleVector
tempWeights;
1266 vector<double>
foundScales;
1267
1268 //TBB并行计算,会将参数range 传到HOGInvoker结构体的()重载函数中,在这个里面对各个尺度的目标图片进行检测
1269 parallel_for(Range(0,
(int)levelScale.size()),
1270 HOGInvoker(this,
img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &tempWeights, &tempScales));
1271 //将tempScales中的内容复制到foundScales中;这个参数其实没有什么用,保存的是检测到目标的图像对应的尺度
1272 std::copy(tempScales.begin(),
tempScales.end(), back_inserter(foundScales));
1274 foundLocations.clear();
1275 //将候选目标窗口保存在foundLocations中
1276 std::copy(allCandidates.begin(),
allCandidates.end(), back_inserter(foundLocations));
1277 foundWeights.clear();
1278 //将候选目标可信度保存在foundWeights中
1279 std::copy(tempWeights.begin(),
tempWeights.end(), back_inserter(foundWeights));
1280 //对矩形框进行聚类
1281 if ( useMeanshiftGrouping )
1282 {
1283 groupRectangles_meanshift(foundLocations,
foundWeights, foundScales, finalThreshold, winSize);
1284 }
1285 else
1286 {
1288 groupRectangles(foundLocations,
(int)finalThreshold, 0.2);
1289 }
1290 }
1291
1292 //不考虑目标的置信度,通过调用包含置信度的版本
1293 void HOGDescriptor::detectMultiScale(const Mat&
img, vector<Rect>& foundLocations,
1294 double hitThreshold,
Size winStride, Size padding,
1295 double scale0, double finalThreshold, bool useMeanshiftGrouping) const
1296 {
1297 vector<double>
foundWeights;
1298 detectMultiScale(img,
foundLocations, foundWeights, hitThreshold, winStride,
1299 padding,
scale0, finalThreshold, useMeanshiftGrouping);
1300 }
973 //返回测试图片中水平方向和垂直方向共有多少个检测窗口,不能整除的话,多于的边界会不被计算在内?
974 Size HOGCache::windowsInImage(Size
imageSize, Size winStride) const
975 {
976 return Size((imageSize.width
- winSize.width)/winStride.width + 1,
977 (imageSize.height
- winSize.height)/winStride.height + 1);
978 }
979
980
981 //给定图片的大小,已经检测窗口滑动的大小和测试图片中的检测窗口的索引,得到该索引处
982 //检测窗口的尺寸,包括坐标信息
983 Rect HOGCache::getWindow(Size
imageSize, Size winStride, int idx) const
984 {
985 int nwindowsX
= (imageSize.width - winSize.width)/winStride.width + 1;
986 int y
= idx / nwindowsX;//商
987 int x
= idx - nwindowsX*y;//余数
988 return Rect(
x*winStride.width, y*winStride.height, winSize.width, winSize.height );
989 }
990
991
992 void HOGDescriptor::compute(const Mat&
img, vector<float>& descriptors,
993 Size
winStride, Size padding,
994 const vector<Point>&
locations) const
995 {
996 //Size()表示长和宽都是0
997 if(
winStride == Size() )
998 winStride
= cellSize;
999 //gcd为求最大公约数,如果采用默认值的话,则2者相同
1000 Size cacheStride(gcd(winStride.width,
blockStride.width),
1001 gcd(winStride.height,
blockStride.height));
1002 size_t nwindows =
locations.size();
1003 //alignSize(m,
n)返回n的倍数大于等于m的最小值
1004 padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);
1005 padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
1006 Size paddedImgSize(img.cols
+ padding.width*2, img.rows + padding.height*2);
1007
1008 HOGCache cache(this,
img, padding, padding, nwindows == 0, cacheStride);
1009
1010 if(
!nwindows )
1011 //Mat::area()表示为Mat的面积
1012 nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();
1013
1014 const HOGCache::BlockData*
blockData = &cache.blockData[0];
1015
1016 int nblocks
= cache.nblocks.area();
1017 int blockHistogramSize
= cache.blockHistogramSize;
1018 size_t dsize = getDescriptorSize();//一个hog的描述长度
1019 //resize()为改变矩阵的行数,如果减少矩阵的行数则只保留减少后的
1020 //那些行,如果是增加行数,则保留所有的行。
1021 //这里将描述子长度扩展到整幅图片
1022 descriptors.resize(dsize*nwindows);
1023
1024 for(
size_t i = 0; i < nwindows; i++ )
1025 {
1026 //descriptor为第i个检测窗口的描述子首位置。
1027 float*
descriptor = &descriptors[i*dsize];
1028
1029 Point pt0;
1030 //非空
1031 if(
!locations.empty() )
1032 {
1033 pt0 = locations[i];
1034 //非法的点
1035 if(
pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
1036 pt0.y
< -padding.height || pt0.y > img.rows + padding.height - winSize.height )
1037 continue;
1038 }
1039 //locations为空
1040 else
1041 {
1042 //pt0为没有扩充前图像对应的第i个检测窗口
1043 pt0 = cache.getWindow(paddedImgSize,
winStride, (int)i).tl() - Point(padding);
1044 CV_Assert(pt0.x
% cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
1045 }
1046
1047 for( int j
= 0; j < nblocks; j++ )
1048 {
1049 const HOGCache::BlockData&
bj = blockData[j];
1050 //pt为block的左上角相对检测图片的坐标
1051 Point pt =
pt0 + bj.imgOffset;
1052
1053 //dst为该block在整个测试图片的描述子的位置
1054 float*
dst = descriptor + bj.histOfs;
1055 const float*
src = cache.getBlock(pt, dst);
1056 if(
src != dst )
1057 #ifdef HAVE_IPP
1058 ippsCopy_32f(src,dst,blockHistogramSize);
1059 #else
1060 for( int k
= 0; k < blockHistogramSize; k++ )
1061 dst[k]
= src[k];
1062 #endif
1063 }
1064 }
1065 }
1066
1067
1068 void HOGDescriptor::detect(const Mat&
img,
1069 vector<Point>& hits,
vector<double>& weights, double hitThreshold,
1070 Size winStride, Size
padding, const vector<Point>& locations) const
1071 {
1072 //hits里面存的是符合检测到目标的窗口的左上角顶点坐标
1073 hits.clear();
1074 if(
svmDetector.empty() )//svm算子不能为空,因为这是HOGDescriptor类的成员函数,里面用了很多成员变量
1075 return;
1076
1077 if(
winStride == Size() )//如果窗口步长为0
,则将其设为cell的大小
1078 winStride = cellSize;
1079 Size cacheStride(gcd(winStride.width,
blockStride.width), //CacheStride为winStride和BlockStride的最大公约数
1080 gcd(winStride.height,
blockStride.height));
1081 size_t nwindows =
locations.size();//locations为预先传入的窗口子集,在这个子集中求目标,这个版本中没有用
1082 padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);//将padding改成大于等于padding
,但是可以被cacheStride整除的最小数
1083 padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
1084 Size paddedImgSize(img.cols
+ padding.width*2, img.rows + padding.height*2);//padding 以后的图片大小
1085 //这个结构的应该是应该是保存HOG描述子和其一些列参数的,构造函数会将一切数据都算好
1086 HOGCache cache(this,
img, padding, padding, nwindows == 0, cacheStride);
1087
1088 if(
!nwindows )
1089 nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();//图片包含的检测窗口的个数
1090 //BlockData结构体是对应的block数据的偏移量。histOfs和imgOffset.其中histOfs表示为该block对整个滑动窗口内hog描述算子的贡献那部分向量的起始位置;imgOffset为该block在滑动窗口图片中的坐标(左上角坐标)。
1091 const HOGCache::BlockData*
blockData = &cache.blockData[0];
1092
1093 int nblocks
= cache.nblocks.area();//每个检测窗口的block数量
1094 int blockHistogramSize
= cache.blockHistogramSize;//每个block直方图的维数
1095 size_t dsize = getDescriptorSize();
1096
1097 double rho
= svmDetector.size() > dsize ? svmDetector[dsize] : 0;//判断有没有加偏移量,rho
1098 vector<float>
blockHist(blockHistogramSize);
1099
1100 for(
size_t i = 0; i < nwindows; i++ )//遍历每一个window将其得分与hitThreshold看其是否是目标物
1101 {
1102 Point pt0;
1103 if(
!locations.empty() )
1104 {
1105 pt0 = locations[i];
1106 if(
pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
1107 pt0.y
< -padding.height || pt0.y > img.rows + padding.height - winSize.height )
1108 continue;
1109 }
1110 else
1111 { //给定padding后图片的大小,返回第i个滑动窗口在原图片中的坐标信息,得到该索引处
1112 pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl()
- Point(padding);
1113 CV_Assert(pt0.x
% cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
1114 }
1115 double s
= rho;
1116 //svmVec指向svmDetector最前面那个元素
1117 const float*
svmVec = &svmDetector[0];
1118 #ifdef HAVE_IPP
1119 int j;
1120 #else
1121 int j,
k;
1122 #endif
1123 for(
j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
1124 {
1125 const HOGCache::BlockData&
bj = blockData[j];//当前block在window中的偏移量
1126 Point pt =
pt0 + bj.imgOffset;//pt0为window在待检测图片中的偏移量,pt是当前block在图片中的偏移量
1127
1128 //vec为测试图片pt处的block贡献的描述子指针
1129 const float*
vec = cache.getBlock(pt, &blockHist[0]);//函数返回一个block描述子的指针
1130 #ifdef HAVE_IPP
1131 Ipp32f partSum;
1132 ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);
1133 s += (double)partSum;
1134 #else
1135 for(
k = 0; k <= blockHistogramSize - 4;
k += 4 ) //描述子与svm向量相乘
1136 //const
float* svmVec = &svmDetector[0];
1137 s += vec[k]*svmVec[k]
+ vec[k+1]*svmVec[k+1]
+
1138 vec[k+2]*svmVec[k+2]
+ vec[k+3]*svmVec[k+3];
1139 for(
; k < blockHistogramSize; k++ )
1140 s += vec[k]*svmVec[k];
1141 #endif
1142 }
1143 if(
s >= hitThreshold )//s是上一个for循环中每个block累加的结果,s即当前window的检测得分
1144 {
1145 hits.push_back(pt0);
1146 weights.push_back(s);
1147 }
1148 }
1149 }
1150
1151 //不用保留检测到目标的可信度,即权重
1152 void HOGDescriptor::detect(const Mat&
img, vector<Point>& hits, double hitThreshold,
1153 Size
winStride, Size padding, const vector<Point>& locations) const
1154 {
1155 vector<double>
weightsV;
1156 detect(img, hits,
weightsV, hitThreshold, winStride, padding, locations);
1157 }
1158
1159 struct HOGInvoker
1160 {
1161 HOGInvoker( const HOGDescriptor*
_hog, const Mat& _img,
1162 double _hitThreshold,
Size _winStride, Size _padding,
1163 const double*
_levelScale, ConcurrentRectVector* _vec,
1164 ConcurrentDoubleVector*
_weights=0, ConcurrentDoubleVector* _scales=0 )
1165 {
1166 hog = _hog;
1167 img = _img;
1168 hitThreshold =
_hitThreshold;
1169 winStride = _winStride;
1170 padding = _padding;
1171 levelScale = _levelScale;
1172 vec = _vec;
1173 weights = _weights;
1174 scales = _scales;
1175 }
1176
1177 void operator()( const BlockedRange&
range ) const
1178 {
1179 int i,
i1 = range.begin(), i2 = range.end();
1180 double minScale
= i1 > 0 ? levelScale[i1] : i2 > 1 ?
levelScale[i1+1] : std::max(img.cols, img.rows);//当i1=0,i2=1时 minScale取max(img.cols,
img.rows)
1181 //缩放的最大尺寸,缩放之后的图像不会达到这个尺寸
1182 Size maxSz(cvCeil(img.cols/minScale),
cvCeil(img.rows/minScale));
1183 Mat smallerImgBuf(maxSz,
img.type());//当i1==0时smallerImgBuf的大小为1*1,可能是因为i1==0时没有尺寸缩放,没有尺寸缩放时不需要smallerImgBuf来初始化
1184 vector<Point> locations;
1185 vector<double>
hitsWeights;
1186
1187 for(
i = i1; i < i2; i++ )
1188 {
1189 double scale
= levelScale[i];
1190 Size sz(cvRound(img.cols/scale),
cvRound(img.rows/scale));
1191 //smallerImg只是构造一个指针,并没有复制数据
1192 Mat smallerImg(sz,
img.type(), smallerImgBuf.data);
1193 //没有尺寸缩放
1194 if(
sz == img.size() )
1195 smallerImg
= Mat(sz, img.type(), img.data, img.step);
1196 //有尺寸缩放
1197 else
1198 resize(img,
smallerImg, sz);
1199 //检测的实际函数,该函数实际上是将返回的值存在locations和histWeights中
1200 //其中locations存的是目标区域的左上角坐标
1201 hog->detect(smallerImg,
locations, hitsWeights, hitThreshold, winStride, padding);
1202 Size scaledWinSize
= Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));//计算目标区域的大小
1203 for(
size_t j = 0; j < locations.size(); j++ )
1204 {
1205 //保存目标区域
1206 vec->push_back(Rect(cvRound(locations[j].x*scale),
1207 cvRound(locations[j].y*scale),
1208 scaledWinSize.width,
scaledWinSize.height));
1209 //保存缩放尺寸
1210 if (scales)
{
1211 scales->push_back(scale);
1212 }
1213 }
1214 //保存svm计算后的结果值,weight指针有效才保存
1215 if (weights
&& (!hitsWeights.empty()))
1216 {
1217 for (size_t
j = 0; j < locations.size(); j++)
1218 {
1219 weights->push_back(hitsWeights[j]);
1220 }
1221 }
1222 }
1223 }
1224
1225 const HOGDescriptor*
hog;
1226 Mat img;
1227 double hitThreshold;
1228 Size winStride;
1229 Size padding;
1230 const double*
levelScale;
1232 ConcurrentRectVector*
vec;
1234 ConcurrentDoubleVector*
weights;
1235 ConcurrentDoubleVector*
scales;
1236 };
1237
1238
1239 void HOGDescriptor::detectMultiScale(
1240 const Mat&
img, vector<Rect>& foundLocations, vector<double>& foundWeights,
1241 double hitThreshold,
Size winStride, Size padding,
1242 double scale0, double finalThreshold, bool useMeanshiftGrouping) const
1243 {
1244 double scale
= 1.;
1245 int levels
= 0;
1246
1247 vector<double>
levelScale;//保存图片将要缩放的尺度
1249 //nlevels默认的是64层 scale0是图像缩小参数
1250 for(
levels = 0; levels < nlevels; levels++ )
1251 {
1252 levelScale.push_back(scale);
1257 //只考虑测试图片尺寸比检测窗口尺寸大以及scale0>1的情况,
//不符合要求中断循环。所以nlevel大一点没关系(并不会特别影响速度),关键的参数其实是scale0
1253 if(
cvRound(img.cols/scale) < winSize.width ||
1254 cvRound(img.rows/scale)
< winSize.height ||
1255 scale0 <= 1 )
1256 break;
1258 scale *= scale0;
1259 }
1260 levels = std::max(levels, 1);
1261 levelScale.resize(levels);
1262
1263 ConcurrentRectVector
allCandidates;
1264 ConcurrentDoubleVector
tempScales;
1265 ConcurrentDoubleVector
tempWeights;
1266 vector<double>
foundScales;
1267
1268 //TBB并行计算,会将参数range 传到HOGInvoker结构体的()重载函数中,在这个里面对各个尺度的目标图片进行检测
1269 parallel_for(Range(0,
(int)levelScale.size()),
1270 HOGInvoker(this,
img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &tempWeights, &tempScales));
1271 //将tempScales中的内容复制到foundScales中;这个参数其实没有什么用,保存的是检测到目标的图像对应的尺度
1272 std::copy(tempScales.begin(),
tempScales.end(), back_inserter(foundScales));
1274 foundLocations.clear();
1275 //将候选目标窗口保存在foundLocations中
1276 std::copy(allCandidates.begin(),
allCandidates.end(), back_inserter(foundLocations));
1277 foundWeights.clear();
1278 //将候选目标可信度保存在foundWeights中
1279 std::copy(tempWeights.begin(),
tempWeights.end(), back_inserter(foundWeights));
1280 //对矩形框进行聚类
1281 if (
useMeanshiftGrouping )
1282 {
1283 groupRectangles_meanshift(foundLocations,
foundWeights, foundScales, finalThreshold, winSize);
1284 }
1285 else
1286 {
1288 groupRectangles(foundLocations,
(int)finalThreshold, 0.2);
1289 }
1290 }
1291
1292 //不考虑目标的置信度,通过调用包含置信度的版本
1293 void HOGDescriptor::detectMultiScale(const Mat&
img, vector<Rect>& foundLocations,
1294 double hitThreshold,
Size winStride, Size padding,
1295 double scale0, double finalThreshold, bool useMeanshiftGrouping) const
1296 {
1297 vector<double>
foundWeights;
1298 detectMultiScale(img,
foundLocations, foundWeights, hitThreshold, winStride,
1299 padding,
scale0, finalThreshold, useMeanshiftGrouping);
1300 }
相关文章推荐
- docker 自制镜像
- Opentstack 安装中遇到的错误锦集
- 第十四周项目三 OOP版电子词典
- linux下杀死进程(kill)的N种方法
- hadoop的关键进程
- hadoop的关键进程
- hadoop的关键进程
- hadoop的关键进程 分类: A1_HADOOP 2015-06-06 11:37 52人阅读 评论(0) 收藏
- OpenStack Live Migration (转)
- 收藏网站
- 不配置环境变量运行tomcat
- Linux/Unix
- Shell awk 求标准差
- 【转】 linux内核移植和网卡驱动(二)
- 【转】 linux内核移植和驱动添加(三)
- 更改linux文件的拥有者及用户组(chown和chgrp)
- PHP网站301定向方法详解hg0088
- linux—select详解
- 关于linux中so文件的深入认识
- Linux用户管理(2)