您的位置：首页 > 其它

FASText(上) : Fast 角点

2016-07-14 17:07 309 查看

0导语

本文主要介绍论文FASText: Efficient Unconstrained Scene Text Detector [pdf][code]，其核心思想是定制化Fast角点，使其更有利于场景文字的检测。因此本文分上下两篇：上篇先介绍Fast角点，并剖析其opencv（2.4.10）的代码；下篇再介绍FASText。

首先我们直观感受下FASText和Fast角点，参数阈值为12，FASText用的是FASText12，fast的是TYPE_7_12。并且根据论文的数据，它的proposal比MSER要少一半，但是检出率recall要高25%，并且速度还是4倍以上，全面优于MSER。

FASText(3013个角点)

Fast(7924个角点)

1 Fast特征点
1.1角点候选

Fast角点的思想很简单，很早就已经提出了[2]。但是请注意，opencv(2.4.10)中NMS的做法跟论文中有些差别，后面我们会看到。
Fast中对特征点的定义是这样的：
如图3，以当前点（绿色点）为中心，以某个半径r画圆，考查圆周上的点（红色），假设它们一共有k个。如果红色的点中有至少n个连续的点，其像素值都大于当前点像素值加上阈值t，或者都小于当前点像素值减去阈值t，那么当前坐标就会被认为是角点的候选。opencv中k和n有3种配置，默认的是k = 16, n = 9，还有k = 12, n = 7和k = 8, n = 5这两种配置。

上面的例子中如果我们假设k=16,n=9,t=10的话，那么我们就可以找到这么一组点（黄色），其满足上面的所有条件

1)在圆周上且是连续的
2)点的个数10,超过9
3)它们的像素值都低于中心点t以上

所以该点就是潜在角点。从原理上看，fast角点不具备尺度（scale）不变性，但基本具有旋转不变性和平移不变性。

1.2 NMS
再次强调论文和opencv采用的是不同的思路

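第二步也是最后一步就是普通的NMS，但是这里面比较麻烦的是怎么去评价一个点的好坏，论文中用的是

$$V = \max\left(\sum_{x \in G}\left(|I_x - I_p| - t\right),\ \sum_{x \in L}\left(|I_p - I_x| - t\right)\right)$$

其中（$I_p$ 为中心点的像素值，$I_x$ 为圆周上点 $x$ 的像素值）

$$G = \{x \mid I_x \ge I_p + t\},\qquad L = \{x \mid I_x \le I_p - t\}$$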

意思就是先挑选出圆周上所有比中心点大阈值t以上的点集G和比它小阈值t以上的点集L

G = {}，L = {138, 135, 82, 96, 109, 71, 121, 117, 140, 117, 74, 110, 137, 141}

求绝对差并减掉t

G = {}, L ={5,8,61,47,34,72,22,26,3,26,69,33,6,2};

求和

G = 0, L = 414

取两者的最大值414作为分数

但是opencv代码中用的是另外一种思路。类似于木桶原理，它选的是符合条件的点集中与中心点相差最小的像素点，并用它们之间的绝对差作为这个角点的分数。上面的例子中，黄色的点里与中心点相差最小的是140，因此分数是13（严格地说，下面代码中cornerScore的返回值是 -b0 - 1，即该绝对差再减1）。

本文就讲到这，由于本人水平有限，错误与纰漏还请指正。附上opencv的代码，去掉了优化部分，以方便阅读。

<pre name="code" class="cpp">#include <opencv/cv.hpp>
#include <vector>

namespace TestFastKeyPoint
{
using namespace cv;
// Fills pixel[0..24] with pointer offsets (relative to a centre pixel) of the
// sampling ring used by the FAST detector.
//
//   pixel       output table of 25 offsets
//   rowStride   distance, in elements, between vertically adjacent pixels
//   patternSize number of ring points; must be 16, 12 or 8, anything else
//               trips the CV_Assert below
//
// The first patternSize entries are the ring itself; the remaining entries
// repeat the ring from its start, so a caller can walk consecutive entries
// past the end of the ring without any modulo arithmetic.
void makeOffsets(int pixel[25], int rowStride, int patternSize)
{
    // Each row is a (dx, dy) displacement from the centre pixel.
    static const int ring16[][2] =
    {
        { 0, 3 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 3, 0 }, { 3, -1 }, { 2, -2 }, { 1, -3 },
        { 0, -3 }, { -1, -3 }, { -2, -2 }, { -3, -1 }, { -3, 0 }, { -3, 1 }, { -2, 2 }, { -1, 3 }
    };

    static const int ring12[][2] =
    {
        { 0, 2 }, { 1, 2 }, { 2, 1 }, { 2, 0 }, { 2, -1 }, { 1, -2 },
        { 0, -2 }, { -1, -2 }, { -2, -1 }, { -2, 0 }, { -2, 1 }, { -1, 2 }
    };

    static const int ring8[][2] =
    {
        { 0, 1 }, { 1, 1 }, { 1, 0 }, { 1, -1 },
        { 0, -1 }, { -1, -1 }, { -1, 0 }, { -1, 1 }
    };

    // Select the table for the requested ring; stays null for unsupported sizes.
    const int(*offsets)[2] = 0;
    switch (patternSize)
    {
    case 16: offsets = ring16; break;
    case 12: offsets = ring12; break;
    case 8:  offsets = ring8;  break;
    default: break;
    }

    CV_Assert(pixel && offsets);

    for (int idx = 0; idx < 25; ++idx)
    {
        if (idx < patternSize)
            pixel[idx] = offsets[idx][0] + offsets[idx][1] * rowStride;  // dx + dy * stride
        else
            pixel[idx] = pixel[idx - patternSize];                       // wrap around the ring
    }
}

// Primary template of the FAST corner-score function (declaration only).
//   ptr       pointer to the centre pixel being scored
//   pixel     ring offsets relative to ptr, as filled in by makeOffsets()
//   threshold detection threshold used as the starting value of the score
// The explicit specialisations visible in this file cover patternSize
// 12, 8 and 16; no generic definition is provided here.
template<int patternSize>
int cornerScore(const uchar* ptr, const int pixel[], int threshold);

// Corner score for the 12-point ring.
//
// Every arc of 7 consecutive ring points is examined (six interior points plus
// one of the two neighbouring end points). For arcs whose differences
// centre - ring are positive the smallest difference is taken, and the largest
// such value over all arcs is kept; the mirrored computation is done for
// negative differences. The function returns the best magnitude found minus 1,
// and never less than threshold - 1.
//
//   ptr       pointer to the centre pixel
//   pixel     ring offsets relative to ptr (at least 19 entries are read)
//   threshold lower bound used to seed the search
template<>
int cornerScore<12>(const uchar* ptr, const int pixel[], int threshold)
{
    const int K = 6, N = K * 3 + 1;
    const int center = ptr[0];

    // Signed differences centre - ring for one and a half turns of the ring.
    short diff[N + 4];
    for (int i = 0; i < N; ++i)
        diff[i] = (short)(center - ptr[pixel[i]]);

    // Positive differences: maximise the minimum over each arc.
    int posBest = threshold;
    for (int s = 0; s < 12; s += 2)
    {
        int lo = std::min((int)diff[s + 1], (int)diff[s + 2]);
        if (lo > posBest)  // otherwise this arc cannot improve the score
        {
            for (int t = 3; t <= 6; ++t)
                lo = std::min(lo, (int)diff[s + t]);
            posBest = std::max(posBest, std::min(lo, (int)diff[s]));
            posBest = std::max(posBest, std::min(lo, (int)diff[s + 7]));
        }
    }

    // Negative differences: minimise the maximum over each arc.
    int negBest = -posBest;
    for (int s = 0; s < 12; s += 2)
    {
        int hi = (int)diff[s + 1];
        for (int t = 2; t <= 4; ++t)
            hi = std::max(hi, (int)diff[s + t]);
        if (hi < negBest)  // otherwise this arc cannot improve the score
        {
            hi = std::max(hi, (int)diff[s + 5]);
            hi = std::max(hi, (int)diff[s + 6]);
            negBest = std::min(negBest, std::max(hi, (int)diff[s]));
            negBest = std::min(negBest, std::max(hi, (int)diff[s + 7]));
        }
    }

    return -negBest - 1;
}

// Corner score for the 8-point ring.
//
// Every arc of 5 consecutive ring points is examined (four interior points plus
// one of the two neighbouring end points). For positive differences
// centre - ring the smallest value on the arc is taken and the largest such
// value over all arcs is kept; the mirrored computation is done for negative
// differences. The function returns the best magnitude found minus 1, and
// never less than threshold - 1.
//
//   ptr       pointer to the centre pixel
//   pixel     ring offsets relative to ptr (N = 13 entries are read)
//   threshold lower bound used to seed the search
template<>
int cornerScore<8>(const uchar* ptr, const int pixel[], int threshold)
{
    const int K = 4, N = K * 3 + 1;
    int k, v = ptr[0];
    // One signed difference per scanned ring entry. The array bound is
    // required: d is indexed with 0..N-1 below.
    short d[N];
    for (k = 0; k < N; k++)
        d[k] = (short)(v - ptr[pixel[k]]);

    // Positive differences: maximise the minimum over each arc.
    int a0 = threshold;
    for (k = 0; k < 8; k += 2)
    {
        int a = std::min((int)d[k + 1], (int)d[k + 2]);
        if (a <= a0)
            continue;  // this arc cannot improve a0
        a = std::min(a, (int)d[k + 3]);
        a = std::min(a, (int)d[k + 4]);
        a0 = std::max(a0, std::min(a, (int)d[k]));
        a0 = std::max(a0, std::min(a, (int)d[k + 5]));
    }

    // Negative differences: minimise the maximum over each arc.
    int b0 = -a0;
    for (k = 0; k < 8; k += 2)
    {
        int b = std::max((int)d[k + 1], (int)d[k + 2]);
        b = std::max(b, (int)d[k + 3]);
        if (b >= b0)
            continue;  // this arc cannot improve b0
        b = std::max(b, (int)d[k + 4]);

        b0 = std::min(b0, std::max(b, (int)d[k]));
        b0 = std::min(b0, std::max(b, (int)d[k + 5]));
    }

    threshold = -b0 - 1;

    return threshold;
}
// Corner score for the 16-point ring.
//
// Every arc of 9 consecutive ring points is examined (eight interior points
// plus one of the two neighbouring end points). For positive differences
// centre - ring the smallest value on the arc is taken and the largest such
// value over all arcs is kept; the mirrored computation is done for negative
// differences. The function returns the best magnitude found minus 1, and
// never less than threshold - 1.
//
//   ptr       pointer to the centre pixel
//   pixel     ring offsets relative to ptr (N = 25 entries are read)
//   threshold lower bound used to seed the search
template<>
int cornerScore<16>(const uchar* ptr, const int pixel[], int threshold)
{
    const int K = 8, N = K * 3 + 1;
    int k, v = ptr[0];
    // One signed difference per scanned ring entry. The array bound is
    // required: d is indexed with 0..N-1 below.
    short d[N];
    for (k = 0; k < N; k++)
        d[k] = (short)(v - ptr[pixel[k]]);  // pixel difference: centre - ring

    // Positive differences: maximise the minimum over each arc.
    int a0 = threshold;
    for (k = 0; k < 16; k += 2)
    {
        int a = std::min((int)d[k + 1], (int)d[k + 2]);
        a = std::min(a, (int)d[k + 3]);
        if (a <= a0)
            continue;  // this arc cannot improve a0
        a = std::min(a, (int)d[k + 4]);
        a = std::min(a, (int)d[k + 5]);
        a = std::min(a, (int)d[k + 6]);
        a = std::min(a, (int)d[k + 7]);
        a = std::min(a, (int)d[k + 8]);
        a0 = std::max(a0, std::min(a, (int)d[k]));
        a0 = std::max(a0, std::min(a, (int)d[k + 9]));
    }

    // Negative differences: minimise the maximum over each arc.
    int b0 = -a0;
    for (k = 0; k < 16; k += 2)
    {
        int b = std::max((int)d[k + 1], (int)d[k + 2]);
        b = std::max(b, (int)d[k + 3]);
        b = std::max(b, (int)d[k + 4]);
        b = std::max(b, (int)d[k + 5]);
        if (b >= b0)
            continue;  // this arc cannot improve b0
        b = std::max(b, (int)d[k + 6]);
        b = std::max(b, (int)d[k + 7]);
        b = std::max(b, (int)d[k + 8]);

        b0 = std::min(b0, std::max(b, (int)d[k]));
        b0 = std::min(b0, std::max(b, (int)d[k + 9]));
    }

    threshold = -b0 - 1;

    return threshold;
}

// FAST corner detection over the image wrapped by _img.
//
//   _img               input image; rows are read through img.ptr<uchar>() and
//                      columns are stepped one byte at a time
//                      (assumes 8-bit single-channel data; not checked here)
//   keypoints          output; cleared first, then filled with detected corners
//   threshold          intensity difference threshold, clamped to [0, 255]
//   nonmax_suppression when true, a corner is kept only if its score is strictly
//                      greater than the scores of its 8 neighbours
//
// Rows are processed top to bottom. Scores and corner columns of the last three
// rows are kept in rolling buffers; after row i has been scanned, the corners
// found on row i - 1 are filtered and emitted.
template<int patternSize>
void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
{
Mat img = _img.getMat();
// A candidate needs more than K consecutive ring pixels on the same side of the
// threshold; N entries of pixel[] are scanned so that runs wrapping past the
// end of the ring are also seen.
const int K = patternSize / 2, N = patternSize + K + 1;
int i, j, k, pixel[25];
makeOffsets(pixel, (int)img.step, patternSize);

keypoints.clear();

threshold = std::min(std::max(threshold, 0), 255);
// The difference of two 8-bit pixels lies in [-255, 255], hence a 512-entry
// lookup table indexed by (difference + 255):
//   0 - difference within the threshold
//   2 - ring pixel brighter than the centre by more than the threshold
//   1 - ring pixel darker than the centre by more than the threshold
uchar threshold_tab[512];
for (i = -255; i <= 255; i++)
threshold_tab[i + 255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0);

AutoBuffer<uchar> _buf((img.cols + 16) * 3 * (sizeof(int)+sizeof(uchar)) + 128);
uchar* buf[3];
// buf[] holds the per-column corner scores of three consecutive rows; each
// score is stored in a single byte.
buf[0] = _buf; buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols;
int* cpbuf[3];
cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1;// column indices of the corners of a row; the "+ 1" reserves slot [-1] for the corner count
cpbuf[1] = cpbuf[0] + img.cols + 1;
cpbuf[2] = cpbuf[1] + img.cols + 1;
memset(buf[0], 0, img.cols * 3);

// The last iteration (i == img.rows - 3) scans no pixels; it only flushes the
// corners of the previous row.
for (i = 3; i < img.rows - 2; i++)
{
const uchar* ptr = img.ptr<uchar>(i) +3;
uchar* curr = buf[(i - 3) % 3];
int* cornerpos = cpbuf[(i - 3) % 3];
memset(curr, 0, img.cols);
int ncorners = 0;

if (i < img.rows - 3)
{
j = 3;
for (; j < img.cols - 3; j++, ptr++)
{
int v = ptr[0];
// tab[x] classifies ring pixel value x against the centre value v.
const uchar* tab = &threshold_tab[0] - v + 255;
// Quick rejection using opposite ring entries. d is a bit mask: bit 0 survives
// if every tested pair contains a darker pixel, bit 1 if every pair contains a
// brighter one.
// NOTE(review): the indices 0..15 are hard-coded for the 16-point ring; for
// patternSize 8 or 12 the table filled by makeOffsets() wraps around, so these
// are not opposite points - confirm this is intended.
int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]];// ring points 1 and 9; 0 means both are similar to the centre

if (d == 0)
continue;

d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]];// ring points 3 and 11
d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]];// ring points 5 and 13
d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]];// ring points 7 and 15

if (d == 0)
continue;

d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]];
d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]];
d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]];
d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]];

// Original author's note: d == 3 means every tested pair holds one brighter and
// one darker pixel, so at best 8 consecutive pixels lie on one side and neither
// scan below reports a corner (reasoning is for the 16-point ring).
if (d & 1)
{
// Look for more than K consecutive ring pixels darker than v - threshold.
int vt = v - threshold, count = 0;

for (k = 0; k < N; k++)
{
int x = ptr[pixel[k]];
if (x < vt)
{
if (++count > K)
{
cornerpos[ncorners++] = j;
if (nonmax_suppression)
curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
break;
}
}
else
count = 0;// reset so that only consecutive darker pixels are counted
}
}

if (d & 2)
{
// Look for more than K consecutive ring pixels brighter than v + threshold.
int vt = v + threshold, count = 0;

for (k = 0; k < N; k++)
{
int x = ptr[pixel[k]];
if (x > vt)
{
if (++count > K)
{
cornerpos[ncorners++] = j;
if (nonmax_suppression)
curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
break;
}
}
else
count = 0;// reset so that only consecutive brighter pixels are counted
}
}
}
}

// Store the number of corners of this row in the slot just before its list.
cornerpos[-1] = ncorners;

// The first scanned row has no previous row to flush yet.
if (i == 3)
continue;

const uchar* prev = buf[(i - 4 + 3) % 3];// "+ 3" keeps the left operand of % non-negative
const uchar* pprev = buf[(i - 5 + 3) % 3];
cornerpos = cpbuf[(i - 4 + 3) % 3];
ncorners = cornerpos[-1];

for (k = 0; k < ncorners; k++)// decide whether each corner of the previous row survives non-maximum suppression
{
j = cornerpos[k];
int score = prev[j];
// Keep the corner if suppression is off, or if its score is strictly greater
// than all 8 neighbours in the rows above (pprev), same (prev) and below (curr).
if (!nonmax_suppression ||
(score > prev[j + 1] && score > prev[j - 1] &&
score > pprev[j - 1] && score > pprev[j] && score > pprev[j + 1] &&
score > curr[j - 1] && score > curr[j] && score > curr[j + 1]))
{
// x = j, y = i - 1 (the row of prev); the score is passed as the response.
keypoints.push_back(KeyPoint((float)j, (float)(i - 1), 7.f, -1, (float)score));
}
}
}
}

// Runs the FAST detector with the ring selected by `type`.
//
// In the TYPE_n_k constants visible here, k is the ring size passed to FAST_t
// and n equals k / 2 + 1 (5 of 8, 7 of 12, 9 of 16). Any other `type` value is
// ignored: nothing is called and `keypoints` is left untouched.
void FASTX(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, int type)
{
    if (type == FastFeatureDetector::TYPE_5_8)
    {
        FAST_t<8>(_img, keypoints, threshold, nonmax_suppression);
    }
    else if (type == FastFeatureDetector::TYPE_7_12)
    {
        FAST_t<12>(_img, keypoints, threshold, nonmax_suppression);
    }
    else if (type == FastFeatureDetector::TYPE_9_16)
    {
        FAST_t<16>(_img, keypoints, threshold, nonmax_suppression);
    }
}

// Convenience entry point: forwards to FASTX() using the TYPE_9_16 ring.
void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
{
    const int defaultType = FastFeatureDetector::TYPE_9_16;
    FASTX(_img, keypoints, threshold, nonmax_suppression, defaultType);
}
}

参考文献

[1] Busta M, Neumann L, Matas J. FASText: Efficient unconstrained scene text detector[C]//2015 IEEE International Conference on Computer Vision (ICCV). IEEE, 2015: 1206-1214.

[2] Rosten E, Drummond T. Machine learning for high-speed corner detection[C]//European Conference on Computer Vision. Springer Berlin Heidelberg, 2006: 430-443.

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航