您的位置：首页 > 其它

POJ 2774 解题报告

2015-04-29 05:10 176 查看

这道题可能是近期写过的最纠结的一道题了。之前一直没有看过suffix array，这次必须看了。

geeksforgeeks上面有通俗易懂的O(nlognlogn)的实现：http://www.geeksforgeeks.org/suffix-array-set-2-a-nlognlogn-algorithm/。但我不清楚是否卡时间。

最好的资料还是discuss中大家都提到的罗穗骞大神的实现：https://github.com/oeddyo/algorithm/blob/master/resources/%E7%89%9B%E4%BA%BA%E8%B0%88ACM%E7%BB%8F%E9%AA%8C(%E5%8C%85%E6%8B%AC%E5%9B%BD%E5%AE%B6%E9%9B%86%E8%AE%AD%E9%98%9F%E8%AE%BA%E6%96%87)/%E5%9B%BD%E5%AE%B6%E9%9B%86%E8%AE%AD%E9%98%9F%E8%AE%BA%E6%96%87/%E5%9B%BD%E5%AE%B6%E9%9B%86%E8%AE%AD%E9%98%9F2009%E8%AE%BA%E6%96%87%E9%9B%86/11.%E7%BD%97%E7%A9%97%E9%AA%9E%E3%80%8A%E5%90%8E%E7%BC%80%E6%95%B0%E7%BB%84%E2%80%94%E2%80%94%E5%A4%84%E7%90%86%E5%AD%97%E7%AC%A6%E4%B8%B2%E7%9A%84%E6%9C%89%E5%8A%9B%E5%B7%A5%E5%85%B7%E3%80%8B/%E5%90%8E%E7%BC%80%E6%95%B0%E7%BB%84%E2%80%94%E2%80%94%E5%A4%84%E7%90%86%E5%AD%97%E7%AC%A6%E4%B8%B2%E7%9A%84%E6%9C%89%E5%8A%9B%E5%B7%A5%E5%85%B7.pdf

后缀数组本身并不容易，虽然罗穗骞大神的解释已经非常通俗易懂，仍然需要费些功夫才能理解。

最终我只是大致了解了大神的程序，这里照搬了源程序（俗称“模板”）。

解题思路是将两个string合成一个，然后看后缀的最长共同前缀(longest common prefix, LCP)。这是后缀数组的一个常见应用。

需要注意的是，必须保证两个后缀来自不同的string，最简单的办法是在第一个string后面加一个从未出现过的字符，比如'$'（下面的代码实际使用的是'z' + 1）。这里有很好的解释：http://poj.org/showmessage?message_id=85977。

由于模板程序需要在字符串后面加一个`0`（在sa中将排在第一位），所以n、n - 1、0、1之类的下标要区分清楚。

贡献了很多WA和RE，不过也是个理解加深的过程。

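提交结果：

| User | Problem | Result | Memory | Time | Language | Code Length |
| --- | --- | --- | --- | --- | --- | --- |
| thestoryofsnow | 2774 | Accepted | 5640K | 344MS | C++ | 5079B |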

/*
ID: thestor1
LANG: C++
TASK: poj2774
*/
#include <iostream>
#include <fstream>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <limits>
#include <string>
#include <vector>
#include <list>
#include <set>
#include <map>
#include <queue>
#include <stack>
#include <algorithm>
#include <cassert>

// Capacity reserved for one input string (100000 characters plus one spare).
const int MAXS = 100000 + 1;
// Capacity of the combined sequence: two strings of MAXS each, plus one
// separator placed after the first string (so a common prefix cannot run
// from one string into the other) and one terminating 0 at the very end.
const int MAXN = 2 * MAXS + 2;

// Scratch buffers used by da():
// wa / wb hold the two rank arrays that are swapped every round,
// wv holds the sort key of each entry, wc holds the counting-sort buckets.
int wa[MAXN];
int wb[MAXN];
int wv[MAXN];
int wc[MAXN];
// Suffix array filled in by da().
int sa[MAXN];

// Tells whether positions a and b carry the same two-part key.
//
// r holds the rank of every block of length l. The key of a position is the
// pair (r[pos], r[pos + l]): the rank of its first half and of its second
// half. Returns 1 when both halves match, 0 otherwise. The second half is
// only inspected when the first halves are equal.
int cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b])
    {
        return 0;
    }
    return r[a + l] == r[b + l];
}

// r is the input char sequence (expressed as int[])
// r[n - 1] == 0 and r[i] > 0 (0 <= r < n - 1) for simplicity of computation
// n is the length of r
// m is the range of r, that is, 0 <= r[i] < m
void da(int *r, int *sa, int n, int m)
{
int i, j, p, *x = wa, *y = wb, *t;

// radix sort, j = 1
for (i = 0; i < m; i++)
{
wc[i] = 0;
}
for (i = 0; i < n;i++)
{
wc[x[i] = r[i]]++;
}
for (i = 1; i < m; i++)
{
wc[i] += wc[i - 1];
}
for (i = n - 1; i >= 0; i--)
{
sa[--wc[x[i]]] = i;
}

for (j = 1, p = 1; p < n; j *= 2, m = p)
{
// rank of second part can take advantage of sa
// i + j >= n for i in [n - j ~ n - 1]
// that is, this range do not have second part
// that is, second part should be smallest
for (p = 0, i = n - j; i < n; i++)
{
y[p++] = i;
}
for (i = 0; i < n; i++)
{
// this position can be second part (sa[i] - j >= 0)
if (sa[i] >= j)
{
// rank them according to sa
y[p++] = sa[i] - j;
}
}

// radix sort, according to first part rank (x)
// both parts (x and y) have size of j
// now the total size is 2 * j
for (i = 0; i < n; i++)
{
// get its first part
wv[i] = x[y[i]];
}
for (i = 0; i < m; i++)
{
wc[i] = 0;
}
for (i = 0; i < n; i++)
{
wc[wv[i]]++;
}
for (i = 1; i < m; i++)
{
wc[i] +=  wc[i - 1];
}
for (i = n - 1; i >= 0; i--)
{
sa[--wc[wv[i]]] = y[i];
}

// swap x and y
// after swap, y stands for current rank (based on two parts)
// x will be overwritten
t = x, x = y, y = t;

p = 1;
x[sa[0]] = 0;
for (i = 1; i < n; i++)
{
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
}
}

// in the end, sa[0] will be meaningless as it will be n - 1
// remember r[n - 1] is always 0 and others are larger than 0?
return;
}

// rank[p] is the position of suffix p inside sa, i.e. rank[sa[i]] = i.
int rank[MAXN];
// height[i] is the length of the longest common prefix of the suffixes
// sa[i - 1] and sa[i]; height[0] is meaningless.
int height[MAXN];

// Fills rank[] and height[] for the sequence r of length n whose suffix
// array is sa.
//
// Suffixes are visited in text order. With h[p] = height[rank[p]] the
// property h[p] >= h[p - 1] - 1 holds, so each comparison can resume from
// the previous match length minus one instead of starting at zero.
void calheight(int *r, int *sa, int n)
{
    for (int pos = 1; pos < n; pos++)
    {
        rank[sa[pos]] = pos;
    }

    int matched = 0;
    // the last position (the terminator) is not visited
    for (int start = 0; start < n - 1; start++)
    {
        matched = matched > 0 ? matched - 1 : matched;

        // suffix that immediately precedes this one in sorted order
        int prev = sa[rank[start] - 1];
        while (r[start + matched] == r[prev + matched])
        {
            matched++;
        }
        height[rank[start]] = matched;
    }
}

// Reads two strings of lowercase letters from stdin and prints the length of
// their longest common substring.
//
// The strings are joined as  first + separator + second + 0, the suffix array
// and height array of the joined sequence are built, and the answer is the
// largest height between two neighbouring suffixes that start in different
// strings.
//
// Returns 0 on success, 1 when the input is missing or contains a character
// outside 'a'..'z'.
int main()
{
    // static storage: together about 1 MB, too much to place on the stack
    static char str[MAXN];
    static int r[MAXN];

    // The field width limits each read to MAXS - 1 = 100000 characters, so
    // both strings plus separator and terminator always fit in MAXN.
    if (scanf("%100000s", str) != 1)
    {
        fprintf(stderr, "missing first string\n");
        return 1;
    }
    int N1 = (int)strlen(str);

    // separator: one past 'z', so it cannot occur inside either string
    str[N1] = 'z' + 1;

    if (scanf(" %100000s", str + N1 + 1) != 1)
    {
        fprintf(stderr, "missing second string\n");
        return 1;
    }

    // total length including the trailing 0 that da() expects
    int N = N1 + 1 + (int)strlen(str + N1 + 1) + 1;

    for (int i = 0; i < N - 1; ++i)
    {
        // da() uses the values as counting-sort indices in [0, 28), so
        // anything but a lowercase letter (or the separator) must be rejected
        if (i != N1 && (str[i] < 'a' || str[i] > 'z'))
        {
            fprintf(stderr, "input must consist of lowercase letters\n");
            return 1;
        }
        r[i] = str[i] - 'a' + 1;
    }
    r[N - 1] = 0;

    // 'a'..'z' map to 1..26 and the separator to 27, so the exclusive upper
    // bound m is 28
    da(r, sa, N, 28);
    calheight(r, sa, N);

    int ans = 0;
    for (int i = 1; i < N; ++i)
    {
        // one suffix must start in the first string, the other in the second
        bool crosses = (sa[i] < N1 && sa[i - 1] > N1)
                    || (sa[i] > N1 && sa[i - 1] < N1);

        // No clamping at the end of the first string is needed: the separator
        // occurs exactly once, so it can never be part of a common prefix of
        // two different suffixes.
        if (crosses && height[i] > ans)
        {
            ans = height[i];
        }
    }

    printf("%d\n", ans);

    return 0;
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航