您的位置:首页 > 其它

poj 3693(后缀数组)

2015-10-21 01:47 239 查看
题意:给定一个由小写字母组成的字符串,求其中重复次数最多的连续重复子串(即由某个串连续重复若干次构成的子串);若有多组解,输出字典序最小的。

题解:论文算法合集之《后缀数组——处理字符串的有力工具》例8讲得很明白。先枚举重复单元的长度L,依次以s[0]、s[L]、s[2 * L]…为起点:如果某个长度为L的单元连续重复出现至少两次,它必然覆盖其中两个相邻的起点,那么就可以用height(配合RMQ)求这两个相邻起点对应后缀的lcp,lcp / L + 1就是从该起点开始的重复次数。但这不一定是最大次数,因为重复串真正的起点可能落在两个枚举点之间,所以还要从首字符向前匹配到不能匹配为止(最多L - 1个字符):一旦向前能再匹配出L - lcp % L个字符,说明次数还可以加1。如果重复次数相同,用rank判断字典序。L - lcp % L的思路出自 http://blog.csdn.net/acm_cxlove/article/details/7941205

#include <cstdio>
#include <cstring>
#include <algorithm>
// Index mapping helpers for dc3(); both expand to an expression that reads dc3's local `tb`.
// F: position x of the current string -> slot in the reduced string.
//    Positions with x % 3 == 1 occupy slots [0, tb); every other position is offset by tb.
#define F(x) ((x) % 3 == 1 ? (x) / 3 : (x) / 3 + tb)
// G: slot of the reduced string -> position in the current string
//    (undoes F for positions with x % 3 == 1 or x % 3 == 2).
#define G(x) ((x) >= tb ? ((x) - tb) * 3 + 2 : (x) * 3 + 1)
using namespace std;

// Capacity bound used to size every global buffer below.
const int N = 100005;

// Scratch buffers of the DC3 construction (wa, wb: index lists; ws: counting-sort
// buckets; wv: cached keys / ranks) and the resulting suffix array sa.
// dc3() keeps its reduced string and reduced suffix array behind the first n
// entries of the arrays it is given (r + n, sa + n), hence the 3 * N sizing.
int wa[N * 3], wb[N * 3], ws[N * 3], wv[N * 3], sa[N * 3];

// rank:   position of each suffix inside sa (filled by calheight).
// height: common-prefix length of each suffix with its predecessor in sa (filled by calheight).
// s:      the input encoded as small integers, followed by a 0 terminator.
// f:      sparse table built over height by RMQ_init.
// NOTE(review): with `using namespace std;`, an unqualified use of `rank` can clash
// with std::rank when <type_traits> is included transitively (C++11 and later).
int rank[N * 3], height[N * 3], s[N * 3], f[N * 3][35];

// Raw input token as read by scanf("%s").
char str[N];

// Returns 1 when the three consecutive entries of r starting at a are equal to
// the three consecutive entries starting at b, and 0 otherwise.
int c0(int *r, int a, int b) {
    for (int off = 0; off < 3; ++off) {
        if (r[a + off] != r[b + off]) return 0;
    }
    return 1;
}

// Ordering predicate used while merging in dc3(): returns 1 when the suffix at a
// sorts before the suffix at b, else 0.
// Leading entries of r are compared first; for k == 2 one more entry is compared
// (via the k == 1 case on a + 1, b + 1); remaining ties are broken by the ranks
// cached in the global wv.
int c12(int k, int *r, int a, int b) {
    if (r[a] != r[b]) return r[a] < r[b];
    if (k == 2) return c12(1, r, a + 1, b + 1);
    return wv[a + 1] < wv[b + 1];
}

// Stable counting sort of the n indices in a by key r[a[i]]; the ordered indices
// are written to b. Keys must lie in [0, m). Uses the globals wv (cached keys)
// and ws (bucket counters) as scratch space.
void sort(int *r, int *a, int *b, int n, int m) {
    int idx;
    // Cache the key of every element.
    for (idx = 0; idx < n; ++idx) wv[idx] = r[a[idx]];
    // Clear the buckets, then count occurrences of each key.
    for (idx = 0; idx < m; ++idx) ws[idx] = 0;
    for (idx = 0; idx < n; ++idx) ++ws[wv[idx]];
    // Prefix sums turn counts into one-past-the-end output positions.
    for (idx = 1; idx < m; ++idx) ws[idx] += ws[idx - 1];
    // Walk backwards so that equal keys keep their relative order.
    idx = n;
    while (idx-- > 0) {
        const int slot = --ws[wv[idx]];
        b[slot] = a[idx];
    }
}

// Builds the suffix array of r[0..n) into sa[0..n) with the DC3 scheme.
//   r  : input keys, each in [0, m); entries r[n] and r[n + 1] are overwritten with 0,
//        and the reduced string of the recursion is stored starting at r + n.
//   sa : output; the suffix array of the reduced string is stored starting at sa + n.
//   n  : number of entries to sort.
//   m  : exclusive upper bound of the keys (bucket count for the counting sort).
// Uses the globals wa, wb, wv and ws as scratch space.
void dc3(int *r, int *sa, int n, int m) {
    // `tb` is read by the F / G macros, so it must keep this name.
    int i, j, *rn = r + n, *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
    // Pad with zeros so the triple reads below (r + 1, r + 2, c0) stay defined past the end.
    r[n] = r[n + 1] = 0;
    // Collect every position whose index is not a multiple of 3.
    for (i = 0; i < n; i++) if (i % 3 != 0) wa[tbc++] = i;
    // Three stable passes, least significant key first: orders the collected positions by
    // (r[i], r[i + 1], r[i + 2]).
    sort(r + 2, wa, wb, tbc, m);
    sort(r + 1, wb, wa, tbc, m);
    sort(r, wa, wb, tbc, m);
    // Name the triples: equal neighbours share a name; p ends up as the number of distinct names.
    for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
        rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
    // Duplicate names: recurse on the reduced string. Otherwise the names already are ranks.
    if (p < tbc) dc3(rn, san, tbc, p);
    else for (i = 0; i < tbc; i++) san[rn[i]] = i;
    // Slots below tb map to positions 3 * slot + 1; take the position just before each one
    // (a multiple of 3), in the order given by san.
    for (i = 0; i < tbc; i++) if (san[i] < tb) wb[ta++] = san[i] * 3;
    // When n % 3 == 1 the last position is a multiple of 3 that the loop above cannot produce.
    if (n % 3 == 1) wb[ta++] = n - 1;
    // One stable pass on the first key finishes ordering the multiple-of-3 positions (into wa).
    sort(r, wb, wa, ta, m);
    // wb[i] becomes the i-th smallest collected position, wv[position] its rank.
    for (i = 0; i < tbc; i++) wv[wb[i] = G(san[i])] = i;
    // Merge the two sorted lists into sa.
    for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for (; i < ta; p++) sa[p] = wa[i++];
    for (; j < tbc; p++) sa[p] = wb[j++];
}

void calheight(int *r, int *sa, int n) {
int i, j, k = 0;
for (i = 1; i <= n; i++) rank[sa[i]] = i;
for (i = 0; i < n; height[rank[i++]] = k)
for (k ? k-- : 0, j = sa[rank[i] - 1]; r[i + k] == r[j + k]; k++);
}

// Builds the sparse table f over height[0..cnt): f[i][j] holds the minimum of
// height[i .. i + 2^j - 1].
void RMQ_init(int cnt) {
    for (int row = 0; row < cnt; ++row)
        f[row][0] = height[row];

    // `span` is the window length 2^lvl handled on each level.
    for (int lvl = 1, span = 2; span <= cnt; ++lvl, span <<= 1) {
        const int half = span >> 1;
        for (int row = 0; row + span - 1 < cnt; ++row) {
            const int left = f[row][lvl - 1];
            const int right = f[row + half][lvl - 1];
            f[row][lvl] = min(left, right);
        }
    }
}

// Returns the minimum of the table built by RMQ_init over the inclusive index range [L, R],
// using two overlapping power-of-two windows.
int RMQ(int L, int R) {
    const int width = R - L + 1;
    int k = 0;
    // Largest k with 2^k <= width.
    for (; (1 << (k + 1)) <= width; ++k) {}
    const int head = f[L][k];
    const int tail = f[R - (1 << k) + 1][k];
    return min(head, tail);
}

int main() {
int cas = 1;
while (scanf("%s", str) == 1 && str[0] != '#') {
int len = 0;
char c = 'z' + 1;
for (len = 0; str[len]; len++) {
s[len] = str[len] - 'a' + 1;
c = min(c, str[len]);
}
s[len] = 0;
dc3(s, sa, len + 1, 50);
calheight(s, sa, len);
RMQ_init(len + 1);
int maxx = 0, st = 0, Len = 0;
for (int i = 1; i <= len / 2; i++) {
for (int j = 0; j + i < len; j += i) {
int pos1 = rank[j], pos2 = rank[j + i];
if (pos1 > pos2) swap(pos1, pos2);
int temp = RMQ(pos1 + 1, pos2);
int num = temp / i + 1;
int r = i - temp % i;
int pos = j, cnt = 0;
for (int k = j - 1; k > j - i && str[k] == str[k + i]; k--) {
cnt++;
if (cnt == r)
num++, pos = k;
else if (rank[pos] > rank[k])
pos = k;
}
if (num > maxx) {
maxx = num;
Len = num * i;
st = pos;
}
else if (num == maxx && rank[st] > rank[pos]) {
st = pos;
Len = num * i;
}
}
}
if (!maxx)
printf("Case %d: %c\n", cas++, c);
else {
printf("Case %d: ", cas++);
for (int i = st; i < st + Len; i++)
printf("%c", str[i]);
printf("\n");
}
}
return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: