您的位置:首页 > 其它

POJ3693 Maximum repetition substring

2018-01-23 19:32 169 查看
Maximum repetition substring

Time Limit: 1000MS  Memory Limit: 65536K
Total Submissions: 11130  Accepted: 3431
Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

Source

【题解】

枚举循环节长度L,不难发现对于1, 1+L, 1+2L, 1+3L, ......, 1+kL<=n这些位置上的字符,任何循环节长度为L且至少重复两次的子串,一定存在相邻的两个位置分别落在它的两个相邻的长度为L的循环节里。注意,这里仅仅说存在,没有说存在几个,可能这两个字符代表的长度上有多个重叠的不同循环节。但我们只需求出最大可行的循环节个数及其对应的循环节长度即可。

对于相邻的1 + kL, 1 + kL + L,求LCP,若L | LCP,则L满足要求,循环节个数为LCP / L + 1,长度为L;否则将两个位置同时左移L - LCP % L位,再求LCP,重复上述判断。

只需证明存在循环节长度为L的子串一定能被找到,比较显然,留给大家了

找的时候记录下所有可行的最大循环节个数的循环节长度,还需证明最大循环节个数所有的循环节长度都能被找到,相当于枚举循环节长度命中的位置,找到的是尽可能大的循环节个数,这也是显然的。

于是这个算法正确性就是显然的(显然了老半天我才显然出十分显然的证明来。。。。)

求出最大可行的循环节个数及其对应的循环节长度,枚举所有sa[i]和可行长度,判断即可

#include <iostream>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <algorithm>
#include <queue>
#include <vector>
#include <cmath>
// Function-like macros that take over the names min/max/abs from this point on.
// They expand textually, so each argument expression appears (and may be evaluated)
// twice: once in the comparison and once as the result. Keep arguments free of side effects.
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
// Yields -1 * (a) when (a) compares below zero, otherwise (a) unchanged.
#define abs(a) ((a) < 0 ? (-1 * (a)) : (a))
// Exchanges the values held by `a` and `b` using one temporary copy.
// T must be copyable and copy-assignable.
template <class T>
inline void swap(T& a, T& b)
{
    T saved(a);
    a = b;
    b = saved;
}
// Reads the next run of decimal digits from stdin and stores its value in `x`.
// Every non-digit character before the run is skipped; if the character immediately
// preceding the first digit is '-', the value is negated. The character that ends
// the digit run is consumed. If stdin ends before any digit is seen, `x` is left at 0.
inline void read(int &x)
{
    x = 0;
    // getchar() returns int so that EOF stays distinguishable from every real character.
    int ch = getchar();
    int prev = ch;
    // Skip to the first digit; stop at EOF so exhausted input cannot loop forever.
    while(ch != EOF && (ch < '0' || ch > '9'))
    {
        prev = ch;
        ch = getchar();
    }
    if(ch == EOF) return;
    while(ch >= '0' && ch <= '9')
    {
        x = x * 10 + (ch - '0');
        ch = getchar();
    }
    if(prev == '-') x = -x;
}

// Large sentinel value; not referenced by the code visible in this file.
const int INF = 0x3f3f3f3f;
// Capacity of the per-position arrays (1,000,000 plus 10 spare slots).
const int MAXN = 1000000 + 10;

// Lookup tables for the sparse table: main() fills pow2[j] with 2^j and
// lo2[v] with floor(log2(v)) before any query is made.
int pow2[30], lo2[MAXN];

struct SuffixArray
{
char s[MAXN];int sa[MAXN], rank[MAXN], height[MAXN], t1[MAXN], t2[MAXN], n, c[MAXN];

int stmi[MAXN][30];

void clear(){n = 0;memset(sa, 0, sizeof(sa));}
void build_sa(int m)
{
int i, *x = t1, *y = t2;
for(i = 0;i <= m;++ i) c[i] = 0;
for(i = 1;i <= n;++ i) ++ c[x[i] = s[i]];
for(i = 1;i <= m;++ i) c[i] += c[i - 1];
for(i = n;i;-- i) sa[c[x[i]] --] = i;
for(int k = 1;k <= n;k <<= 1)
{
int p = 0;
for(i = n - k + 1;i <= n;++ i) y[++ p] = i;
for(i = 1;i <= n;++ i) if(sa[i] > k) y[++ p] = sa[i] - k;
for(i = 0;i <= m;++ i) c[i] = 0;
for(i = 1;i <= n;++ i) ++ c[x[y[i]]];
for(i = 1;i <= m;++ i) c[i] += c[i - 1];
for(i = n;i;-- i) sa[c[x[y[i]]] --] = y[i];
swap(x, y);p = 0,x[sa[1]] = ++ p;
for(i = 2;i <= n;++ i) x[sa[i]] = sa[i] + k <= n && sa[i - 1] + k <= n && y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k] ? p : ++ p;
if(p >= n) break;m = p;
}
}
void build_height()
{
int i,j,k = 0;
for(i = 1;i <= n;++ i) rank[sa[i]] = i;
for(i = 1;i <= n;++ i)
{
if(k) -- k; if(rank[i] == 1) continue;
j = sa[rank[i] - 1];
while(i + k <= n && j + k <= n && s[i + k] == s[j + k]) ++ k;
height[rank[i]] = k;
}
}
void build_st()
{
for(int i = 1;i <= n;++ i) stmi[i][0] = height[i];
for(int j = 1;pow2[j] <= n;++ j)
for(int i = 1;i <= n;++ i)
if(i + pow2[j - 1] <= n) stmi[i][j] = min(stmi[i][j - 1], stmi[i + pow2[j - 1]][j - 1]);
else stmi[i][j] = stmi[i][j - 1];
}
int getmin(int x, int y)
{
return min(stmi[x][lo2[y - x + 1]], stmi[y - pow2[lo2[y - x + 1]] + 1][lo2[y - x + 1]]);
}
int LCP(int x, int y)
{
if(rank[x] + 1 > rank[y]) swap(x, y);
return getmin(rank[x] + 1, rank[y]);
}
}A;

// State used by main(): `ca` is the running test-case number, `ma` the best repetition
// count found for the current case, and ans[1..tot] the period lengths recorded for it.
int ca, ma, ans[MAXN], tot;

int main()
{
pow2[0] = 1;
for(int i = 1;i < 30;++ i) pow2[i] = pow2[i - 1] << 1;
lo2[1] = 0;
for(int i = 2;i <= 200000;++ i) lo2[i] = lo2[i >> 1] + 1;
while(scanf("%s", A.s + 1) != EOF && A.s[1] != '#')
{
++ ca, A.n = strlen(A.s + 1);ma = tot = 0;
A.build_sa('z' + 1);A.build_height();A.build_st();
for(register int L = 1;L <= A.n >> 1;++ L)
{
for(int l = 1,r = L + 1;r <= A.n;l += L, r += L)
{
int k = A.LCP(l, r), t = (L - k % L);
if(k % L == 0)
{
if(k / L + 1 > ma) ma = k / L + 1, ans[tot = 1] = L;
else if(k / L + 1 == ma) ans[++ tot] = L;
}
else if(l - t >= 1 && r - t >= 1)
{
k = A.LCP(l - t, r - t);
if(k && k % L == 0)
{
if(k / L + 1 > ma) ma = k / L + 1, ans[tot = 1] = L;
else if(k / L + 1 == ma) ans[++ tot] = L;
}
}
}
}
printf("Case %d: ", ca);
int flag = 0, s = 0, t = 0;
for(int i = 1;i <= A.n;++ i)
if(flag) break;
else
for(int j = 1;j <= tot;++ j)
if(A.sa[i] + ma * ans[j] - 1<= A.n && A.LCP(A.sa[i], A.sa[i] + ans[j]) >= (ma - 1) * ans[j])
{
flag = 1;s = A.sa[i];t = A.sa[i] + ma * ans[j] - 1;break;
}
for(int i = s;i <= t;++ i) printf("%c", A.s[i]);
putchar('\n');
A.clear();
}
return 0;
}


POJ3693
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: