您的位置:首页 > 其它

POJ 3693 Maximum repetition substring 后缀数组 暴力 rmq

2014-11-05 10:41 375 查看
题意:给出一个字符串,求出一个重复次数最多的且字典序最小的子串。

思路:前面已经给出了求重复次数最多的子串的方法,但是该怎样在其中找出字典序最小的子串呢?

可以想到,sa数组是按照字典序排列的,我们可以记录所有能达到最多重复次数的循环节长度 l。然后,从前往后遍历sa数组,对每个后缀 sa[i] 依次检查记录下来的每个长度 l:如果 lcp(sa[i], sa[i]+l) 大于等于 (最多重复次数 - 1) × l,那么从 sa[i] 开始、长度为 l × 最多重复次数 的子串就是字典序最小的答案。

代码如下:

#include <cstdio>
#include <algorithm>
#include <cstring>

using namespace std;

// Empty placeholder type: it declares no data members and no functions.
struct ST {
};

// Global instance of the placeholder type.
ST st;

// Suffix array (DC3 / skew construction) + height (LCP) array + sparse-table
// RMQ, together with the search for the most-repeated, lexicographically
// smallest substring.
//
// Typical call order for a text r[0..n-1] whose values lie in [1, m):
//   calc_sa(r, n, m); calc_height(r, n); rmq_init(n); solve(r, n);
//
// Requirements visible from the code:
//   * r must have room for about 3 * (n + 1) ints: dc3() stores the reduced
//     string of each recursion level directly behind its input (rn = r + n).
//   * n must be smaller than MAX, because the RMQ tables are sized by MAX.
struct DC3{
static const int maxn =1001000;// three times the maximum text length
int rank[maxn];// rank[i] = index in sa of the suffix starting at i (0 .. n)
int sa[maxn];// sa[0] is the sentinel suffix at n; sa[1 .. n] are the real suffixes
int height[maxn];// height[i] = lcp(suffix sa[i-1], suffix sa[i]) for i in 1 .. n
int wa[maxn],wb[maxn],wv[maxn],ws[maxn];// scratch buffers shared by all dc3 levels
static const int MAX = 200100;
int p[MAX];// p[i] = floor(log2(i)); p[0] = -1
int d[MAX][20];// d[i][j] = min(height[i .. i + 2^j - 1])
int cand[MAX];// period lengths that reach the best repetition count

// Builds the log table and the sparse table over height[1..n].
void rmq_init(int n){
    p[0] = -1;
    for(int i = 1; i <= n; ++i)
        p[i] = (i & (i-1)) ? p[i-1] : p[i-1] + 1;// grows by one at each power of two
    for(int i = 1; i <= n; ++i) d[i][0] = height[i];
    for(int j = 1; j <= p[n]; ++j)
        for(int i = 1; i + (1 << j) - 1 <= n; ++i)
            d[i][j] = std::min(d[i][j-1], d[i + (1 << (j-1))][j-1]);
}

// Minimum of height[l..r]; requires 1 <= l <= r <= n.
int rmp_query(int l, int r){
    int k = p[r - l + 1];
    return std::min(d[l][k], d[r - (1 << k) + 1][k]);
}

// Longest common prefix of the suffixes starting at text positions l and r.
// The positions must be different and both within 0 .. n; rank[] holds no
// valid data beyond that range.
int lcp(int l, int r){
    int lo = rank[l], hi = rank[r];// switch from text positions to sa indices
    if(lo > hi){ int t = lo; lo = hi; hi = t; }
    return rmp_query(lo + 1, hi);// height[k] pairs sa[k-1] with sa[k]
}

// Position of a mod-1 / mod-2 suffix inside the reduced string: mod-1
// suffixes occupy the first tb slots, mod-2 suffixes follow.
static int f12(int x, int tb){
    return x / 3 + (x % 3 == 1 ? 0 : tb);
}

// Inverse of f12: reduced-string slot back to a text position.
static int g12(int x, int tb){
    return x < tb ? x * 3 + 1 : (x - tb) * 3 + 2;
}

// True when the character triples starting at a and b are equal.
int c0(int *r, int a, int b){
    return r[a] == r[b] && r[a+1] == r[b+1] && r[a+2] == r[b+2];
}

// Merge comparison: is the mod-0 suffix at a smaller than the mod-k suffix
// at b (k is 1 or 2)? wv[] holds the ranks of the mod-1 / mod-2 suffixes.
int c12(int k, int *r, int a,int b){
    if (k == 2)
        return r[a] < r[b] || (r[a] == r[b] && c12(1, r, a+1, b+1));
    return r[a] < r[b] || (r[a] == r[b] && wv[a+1] < wv[b+1]);
}

// Stable counting sort of the n indices in a[] by key r[a[i]] into b[].
// Every key must lie in [0, m).
void radix_sort(int *r, int *a,int *b,int n,int m) {
    int i;
    for (i = 0; i < n; i++)    wv[i] = r[a[i]];
    for (i = 0; i < m; i++)    ws[i] = 0;
    for (i = 0; i < n; i++)    ws[wv[i]]++;
    for (i = 1; i < m; i++)    ws[i] += ws[i-1];
    for (i = n-1; i >= 0; i--) b[--ws[wv[i]]] = a[i];
}

// DC3 suffix sort of r[0..n-1] (values in [0, m)) into sa[0..n-1].
// Writes r[n] and r[n+1], and uses r + n / sa + n as the input and output of
// the recursive call on the reduced string.
void dc3(int *r,int *sa,int n, int m){
    int i, j, p;
    int *rn = r + n, *san = sa + n;
    int ta = 0, tb = (n+1)/3, tbc = 0;
    r[n] = r[n+1] = 0;// padding so that triples near the end are well defined

    // Collect the suffixes whose start is not a multiple of three.
    for(i = 0; i < n; i++)
        if(i%3 != 0) wa[tbc++] = i;

    // Sort them by their first three characters, least significant first.
    radix_sort(r+2, wa, wb, tbc, m);
    radix_sort(r+1, wb, wa, tbc, m);
    radix_sort(  r, wa, wb, tbc, m);

    // Name the triples; equal triples share a name.
    rn[f12(wb[0], tb)] = 0;
    for (p = 1, i = 1; i < tbc; i++)
        rn[f12(wb[i], tb)] = c0(r, wb[i-1], wb[i]) ? p-1 : p++;

    // Recurse only when some names collide; otherwise the names already
    // are the ranks.
    if(p < tbc) dc3(rn, san, tbc, p);
    else
        for (i = 0; i < tbc; i++) san[rn[i]] = i;

    // Order the mod-0 suffixes by (first character, rank of the next suffix).
    for(i = 0; i < tbc; i++)
        if (san[i] < tb) wb[ta++] = san[i]*3;
    if(n%3 == 1) wb[ta++] = n-1;
    radix_sort(r, wb, wa, ta, m);

    // Map the sorted reduced suffixes back to text positions, record ranks.
    for(i = 0; i < tbc; i++){
        wb[i] = g12(san[i], tb);
        wv[wb[i]] = i;
    }

    // Merge the two sorted groups.
    for(i = 0,j = 0,p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j]%3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for( ; i < ta; p++) sa[p] = wa[i++];
    for( ; j < tbc; p++) sa[p] = wb[j++];
}

// Builds sa[0..n] for the text r[0..n-1]. Every r[i] must satisfy
// 1 <= r[i] < m. A 0 sentinel is written to r[n], so n + 1 suffixes are sorted.
void calc_sa(int *r, int n, int m){
    if(n < 0) return;
    r[n] = 0;// append the sentinel, length becomes n + 1
    if(n == 0){ sa[0] = 0; return; }// only the sentinel suffix exists
    dc3(r, sa, n+1, m);
}

// Fills rank[0..n] and height[1..n] from sa. Requires calc_sa() first and
// the sentinel r[n] == 0 still in place (it terminates the comparison loop).
void calc_height(int *r,int n){
    for (int i = 0; i <= n; i++)
        rank[sa[i]] = i;
    int k = 0;
    for (int i = 0; i < n; i++){// walk in text order so k drops by at most one
        if(k > 0) --k;
        int j = sa[rank[i]-1];
        while(r[i+k] == r[j+k]) ++k;
        height[rank[i]] = k;
    }
}

// Debug helper: prints the n real suffixes in sorted order, one per line.
void print(int * r, int n){
    for(int i = 1; i <= n; ++i){
        for(int j = sa[i]; j < n; ++j)
            putchar(r[j]);
        putchar('\n');
    }
}

// Locates the substring with the largest number of consecutive repetitions,
// lexicographically smallest among ties. On success stores its start in pos
// and its length in len and returns true; returns false only when n <= 0.
// Requires calc_sa(), calc_height() and rmq_init() for the same text.
bool find_answer(int n, int &pos, int &len){
    if(n <= 0) return false;
    int best = 1;// any single character is a 1-fold repetition
    int cnt = 0;
    for(int l = 1; l < n; ++l){
        for(int i = 0; i + l < n; i += l){
            int common = lcp(i, i + l);
            int times = common / l + 1;
            // The repetition may start before i: shift left so that the
            // partial tail becomes one more full period.
            int j = i - (l - common % l);
            if(j >= 0 && lcp(j, j + l) >= l) ++times;
            if(times < 2 || times < best) continue;
            if(times > best){ best = times; cnt = 0; }
            // Periods arrive in increasing order, so comparing with the last
            // entry is enough to keep cand[] free of duplicates (cnt <= n).
            if(cnt == 0 || cand[cnt-1] != l) cand[cnt++] = l;
        }
    }
    if(best == 1){// nothing repeats: answer is the smallest single character
        pos = sa[1];
        len = 1;
        return true;
    }
    for(int i = 1; i <= n; ++i){// suffixes in lexicographic order
        for(int k = 0; k < cnt; ++k){
            int l = cand[k];
            if(sa[i] + l >= n) continue;// no room for a second period; also keeps rank[] in range
            if(lcp(sa[i], sa[i] + l) >= (best - 1) * l){
                pos = sa[i];
                len = l * best;
                return true;
            }
        }
    }
    return false;// not reached: some suffix always realises `best`
}

// Prints the answer for the text r[0..n-1] followed by a newline.
void solve(int *r,int n){
    int pos = 0, len = 0;
    if(find_answer(n, pos, len))
        for(int i = pos; i < pos + len; ++i)
            putchar(r[i]);
    putchar('\n');
}
} solver;

int T,N;
// Input line; texts of up to 100009 characters plus the terminating NUL.
char str[100010];
// Integer copy of the text handed to the suffix-array builder. dc3() stores
// the reduced string of every recursion level directly behind its input
// (rn = r + n), so the buffer has to be about three times the text length.
int r[3 * 100010];
// Reads one text per line until a line starting with '#' (or end of input)
// and prints "Case k: <answer>" for each text.
int main(void)
{
    //freopen("input.txt","r",stdin);
    int cas = 1;
    // The field width keeps scanf inside str[100010]; checking the return
    // value ends the loop on EOF instead of reprocessing the old buffer forever.
    while(scanf("%100009s",str) == 1 && str[0] != '#'){
        int n = static_cast<int>(strlen(str));
        // Go through unsigned char so bytes >= 128 cannot become negative
        // bucket indices in the radix sort.
        for(int i = 0; i < n; ++i)
            r[i] = static_cast<unsigned char>(str[i]);
        solver.calc_sa(r,n,256);
        solver.calc_height(r,n);
        solver.rmq_init(n);
        //solver.print(r,n);
        printf("Case %d: ",cas++);
        solver.solve(r,n);
    }
    return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: