POJ 3693 Maximum repetition substring 后缀数组 暴力 rmq
2014-11-05 10:41
375 查看
题意:给出一个字符串,求出一个重复次数最多的且字典序最小的子串。
思路:前面已经给出了求重复次数最多的子串的方法,但是该怎样找出其中字典序最小的子串呢?
可以想到,sa数组是按照字典序排列的,我们可以记录所有能达到最多重复次数的循环节长度。然后,从前往后考虑sa数组,对每个记录下来的长度 l,如果后缀 sa[i] 与后缀 sa[i]+l 的 lcp 大于等于 (最多重复次数-1)×l,那么从 sa[i] 开始、长度为 最多重复次数×l 的子串就是字典序最小的子串。
代码如下:
思路:前面已经给出了求重复次数最多的子串的方法,但是该怎样找出其中字典序最小的子串呢?
可以想到,sa数组是按照字典序排列的,我们可以记录所有能达到最多重复次数的循环节长度。然后,从前往后考虑sa数组,对每个记录下来的长度 l,如果后缀 sa[i] 与后缀 sa[i]+l 的 lcp 大于等于 (最多重复次数-1)×l,那么从 sa[i] 开始、长度为 最多重复次数×l 的子串就是字典序最小的子串。
代码如下:
#include <cstdio>
#include <algorithm>
#include <cstring>

/*
 * POJ 3693 - Maximum repetition substring.
 *
 * DC3 bundles three things:
 *   - a DC3 (skew) suffix-array builder            (calc_sa / dc3),
 *   - the height (adjacent-LCP) array              (calc_height),
 *   - a sparse table over height for LCP queries   (rmq_init / lcp),
 * plus solve(), which prints the lexicographically smallest substring that
 * is a block repeated the maximum number of times.
 *
 * Conventions: the text occupies r[0..n-1] with every value in 1..m-1, and a
 * 0 sentinel is appended at r[n]. sa[0] is therefore always the sentinel
 * suffix and the real suffixes are sa[1..n].
 */
struct DC3 {
    static const int maxn = 1001000;  // work-array capacity (at least 3x the text length)
    int rank[maxn];                   // rank[i] = index of suffix i inside sa, i in 0..n
    int sa[maxn];                     // suffix start positions in sorted order, indices 0..n
    int height[maxn];                 // height[i] = LCP of suffixes sa[i-1] and sa[i], i in 1..n
    int wa[maxn], wb[maxn], wv[maxn], ws[maxn];  // scratch buffers for the radix sorts

    static const int MAX = 200100;
    int p[MAX];       // p[i] = floor(log2(i)), p[0] = -1
    int d[MAX][20];   // d[i][j] = min(height[i .. i + 2^j - 1])
    int cand[MAX];    // distinct period lengths that reach the best repetition count

    /* Builds the log table and the sparse table over height[1..n]. */
    void rmq_init(int n) {
        p[0] = -1;
        for (int i = 1; i <= n; ++i)
            p[i] = (i & (i - 1)) ? p[i - 1] : p[i - 1] + 1;  // bump on powers of two
        for (int i = 1; i <= n; ++i)
            d[i][0] = height[i];
        for (int j = 1; j <= p[n]; ++j)
            for (int i = 1; i + (1 << j) - 1 <= n; ++i)
                d[i][j] = std::min(d[i][j - 1], d[i + (1 << (j - 1))][j - 1]);
    }

    /* Minimum of height[l..r]; requires 1 <= l <= r <= n. */
    int rmp_query(int l, int r) {
        int k = p[r - l + 1];
        return std::min(d[l][k], d[r - (1 << k) + 1][k]);
    }

    /*
     * Longest common prefix of the suffixes starting at text positions l and r.
     * Requires l != r and both in 0..n (n denotes the sentinel suffix).
     */
    int lcp(int l, int r) {
        l = rank[l], r = rank[r];  // translate text positions into sa indices
        if (l > r) std::swap(l, r);
        l++;                       // height[l+1..r] covers the pairs between the two ranks
        return rmp_query(l, r);
    }

#define F(x) ((x) / 3 + ((x) % 3 == 1 ? 0 : tb))
#define G(x) ((x) < tb ? (x) * 3 + 1 : ((x) - tb) * 3 + 2)

    /* True when the character triples starting at a and b are equal. */
    int c0(int *r, int a, int b) {
        return r[a] == r[b] && r[a + 1] == r[b + 1] && r[a + 2] == r[b + 2];
    }

    /*
     * Merge comparison between a mod-0 suffix a and a mod-k suffix b (k is 1
     * or 2); wv holds the ranks of the already-sorted mod-1/mod-2 suffixes.
     */
    int c12(int k, int *r, int a, int b) {
        if (k == 2)
            return r[a] < r[b] || (r[a] == r[b] && c12(1, r, a + 1, b + 1));
        else
            return r[a] < r[b] || (r[a] == r[b] && wv[a + 1] < wv[b + 1]);
    }

    /* Stable counting sort of the n indices in a by key r[a[i]] (keys in 0..m-1) into b. */
    void radix_sort(int *r, int *a, int *b, int n, int m) {
        int i;
        for (i = 0; i < n; i++) wv[i] = r[a[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) b[--ws[wv[i]]] = a[i];
        return;
    }

    /*
     * DC3 suffix sort of r[0..n-1] (values in 0..m-1) into sa[0..n-1].
     * The recursion stores the reduced problem at r + n and sa + n, so both
     * buffers must have room for roughly three times n.
     */
    void dc3(int *r, int *sa, int n, int m) {
        int i, j, *rn = r + n, *san = sa + n;
        int ta = 0, tb = (n + 1) / 3, tbc = 0, p;
        r[n] = r[n + 1] = 0;  // padding so that triples may read past the end
        for (i = 0; i < n; i++)
            if (i % 3 != 0) wa[tbc++] = i;
        radix_sort(r + 2, wa, wb, tbc, m);
        radix_sort(r + 1, wb, wa, tbc, m);
        radix_sort(r, wa, wb, tbc, m);
        for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
            rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
        if (p < tbc)
            dc3(rn, san, tbc, p);  // names are not unique yet: recurse
        else
            for (i = 0; i < tbc; i++) san[rn[i]] = i;
        for (i = 0; i < tbc; i++)
            if (san[i] < tb) wb[ta++] = san[i] * 3;
        if (n % 3 == 1) wb[ta++] = n - 1;
        radix_sort(r, wb, wa, ta, m);
        for (i = 0; i < tbc; i++) wv[wb[i] = G(san[i])] = i;
        for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
            sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
        for (; i < ta; p++) sa[p] = wa[i++];
        for (; j < tbc; p++) sa[p] = wb[j++];
        return;
    }

#undef F
#undef G

    /*
     * Builds sa for the text r[0..n-1]. Every r[i] must lie in 1..m-1; a 0
     * sentinel is written to r[n], so the sorted string has length n + 1.
     */
    void calc_sa(int *r, int n, int m) {
        r[n] = 0;
        dc3(r, sa, n + 1, m);
    }

    /* Fills rank[0..n] and height[1..n]; calc_sa must have been called first. */
    void calc_height(int *r, int n) {
        int i, j, k = 0;
        for (i = 0; i < n + 1; i++) rank[sa[i]] = i;
        // Visit suffixes in text order; the sentinel at r[n] stops the scan.
        for (i = 0; i < n; height[rank[i++]] = k)
            for (k ? k-- : 0, j = sa[rank[i] - 1]; r[i + k] == r[j + k]; k++)
                ;
        return;
    }

    /* Debug helper: prints every suffix in sorted order, one per line. */
    void print(int *r, int n) {
        for (int i = 1; i <= n; ++i) {
            for (int j = sa[i]; j < n; ++j) putchar(r[j]);
            putchar('\n');
        }
    }

    /*
     * Prints the lexicographically smallest substring of r[0..n-1] that
     * consists of one block repeated the maximum possible number of times,
     * followed by a newline. Requires calc_sa, calc_height and rmq_init.
     */
    void solve(int *r, int n) {
        int cnt = 0;   // number of entries in cand
        int maxx = 0;  // best repetition count seen so far

        // Phase 1: for each period l, probe positions 0, l, 2l, ... and extend
        // each match to the left to get the best count that covers the probe.
        for (int l = 1; l < n; ++l) {
            for (int i = 0; i + l < n; i += l) {
                int ans = lcp(i, i + l);
                int j = i - (l - ans % l);  // leftmost start that could add one more block
                ans = ans / l + 1;
                if (j >= 0 && lcp(j, j + l) >= l) ans++;
                if (ans > maxx) {
                    maxx = ans;
                    cnt = 0;
                    cand[cnt++] = l;
                } else if (ans == maxx && (cnt == 0 || cand[cnt - 1] != l)) {
                    // Record each period once: l only grows, so duplicates are
                    // adjacent and cnt stays below n.
                    cand[cnt++] = l;
                }
            }
        }

        // Phase 2: walk suffixes in lexicographic order; the first one that
        // starts a maxx-fold repetition for some recorded period is the answer.
        // Periods are ascending, so the first hit is also the shortest.
        int pos = 0, len = 0;
        bool found = false;
        for (int i = 1; i <= n && !found; ++i) {
            int start = sa[i];
            for (int j = 0; j < cnt; ++j) {
                int l = cand[j];
                if (start + l > n) break;  // rank[] is only defined up to index n
                if (lcp(start, start + l) >= (maxx - 1) * l) {
                    len = l * maxx;
                    pos = start;
                    found = true;
                    break;
                }
            }
        }
        if (!found && n > 0) {
            // Only reachable for a one-character text: no period was tried.
            pos = sa[1];
            len = 1;
        }

        for (int i = pos; i < pos + len; ++i) putchar(r[i]);
        putchar('\n');
    }
};

DC3 solver;

char str[100010];
int r[3 * 100010 + 16];  // DC3 recursion needs about three times the text length

/* Reads strings until a line starting with '#' (or EOF) and answers each one. */
int main(void) {
    //freopen("input.txt","r",stdin);
    int cas = 1;
    while (scanf("%100009s", str) == 1 && str[0] != '#') {
        int N = (int)strlen(str);
        for (int i = 0; i < N; ++i)
            r[i] = (unsigned char)str[i];  // keep radix-sort keys inside 1..255
        solver.calc_sa(r, N, 256);
        solver.calc_height(r, N);
        solver.rmq_init(N);
        //solver.print(r,N);
        printf("Case %d: ", cas++);
        solver.solve(r, N);
    }
    return 0;
}
相关文章推荐
- POJ 3693 Maximum Repetition Substring <后缀数组 + RMQ>
- POJ 3693 Maximum Repetition Substring 后缀数组
- POJ 3693 Maximum repetition substring 后缀数组求重复次数最多子串
- 【POJ】3693 Maximum repetition substring 【后缀数组——求最长连续重复字串】
- 【后缀数组】 HDOJ 2459 && POJ 3693 Maximum repetition substring
- POJ-3693 Maximum repetition substring 后缀数组
- POJ 3693 Maximum repetition substring 后缀数组 + RMQ预处理
- POJ 3693 Maximum repetitionsubstring(后缀数组:循环子串)
- HDU 2459 PKU 3693 Maximum repetition substring 后缀数组 RMQ
- POJ 3693 Maximum repetition substring(后缀数组[重复次数最多的连续重复子串])
- POJ - 3693 Maximum repetition substring 后缀数组 分块
- POJ-3693-Maximum repetition substring(后缀数组-重复次数最多的连续重复子串)
- POJ 3693 Maximum repetition substring 后缀数组与区间最值的完美结合
- POJ 3693 Maximum repetition substring(后缀数组神题)
- POJ - 3693 Maximum repetition substring(后缀数组求重复次数最多的连续重复子串)
- POJ 3693 Maximum repetition substring(后缀数组求最长重复子串)
- Poj 3693 Maximum repetition substring|后缀数组|st表
- 【后缀数组】【poj 3693】Maximum repetition substring
- Poj 3693 & Hdu 2459 Maximum repetition substring (08合肥Online 后缀数组+RMQ 重复次数最多的连续重复子串)
- POJ 3693 Maximum repetition substring