您的位置:首页 > 其它

后缀数组 - poj2758 Checking the Text

2015-09-02 16:21 363 查看

题目:

http://poj.org/problem?id=2758

题意:

给一个字符串,提供两种操作,操作I:在 当前 第i个字符之前插入字符c;操作Q:求 原序列
后缀i,与后缀j的LCP,计算LCP时应把插入字符一起算上

思路:

确实没看出来这玩意能用后缀数组搞……

啊本题有好几个坑需要额外注意:

1. 插入时候的位置指的是当前序列;

2. 询问的位置指的是原序列,然后插入字符应计入比较;

3. 题目仅仅是说 English letter 而非 lower case letter,wa了半天才找到 - -|||;

4. I操作的步数非常少,最多200次

先抛开插入操作考虑询问操作,求一个序列任意两个后缀的LCP,显然后缀数组加RMQ,实现方式为对height数组做预处理,查询RMQ(rank[i],rank[j]);然后考虑插入,因为插入的操作数非常少,对于询问,可以先找到i和j的原序列LCP,再找到最近的一个插入在i,j后的字符距离i,j的位置k(这里找到最近的插入字符位置我是用set自带的平衡二叉树来加速查询),分情况讨论。

1. 如果LCP <  k-1,显然插入字符对查询值无影响;

2. 如果LCP >= k-1,则从k位置开始,用t1,t2逐个指向当前序列中的字符,直至t1,t2指向的均为原序列中的字符为止,然后递归查询t1,t2

本题在思路上不是特别麻烦,难点在看清题目的表述,以及实现能力上

代码:

#include <iostream>
#include <algorithm>
#include <stdio.h>
#include <string.h>
#include <vector>
#include <set>
using namespace std;

// Largest 32-bit signed value; query() starts from it when searching for the
// nearest insertion point, so "nothing found" compares larger than any LCP.
const int MAXINT = 0x7fffffff;
// Capacity of every per-position array (5 * 10^4 plus 100 slots of slack).
// Written with integer literals only: the value is used as an array bound, so
// it should not depend on a floating-point expression being converted to int.
const int MAXSIZE = 50000 + 100;

// ins[i] holds, in text order, the characters inserted in front of original
// character i; ins[len] holds the characters placed after the last original
// character.
std::vector<char> ins[MAXSIZE];
// Original positions i for which sinsert() has added something to ins[i].
std::set<int> inset;

// Counting loop over [0, n). The bound is parenthesised so that an argument
// containing a low-precedence operator is not torn apart by the `<`.
#define rep(i,n) for(int i = 0; i < (n); i++)

// rk: rank of each suffix, sa: suffix array, height: LCP of neighbouring
// suffixes in sa order; wa and res are scratch buffers for getSa(), which also
// borrows rk and height as scratch while it runs.
int rk[MAXSIZE], sa[MAXSIZE], height[MAXSIZE], wa[MAXSIZE], res[MAXSIZE];
// The original text; getSuffix() re-encodes it in place as small integers.
char w[MAXSIZE];
// Number of characters of the original text stored in w.
int len;

// Builds the suffix array `sa` of w[0..len) by prefix doubling with counting sorts.
// `up` is the number of buckets for the first pass; getSuffix() passes
// (largest key in w) + 1. The global arrays rk, height, res and wa are used
// here purely as work buffers, so rk and height hold no meaningful values
// when this function returns.
void getSa(int up) {
// k: current rank of each position, id: positions ordered by second key,
// r: scratch (later the previous ranks), cnt: counting-sort buckets.
int *k = rk, *id = height, *r = res, *cnt = wa;
// First pass: counting sort of all positions by their single character.
rep(i, up) cnt[i] = 0;
rep(i, len) cnt[k[i] = w[i]]++;
// Prefix sums. This also adds into cnt[up], a slot the zeroing loop above
// does not clear.
rep(i, up) cnt[i + 1] += cnt[i];
// Fill from the back so that equal keys keep their relative order.
for (int i = len - 1; i >= 0; i--) {
sa[--cnt[k[i]]] = i;
}
// d: length already sorted on; p: first a fill index, later the rank counter.
int d = 1, p = 0;
// p is 0 on every entry to the loop body; the loop is left through the
// return below.
while (p < len){
// Order by second key: the last d positions have no second half and come
// first, then every other position in the order of its second half in sa.
for (int i = len - d; i < len; i++) id[p++] = i;
rep(i, len) if (sa[i] >= d) id[p++] = sa[i] - d;
// r[i] is the first key (current rank) of the i-th position in that order.
rep(i, len) r[i] = k[id[i]];
// Stable counting sort by first key gives the order on 2*d characters.
rep(i, up) cnt[i] = 0;
rep(i, len) cnt[r[i]]++;
rep(i, up) cnt[i + 1] += cnt[i];
for (int i = len - 1; i >= 0; i--) {
sa[--cnt[r[i]]] = id[i];
}
// After the swap r points at the previous ranks (indexed by position) and
// k at the buffer that receives the new ranks.
swap(k, r);
p = 0;
k[sa[0]] = p++;
rep(i, len - 1) {
// Neighbours share a rank only when both have a second half inside the
// text and both halves had equal previous ranks.
if (sa[i] + d < len && sa[i + 1] + d < len && r[sa[i]] == r[sa[i + 1]] && r[sa[i] + d] == r[sa[i + 1] + d])
k[sa[i + 1]] = p - 1;
else k[sa[i + 1]] = p++;
}
// p is the number of distinct ranks; once it reaches len the order is final.
if (p >= len) return;
// Next round: double the sorted length; ranks now lie in [0, p).
d <<= 1, up = p, p = 0;
}
}

void getHeight() {
int i, k, h = 0;
rep(i, len) rk[sa[i]] = i;
rep(i, len) {
if (rk[i] == 0)
h = 0;
else {
k = sa[rk[i] - 1];
if (h) h--;
while (w[i + h] == w[k + h]) h++;
}
height[rk[i]] = h;
}
}

// Prepares the suffix structures for the text currently stored in w:
// records its length in len, re-encodes every character in place as
// (character - 'A' + 1), writes a 0 terminator at w[len], and then builds sa
// via getSa() and rk/height via getHeight().
void getSuffix() {
    len = strlen(w);

    int largest = 0;
    for (int pos = 0; pos < len; ++pos) {
        w[pos] = w[pos] - 'A' + 1;
        if (w[pos] > largest) {
            largest = w[pos];
        }
    }
    w[len] = 0;

    // One more bucket than the largest key so that key is a valid index.
    getSa(largest + 1);
    getHeight();
}

// NOTE(review): not referenced by any code visible in this file; the
// parameter `A` of RMQ_init shadows it inside that function.
int A[MAXSIZE];
// Sparse table for range-minimum queries: RMQ_init stores in dmin[i][j] the
// minimum of the 2^j consecutive input values starting at index i.
int dmin[MAXSIZE][32];

// Builds the sparse table dmin over A[0..len).
// A:   values to index (this parameter shadows the global of the same name).
// len: number of values (shadows the global len).
// After the call dmin[i][j] is the minimum of A[i .. i + 2^j - 1] for every
// window that fits inside the array.
void RMQ_init(int A[], int len){
    // Level 0: windows of length one are the values themselves.
    for (int pos = 0; pos < len; ++pos) {
        dmin[pos][0] = A[pos];
    }

    // Each further level joins two adjacent windows of the level below.
    for (int level = 1; (1 << level) <= len; ++level) {
        const int half = 1 << (level - 1);
        for (int pos = 0; pos + (1 << level) - 1 < len; ++pos) {
            const int left = dmin[pos][level - 1];
            const int right = dmin[pos + half][level - 1];
            dmin[pos][level] = right < left ? right : left;
        }
    }
}

// Returns the minimum of the values indexed L..R (inclusive) using the table
// built by RMQ_init: two windows of the largest power-of-two length that fits,
// one anchored at L and one ending at R, together cover the whole range.
int RMQ_min(int L, int R){
    const int width = R - L + 1;

    int level = 0;
    while ((1 << (level + 1)) <= width) {
        ++level;
    }

    const int fromLeft = dmin[L][level];
    const int fromRight = dmin[R - (1 << level) + 1][level];
    return fromRight < fromLeft ? fromRight : fromLeft;
}

// Inserts character c in front of the p-th character (1-based) of the CURRENT
// text, i.e. the original text plus everything inserted so far.
//
// The current text is viewed as groups: group i (0 <= i < len) is ins[i]
// followed by original character i, and the final group is ins[len] alone.
// The function locates the group containing position p, inserts c into that
// group's bucket at the matching offset and records the group in inset.
//
// c: character to insert, already encoded the same way as w.
// p: 1-based position in the current text. A position past the end appends
//    to the text; a position below 1 is treated as 1.
//
// Fix over the previous version: an insertion that lands inside ins[len]
// (in front of a character that was itself appended) shifted elements one
// slot past the end of the vector without growing it, which wrote out of
// bounds and dropped the last appended character. Every case now goes
// through vector::insert, which grows the bucket.
void sinsert(char c, int p){
    // Number of current-text characters that precede the group being examined.
    int before = 0;
    for (int pos = 0; pos <= len; ++pos) {
        std::vector<char>& bucket = ins[pos];
        const int pending = static_cast<int>(bucket.size());

        // Original character pos sits at current position before + pending + 1.
        // If that is still in front of p, the target lies in a later group.
        if (pos < len && before + pending < p - 1) {
            before += pending + 1;
            continue;
        }

        // 0-based slot inside the bucket that ends up holding c.
        int offset = p - before - 1;
        if (offset > pending) offset = pending;  // past the end of the text: append
        if (offset < 0) offset = 0;              // p < 1: insert at the very front

        bucket.insert(bucket.begin() + offset, c);
        inset.insert(pos);
        return;
    }
}

int query(int s1,int s2){
//cout<<"query: "<<s1<<" "<<s2<<endl;
int ans = 0;
if (s1 == s2){
for (int i = s1 + 1;i<len;++i){
ans+=ins[i].size();
}
ans += len-s1;
ans += ins[len].size();
return ans;
}
if (s1>s2) swap(s1,s2);
int lcp;
if (rk[s1] < rk[s2]) lcp = RMQ_min(rk[s1] + 1,rk[s2]);
else lcp = RMQ_min(rk[s2] + 1,rk[s1]);
ans = lcp;

//cout<<"RMQ_lcp: "<<ans<<endl;

int t = MAXINT;
set<int>::iterator t1 = inset.upper_bound(s1);
set<int>::iterator t2 = inset.upper_bound(s2);
if (t1!=inset.end())
t = t < *t1-s1 ? t : *t1-s1;
if (t2!=inset.end())
t = t < *t2-s2 ? t : *t2-s2;

//cout<<"t: "<<t<<endl;

if (lcp > t-1){
int i = s1 + t;
int j = s2 + t;
int i1 = 0, j1 = 0;
ans = t;
bool flag = true;
while (flag && (ins[j].size() != 0 || ins[i].size() != 0)){
char ic, jc;
if (i1 < ins[i].size()){
ic = ins[i][i1];
i1++;
}
else{
if (i == len) {flag = false;break;}
ic = w[i];
i++;
i1 = 0;
}
//cout<<"ic: "<<ic<<endl;
if (j1 < ins[j].size()){
jc = ins[j][j1];
j1++;
}
else{
if (j == len) {flag = false;break;}
jc = w[j];
j++;
j1 = 0;
}
//cout<<"jc: "<<jc<<endl;

if (ic == jc) ans++;
else flag = false;
//if (i == len || j == len) flag = false;
}
if (!flag) return ans;
else {
return ans+query(i,j);
}
}
else return ans;
}

// Reads the text, builds the suffix array and the range-minimum table, then
// processes n commands from standard input:
//   Q a b  - print the LCP of the suffixes starting at original characters
//            a and b (1-based), taking inserted characters into account;
//   I c p  - insert character c in front of the p-th character of the
//            current text.
// Changes over the previous version: every read is width-limited and its
// result checked, so malformed or truncated input stops processing instead of
// overrunning a buffer or using uninitialised values; the inserted character
// is read with " %c", which skips any amount of whitespace instead of
// assuming exactly one separator character.
int main(){
    // Build "%<capacity-1>s" so the text can never overrun w.
    char textFormat[16];
    snprintf(textFormat, sizeof(textFormat), "%%%ds", static_cast<int>(sizeof(w)) - 1);
    if (scanf(textFormat, w) != 1) return 0;

    getSuffix();
    RMQ_init(height,len);

    int n = 0;
    if (scanf("%d",&n) != 1) return 0;

    for (int i = 0; i < n; ++i){
        char temp[5];
        if (scanf("%4s",temp) != 1) break;

        if (temp[0] == 'Q'){
            int a, b;
            if (scanf("%d %d",&a,&b) != 2) break;
            int ans = query(a-1,b-1);
            printf("%d\n", ans);
        }
        else{
            char c;
            int a;
            if (scanf(" %c %d",&c,&a) != 2) break;
            // Encode the character the same way getSuffix() encodes w.
            sinsert(c - 'A' + 1,a);
        }
    }
    return 0;
}


4000
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: