POJ3450:Corporate Identity(后缀数组)
2015-06-09 19:50
381 查看
Description
Beside other services, ACM helps companies to clearly state their “corporate identity”, which includes company logo but also other signs, like trademarks. One of such companies is Internet Building Masters (IBM), which has recently asked ACM for help with
their new identity. IBM do not want to change their existing logos and trademarks completely, because their customers are used to the old ones. Therefore, ACM will only change existing trademarks instead of creating new ones.
After several other proposals, it was decided to take all existing trademarks and find the longest common sequence of letters that is contained in all of them. This sequence will be graphically emphasized to form a new logo. Then, the old trademarks may
still be used while showing the new identity.
Your task is to find such a sequence.
Input
The input contains several tasks. Each task begins with a line containing a positive integer N, the number of trademarks (2 ≤ N ≤ 4000). The number is followed by N lines, each containing one trademark. Trademarks will be composed only from lowercase letters,
the length of each trademark will be at least 1 and at most 200 characters.
After the last trademark, the next task begins. The last task is followed by a line containing zero.
Output
For each task, output a single line containing the longest string contained as a substring in all trademarks. If there are several strings of the same length, print the one that is lexicographically smallest. If there is no such non-empty string, output
the words “IDENTITY LOST” instead.
Sample Input
Sample Output
Source
CTU Open 2007
题意:求多个字符串的最长公共子串
思路:虽然这道题的最优解不是后缀数组,但是还是学习一下后缀数组的做法,还是和以前一样二分长度,然后再分开计数
Beside other services, ACM helps companies to clearly state their “corporate identity”, which includes company logo but also other signs, like trademarks. One of such companies is Internet Building Masters (IBM), which has recently asked ACM for help with
their new identity. IBM do not want to change their existing logos and trademarks completely, because their customers are used to the old ones. Therefore, ACM will only change existing trademarks instead of creating new ones.
After several other proposals, it was decided to take all existing trademarks and find the longest common sequence of letters that is contained in all of them. This sequence will be graphically emphasized to form a new logo. Then, the old trademarks may
still be used while showing the new identity.
Your task is to find such a sequence.
Input
The input contains several tasks. Each task begins with a line containing a positive integer N, the number of trademarks (2 ≤ N ≤ 4000). The number is followed by N lines, each containing one trademark. Trademarks will be composed only from lowercase letters,
the length of each trademark will be at least 1 and at most 200 characters.
After the last trademark, the next task begins. The last task is followed by a line containing zero.
Output
For each task, output a single line containing the longest string contained as a substring in all trademarks. If there are several strings of the same length, print the one that is lexicographically smallest. If there is no such non-empty string, output
the words “IDENTITY LOST” instead.
Sample Input
3
aabbaabb
abbababb
bbbbbabb
2
xyz
abc
0
Sample Output
abb
IDENTITY LOST
Source
CTU Open 2007
题意:求多个字符串的最长公共子串
思路:虽然这道题的最优解不是后缀数组,但是还是学习一下后缀数组的做法,还是和以前一样二分长度,然后再分开计数
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 1000005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8

// POJ 3450 "Corporate Identity": longest substring common to all K trademarks,
// ties broken by lexicographic order. All trademarks are concatenated into one
// integer text (each followed by its own unique separator), a suffix array and
// LCP array are built over it, and the answer length is binary-searched.

// Encoding of the concatenated text:
//   0                      terminal sentinel appended after the last separator
//   1..26                  'a'..'z'
//   SEP_BASE + i           separator written after trademark i
// Separators sit strictly above the letters so none of them can ever compare
// equal to a letter (the raw-ASCII scheme '#'+i collides with 'a'..'z' for
// i = 62..87). With at most 4000 trademarks the largest symbol is
// 27 + 3999 = 4026, which is below ALPHABET.
const int SEP_BASE = 27;
const int ALPHABET = 5000;

// Scratch buffers for the prefix-doubling radix sort.
int wa[N], wb[N], wsf[N], wv[N];
// sa[i]:     start position in s of the suffix that is i-th in sorted order.
// rnk[p]:    sorted position of the suffix starting at p (inverse of sa).
//            (Not called `rank`: that name is ambiguous with std::rank under
//            `using namespace std;` on C++11 and later.)
// height[i]: longest common prefix of the suffixes ranked i-1 and i.
// s:         the encoded concatenated text.
int sa[N], rnk[N], height[N], s[N];
// id[p]: index of the trademark that position p of s belongs to.
// vis[t]: stamp of the last LCP group in which trademark t was seen.
int id[N], vis[4005];
char str[205], ans[205];

// True when the two suffixes a and b agree on both halves of the current
// 2k-long key, i.e. they keep the same rank after this doubling round.
int cmp(int *r, int a, int b, int k)
{
    return r[a] == r[b] && r[a + k] == r[b + k];
}

// Builds the suffix array of r[0..n-1] into sa using prefix doubling with
// counting sort. n must include the appended terminal 0; every value of r
// must lie in [0, m).
void getsa(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb, *t;

    // Initial counting sort by first symbol.
    for (i = 0; i < m; i++) wsf[i] = 0;
    for (i = 0; i < n; i++) wsf[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wsf[i] += wsf[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wsf[x[i]]] = i;

    p = 1;
    j = 1;
    for (; p < n; j *= 2, m = p)
    {
        // y: suffixes ordered by their second key (rank of the suffix j ahead).
        // Suffixes with no second key come first.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort of y by first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wsf[i] = 0;
        for (i = 0; i < n; i++) wsf[wv[i]]++;
        for (i = 1; i < m; i++) wsf[i] += wsf[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wsf[wv[i]]] = y[i];

        // Recompute ranks for the doubled key length; p ends as the number of
        // distinct ranks and becomes the next round's alphabet size.
        t = x;
        x = y;
        y = t;
        x[sa[0]] = 0;
        for (p = 1, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}

// Fills rnk[] and height[] from the global sa. n excludes the terminal 0,
// so sa[0] is the sentinel suffix and real suffixes occupy ranks 1..n.
void getheight(int *r, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rnk[sa[i]] = i;
    for (i = 0; i < n; i++)
    {
        // The LCP can shrink by at most one when moving to the next position.
        if (k) k--;
        j = sa[rnk[i] - 1];
        while (r[i + k] == r[j + k]) k++;
        height[rnk[i]] = k;
    }
}

// Returns true if some string of length mid occurs in all k trademarks.
// On success the first such string in suffix-array order (hence the
// lexicographically smallest) is written to ans.
//
// Suffixes that start on a separator never join a group: every separator is
// unique, so their LCP with any neighbour is 0 < mid.
bool check(int mid, int n, int k)
{
    int i, j, cnt = 0;
    // Each maximal run of height >= mid gets its own stamp, so starting a new
    // run is O(1) instead of clearing the whole vis array.
    int stamp = 1;
    MEM(vis, 0);
    for (i = 2; i <= n; i++)
    {
        if (height[i] < mid)
        {
            stamp++;
            cnt = 0;
            continue;
        }
        if (vis[id[sa[i - 1]]] != stamp)
        {
            vis[id[sa[i - 1]]] = stamp;
            cnt++;
        }
        if (vis[id[sa[i]]] != stamp)
        {
            vis[id[sa[i]]] = stamp;
            cnt++;
        }
        if (cnt == k)
        {
            // Decode symbols 1..26 back to 'a'..'z'.
            for (j = 0; j < mid; j++) ans[j] = (char)('a' + s[sa[i] + j] - 1);
            ans[mid] = '\0';
            return 1;
        }
    }
    return 0;
}

// Reads tasks until a line containing 0 (or end of input) and prints one
// answer per task. Input is assumed to follow the problem statement:
// 2 <= K <= 4000 trademarks of 1..200 lowercase letters each.
int main()
{
    int n, i, j, k, len;
    // Stop on the terminating 0 and also on EOF / malformed input.
    while (scanf("%d", &k) == 1 && k)
    {
        int minlen = INF;
        n = 0;
        for (i = 0; i < k; i++)
        {
            if (scanf("%204s", str) != 1) return 0;
            len = (int)strlen(str);
            if (len < minlen) minlen = len;
            for (j = 0; j < len; j++)
            {
                s[n] = str[j] - 'a' + 1;
                id[n] = i;
                n++;
            }
            s[n] = SEP_BASE + i;
            id[n] = i;
            n++;
        }
        s[n] = 0;

        getsa(s, sa, n + 1, ALPHABET);
        getheight(s, n);

        // A common substring cannot be longer than the shortest trademark.
        // Successful probes have strictly increasing mid, so ans always holds
        // the longest match found so far.
        int l = 1, r = minlen, mid, flag = 0;
        while (l <= r)
        {
            mid = (l + r) / 2;
            if (check(mid, n, k))
            {
                flag = 1;
                l = mid + 1;
            }
            else
                r = mid - 1;
        }
        if (flag)
            printf("%s\n", ans);
        else
            printf("IDENTITY LOST\n");
    }
    return 0;
}
相关文章推荐
- js上传图片预览实现
- 论如何搞定可恶的口腔溃疡
- heap&stack
- 20150608位操作
- thinkphp使用flash上传验证登录问题
- MAC pro 安装和使用『brew』
- 《摇滚南京》——"人生下来就是孤独"
- poj - 3041 Asteroids (二分图最大匹配+匈牙利算法)
- 随笔3
- Java笔记01 编程基础与java入门
- WebBrowser之获取跳转页面的Document接口源码
- css3处理sprite背景图压缩来解决H5网页在手机浏览器下图标模糊的问题
- LevelDB场景分析2--Open
- 有序向量Vector
- 苹果应用商店审核指南
- 【Linux】 MySQL
- 新锐房地产销售管理系统(部分流程)技术解释(五) 销售管理_客户登记
- Github托管代码步骤
- c#选择填空题题库
- git乱码解决方案汇总