您的位置:首页 > 其它

最长公共子序列问题的改进算法

2012-07-05 18:06 204 查看
本文介绍一种不同于动态规划法的、求解最长公共子序列(LCS)问题的新方法。该算法把求解公共子序列的问题转化为求解矩阵 L(p,m) 的问题。在利用定理求解矩阵元素的过程中,有以下两条规则:

(1) 当 i<k 时,L(k,i)=null;

(2) 当 L(k,i)=k 时,L(k,i+1)=L(k,i+2)=…=L(k,m)=k。

依次求出每一行中各列的元素,直到发现第 p+1 行全部为 null 时退出循环。得出矩阵 L 之后,B[L(1,m-p+1)] B[L(2,m-p+2)] … B[L(p,m)] 即为 A 和 B 的 LCS,其中 p 为 LCS 的长度,m 为 A 的长度。



/**
 * Builds the LCS matrix for str1 and str2.
 *
 * Row 0 is filled with -1. For k >= 1 and i >= k, matrix[k][i] is the
 * smallest 0-based index j in str2 such that the first i characters of str1
 * and str2[0..j] share a common subsequence of length k; INT_MAX (999999)
 * marks "no such index" (including every cell with i < k).
 *
 * The returned array always ends with one row that consists only of INT_MAX,
 * so the LCS length is (result.length - 2).
 *
 * @param str1 first string (columns of the matrix, 1-based)
 * @param str2 second string (cell values are 0-based indexes into it)
 * @return Array of rows, each row an Array of str1.length + 1 ints
 */
public function construct_LCS_Matrix(str1:String,str2:String):Array
{
    const INT_MAX:int = 999999;
    var matrix:Array = [[]];
    var m:int = str1.length;
    var n:int = str2.length;
    var k:int;
    var i:int;
    var j:int;
    var best:int;
    var lowerBound:int;
    var terminated:Boolean = false;

    for (k = 0; k <= m; k++)
    {
        matrix[0][k] = -1;
    }

    for (k = 1; k <= m; k++)
    {
        matrix[k] = [];
        for (i = 0; i <= m; i++)
        {
            if (i < k)
            {
                // Fewer than k characters of str1 cannot hold a length-k subsequence.
                matrix[k][i] = INT_MAX;
                continue;
            }

            // Start from L(k, i-1): a longer prefix of str1 can never do worse.
            best = matrix[k][i-1];

            // k-1 is the smallest 0-based index that can end a length-k
            // subsequence, so once it is reached nothing can improve on it.
            if (best != k-1)
            {
                lowerBound = matrix[k-1][i-1];
                for (j = 0; j < n; j++)
                {
                    if (str1.charAt(i-1) == str2.charAt(j) && j > lowerBound)
                    {
                        if (j < best)
                        {
                            best = j;
                        }
                        break;
                    }
                }
            }
            matrix[k][i] = best;
        }

        if (matrix[k][m] == INT_MAX)
        {
            // No common subsequence of length k exists: this row terminates the matrix.
            terminated = true;
            break;
        }
    }

    if (!terminated)
    {
        // The LCS is as long as str1 (or str1 is empty); append the terminating
        // row explicitly so (result.length - 2) is still the LCS length.
        var sentinel:Array = [];
        for (i = 0; i <= m; i++)
        {
            sentinel[i] = INT_MAX;
        }
        matrix.push(sentinel);
    }
    return matrix;
}


// Example driver: build the LCS matrix for two sample strings and print the LCS.
var a:String = 'gawegweew';
var b:String = 'egwaeweewwfgewa';
var arr:Array = ac.construct_LCS_Matrix(a,b);
// Holds the characters of the LCS (collected back to front, reversed below).
var indexes:Array=[];
// Last column index of the matrix, i.e. the length of a.
var subLen:int = arr[0].length-1;
// arr.length-2 is used as the LCS length p; this assumes the matrix consists of
// row 0, rows 1..p and one trailing unused row -- TODO confirm against the builder.
// Walk the diagonal L(p,m), L(p-1,m-1), ... and read the matching characters of b.
for(var i:int=0;i<arr.length-2;i++)
{
indexes.push(b.charAt(arr[arr.length-2-i][subLen-i]));
}
indexes.reverse();
trace(indexes.join(''));


参考Url:/article/1361446.html

javascript 版本

/**
 * Builds the LCS matrix for two sequences.
 *
 * Cell values are 1-based positions in B; 0 stands for "no value" (the
 * "null" of the algorithm description). Column 0 of every row and rows whose
 * entries sum to 0 are dropped from the result.
 *
 * @param {Array|string} A first sequence (indexed with A[i-1])
 * @param {Array|string} B second sequence (indexed with B[j-1])
 * @returns {Array<Array<number>>} the remaining rows 1..m of the matrix, each
 *          of length A.length; an empty array when no row has a non-zero cell
 */
function LCS(A, B) {
    var m = A.length;
    var n = B.length;
    var L = [[]];
    var maxkk = [];
    var i, j, k;
    for (i = 0; i <= m; i++) {
        L[0][i] = 0;
        if (L[i] == null) {
            L[i] = [];
        }
        L[i][0] = 0;
        maxkk[i] = 0;
    }
    for (k = 1; k <= m; k++) {
        for (i = 1; i <= m; i++) {
            // Default: inherit the value of the previous column.
            L[k][i] = L[k][i-1];
            if (i < k) {
                // Invalid cell: when i < k, L(k,i) = null (represented by 0).
                L[k][i] = 0;
                continue;
            }
            if (L[k][i-1] == k && i <= m) {
                // Once L(k,i) = k, L(k,i+1) = L(k,i+2) = ... = L(k,m) = k.
                L[k][i] = k;
                maxkk[i] = k;
                continue;
            }
            if (i > k && L[k][k] == 0) {
                continue;
            }

            for (j = 1; j <= n; j++) {
                // Theorem 4 of the algorithm, scanning B in increasing order.
                if (A[i-1] == B[j-1]) {
                    if (j > maxkk[i]) {
                        if (L[k][i-1] > 0) {
                            L[k][i] = j < L[k][i-1] ? j : L[k][i-1];
                        } else {
                            L[k][i] = j;
                        }
                        maxkk[i] = L[k][i];
                        break;
                    }
                }
            }
            // A value smaller than the diagonal predecessor is reset to "no value".
            if (L[k][i] < L[k-1][i-1]) {
                L[k][i] = 0;
            }
        }
    }

    // Drop column 0 from rows 1..m and keep only rows with a non-zero entry.
    var L2 = [];
    for (i = 0; i < L.length; i++) {
        if (i > 0) {
            L[i].splice(0, 1);
            var sum = 0;
            for (j = 0; j < L[i].length; j++) {
                sum += L[i][j];
            }
            if (sum > 0) {
                L2.push(L[i]);
            }
        }
    }
    // The original text ended here without returning or closing the function;
    // the caller reads the filtered matrix, so return it.
    return L2;
}

/**
 * Returns the common word sequence of two space-separated strings, as found
 * by walking the matrix returned by LCS() along its diagonals.
 *
 * @param {string} userInput space-separated words typed by the user
 * @param {string} answer space-separated reference words
 * @returns {string} the selected words of `answer`, joined by single spaces;
 *          "" when LCS() returns no rows
 */
function getMaximumSequence(userInput, answer) {
    var inputWords = userInput.split(" ");
    var answerWords = answer.split(" ");
    var rows = LCS(inputWords, answerWords);
    if (rows[0] == null) {
        return "";
    }

    var rowCount = rows.length;
    var lastCol = rows[0].length - 1;
    var picked = [];

    // Try the diagonal ending in the last row first; each retry starts one row higher.
    for (var skip = 0; skip < rowCount; skip++) {
        var hitZero = false;
        picked = [];
        for (var step = 0; step < rowCount; step++) {
            var row = rowCount - 1 - step - skip;
            if (row < 0) {
                break;
            }
            // Cell values are 1-based positions in answerWords; 0 means "no value".
            var pos = rows[row][lastCol - step];
            if (pos == 0) {
                hitZero = true;
                break;
            }
            picked.push(answerWords[pos - 1]);
        }
        if (!hitZero) {
            break;
        }
    }

    // Words were collected from the end of the sequence backwards.
    picked.reverse();
    return picked.join(" ");
}


相关函数

// Error categories assigned to each mismatch; the values double as indexes
// into the per-category statistics array (index 0 is unused).
var MISSING = 1,    // word present in the answer but not in the input
    INCORRECT = 2,  // input word that differs from the answer at that position
    NEEDLESS = 3;   // input word with no counterpart in the answer
/**
 * Compares the user's input with the expected answer word by word and renders
 * the result: the answer into `resultBox`, the annotated diff into
 * `inputResult`, and the per-category statistics via renderStatistics().
 *
 * @param {string} userInput text typed by the user
 * @param {string} answer expected text
 */
function verifyUserInput(userInput, answer) {
    // NOTE(review): `answer` is inserted as raw HTML -- confirm it is trusted.
    resultBox.innerHTML = answer;

    var input = removeSymbols(userInput);
    var rightOne = removeSymbols(answer);
    var intersect = getMaximumSequence(input, rightOne);
    console.log("intersect:"+intersect);
    var inputArr = input.split(' ');
    var rightArr = rightOne.split(' ');
    var intersectArr = intersect.split(' ');

    // Words that fall outside the common sequence, keyed by the index of the
    // common word they precede (key == intersectArr.length means "after the last").
    var errorMapA = getErrorMap(inputArr, intersectArr);
    var errorMapB = getErrorMap(rightArr, intersectArr);
    var errorObjMap = {};
    var key, error;

    // Extra input words: wrong when the answer also has words there, needless otherwise.
    for (key in errorMapA) {
        error = {};
        errorObjMap[key] = error;
        error.incorrect = errorMapA[key];
        if (errorMapB[key] == null) {
            error.type = NEEDLESS;
        } else {
            error.correct = errorMapB[key];
            error.type = INCORRECT;
        }
    }

    // Answer words with nothing typed at that position are missing.
    for (key in errorMapB) {
        if (errorMapA[key] == null) {
            error = {};
            error.correct = errorMapB[key];
            error.type = MISSING;
            errorObjMap[key] = error;
        }
    }

    var retStr = "";
    // Indexed by MISSING / INCORRECT / NEEDLESS; slot 0 is unused.
    var errorStatistics = [[],[],[],[]];
    // i runs one past the last common word so trailing errors are emitted too.
    for (var i = 0; i <= intersectArr.length; i++) {
        error = errorObjMap[i];
        if (error != null) {
            switch (error.type) {
            case MISSING:
                retStr += " <b><font color='#009900'>" + error.correct + "</font></b> ";
                errorStatistics[MISSING].push(error.correct);
                break;
            case INCORRECT:
                // The closing tag used to carry an attribute ("</font color=...>"), which is invalid HTML.
                retStr += " <font color='#990000'><del>" + error.incorrect + "</del></font> " +
                    "<b><font color='#009900'>" + error.correct + "</font></b> ";
                errorStatistics[INCORRECT].push(error.correct);
                break;
            case NEEDLESS:
                retStr += " <b><font color='#999900'><del>" + error.incorrect + "</del></font></b> ";
                errorStatistics[NEEDLESS].push(error.incorrect);
                break;
            }
        }
        var correct = intersectArr[i];
        if (correct != null) {
            retStr += " " + correct;
        }
    }

    // Render once with the final statistics instead of on every iteration.
    renderStatistics(errorStatistics);
    inputResult.innerHTML = retStr;
}

/**
 * Manual smoke test: normalizes a reference sentence and a differently
 * punctuated/spaced input, then runs the comparison on them.
 */
function test() {
    var expected = removeSymbols("OK, let's all heave it up at the same time. ");
    var typed = removeSymbols("ok let's   all  heave it up at the same time");
    // Other inputs that can be tried by hand:
    //   getMaximumSequence("A B C A D F A G B", "A B H A I J K A G B")
    //   getMaximumSequence("a b c d f g h i j k m", "a b c e f g h i j k n")
    //   getMaximumSequence(expected, typed)
    verifyUserInput(typed, expected);
}


/**
 * Collects the words of strArr that are not part of the common sequence.
 *
 * The words are scanned in order while a cursor advances through
 * intersectArr on every match. Each non-matching word is appended
 * (space-separated) under the current cursor value, i.e. the index of the
 * common word it precedes; words after the last common word end up under
 * intersectArr.length.
 *
 * @param {Array<string>} strArr words to classify
 * @param {Array<string>} intersectArr common word sequence
 * @returns {Object} map from cursor index to the space-joined extra words
 */
function getErrorMap(strArr, intersectArr) {
    var extras = {};
    var cursor = 0;
    for (var n = 0; n < strArr.length; n++) {
        var word = strArr[n];
        if (cursor < intersectArr.length && intersectArr[cursor] == word) {
            // Matches the next common word: advance, nothing to record.
            cursor++;
            continue;
        }
        extras[cursor] = extras[cursor] != null ? extras[cursor] + " " + word : word;
    }
    return extras;
}

/**
 * Normalizes text for word-by-word comparison: keeps only ASCII letters
 * (lower-cased), whitespace, '-', '"' and "'", then collapses whitespace runs
 * to a single space and trims both ends.
 *
 * Whitespace is collapsed AFTER the symbols are removed, so a symbol standing
 * between two spaces ("a , b") does not leave a double space that would later
 * split into an empty word.
 *
 * @param {string} input raw text; null/undefined is treated as ""
 * @returns {string} normalized text with words separated by single spaces
 */
function removeSymbols(input) {
    if (input == null) {
        return '';
    }
    return String(input)
        .replace(/[^a-zA-Z\s\-"']/g, '') // drop everything except the kept characters
        .toLowerCase()                    // locale-independent: only ASCII letters remain
        .replace(/\s+/g, ' ')             // collapse runs of whitespace into one space
        .trim();
}

/**
 * Tells whether cCheck lies in the range 'a'..'z' or 'A'..'Z'.
 *
 * @param {string} cCheck value to test (callers pass a single character)
 * @returns {boolean} true when cCheck falls in either range
 */
function IsAlpha(cCheck) {
    var inLowerRange = 'a' <= cCheck && cCheck <= 'z';
    var inUpperRange = 'A' <= cCheck && cCheck <= 'Z';
    return inLowerRange || inUpperRange;
}

/**
 * Extracts the common word sequence of two space-separated strings from the
 * matrix produced by LCS().
 *
 * @param {string} userInput space-separated words typed by the user
 * @param {string} answer space-separated reference words
 * @returns {string} the selected words of `answer` joined with single spaces,
 *          or "" when LCS() yields no rows
 */
function getMaximumSequence(userInput, answer) {
    var userInputArr = userInput.split(" ");
    var answerArr = answer.split(" ");
    var matrix = LCS(userInputArr, answerArr);
    if (matrix[0] == null) {
        return "";
    }

    var lastColumn = matrix[0].length - 1;
    var words = [];
    var offset = 0;
    var complete = false;

    // Each pass walks one diagonal upwards and to the left; a pass that meets
    // a 0 cell is discarded and the next pass starts one row higher.
    while (!complete && offset < matrix.length) {
        words = [];
        complete = true;
        for (var d = 0; d < matrix.length; d++) {
            var rowIndex = matrix.length - 1 - d - offset;
            if (rowIndex < 0) {
                break;
            }
            var bIndex = matrix[rowIndex][lastColumn - d];
            if (bIndex == 0) {
                complete = false;
                break;
            }
            // Matrix cells hold 1-based positions in answerArr.
            words.push(answerArr[bIndex - 1]);
        }
        offset++;
    }

    words.reverse();
    return words.join(" ");
}

/**
 * Writes the per-category error summaries into their page elements.
 * Each summary has the form "<count> <word1>,<word2>,...".
 *
 * @param {Array<Array<string>>} errorData word lists indexed by the
 *        INCORRECT, MISSING and NEEDLESS constants
 */
function renderStatistics(errorData) {
    var summarize = function (words) {
        return words.length + " " + words.join(',');
    };
    wrongSpellSp.innerHTML = summarize(errorData[INCORRECT]);
    missSpellSp.innerHTML = summarize(errorData[MISSING]);
    needlessSpellSp.innerHTML = summarize(errorData[NEEDLESS]);
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: