您的位置:首页 > 编程语言 > C语言/C++

测试代码高亮:Windiff 原理初探(C++源码)

2008-05-25 20:24 477 查看
C++语言: Windiff 原理初探(C++源码)
001 // Windiff 原理初探(C++源码)
002 //详细说明文章参见:http://www.2maomao.com/blog/how-windiff-works-continued-1/
003
004 // mydiff.cpp
005 //
006 #include "stdafx.h"
007 #include <BaseTsd.h>
008 #include <iostream>
009 #include <fstream>
010 #include <string>
011 #include <map>
012 #include <utility>
013 #include <vector>
014 #include <functional>
015 #include <algorithm>
016 using namespace std;
017
// Strict-weak-ordering predicate that orders pairs by their .first member only.
// Pairs with equal .first compare as equivalent, whatever their .second is.
//
// Returns true when elem1.first is less than elem2.first.
bool MyPairCompFirst( const std::pair<int, int>& elem1, const std::pair<int, int>& elem2 )
{
    return elem1.first < elem2.first;
}
022
// Strict-weak-ordering predicate that orders pairs by their .second member only.
// Pairs with equal .second compare as equivalent, whatever their .first is.
//
// Returns true when elem1.second is less than elem2.second.
bool MyPairCompSecond( const std::pair<int, int>& elem1, const std::pair<int, int>& elem2 )
{
    return elem1.second < elem2.second;
}
027
// Binary search over a vector that is sorted ascending by .first.
//
// valPairs: pairs ordered by .first (for example after sorting with
//           MyPairCompFirst). The vector is not modified.
// value:    the .first value to look for.
//
// Returns the index of the LAST element whose .first equals value, or -1 when
// no element matches (which includes an empty vector). Landing on the end of
// the run of equal keys matters: a caller that walks backwards from the
// returned index while .first still matches is guaranteed to visit every
// duplicate, not just the ones that happen to lie left of an arbitrary hit.
int BiSearch(const std::vector< std::pair<int, int> > &valPairs, int value)
{
    // Half-open range [lo, hi); on exit lo is the first index whose .first is
    // strictly greater than value. Using size_t and lo + (hi - lo) / 2 avoids
    // both the size() - 1 underflow on empty input and midpoint overflow.
    std::size_t lo = 0;
    std::size_t hi = valPairs.size();
    while (lo < hi)
    {
        const std::size_t mid = lo + (hi - lo) / 2;
        if (valPairs[mid].first <= value)
            lo = mid + 1;
        else
            hi = mid;
    }

    // The element just before the upper bound is the last candidate match.
    if (lo > 0 && valPairs[lo - 1].first == value)
        return static_cast<int>(lo - 1);
    return -1;
}
046
047 int _tmain(int argc, _TCHAR* argv[])
048 {
049 vector<int> valsNew;
050 vector<pair<int, int>> valsOld;
051 vector<pair<int, int>> valsOldBackup;
052
053 if (argc != 3)
054 {
055 printf(" Usage:/n");
056 printf(" mydiff fileNew fileOld/n");
057 return 0;
058 } else
059 {
060 //read the two input file build the hash table, turn string into values for compare
061 string line;
062 ifstream finNew(argv[1]);
063 if (!finNew)
064 {
065 cout << "failed to open file:" << argv[1] << endl;
066 exit(0);
067 }
068
069 map<string, int> diffLines;
070 char ch[10001];
071 while (finNew.getline(ch, 10000))
072 {
073 line = ch;
074 const char* p = line.c_str();
075 //chop off the leading and ending blank chars
076 int pos=0;
077 while (pos < (int)line.size() && (line[pos] == ' ' || line[pos] == '/t')) pos++;
078 int posLeft = pos;
079 pos = (int)line.size() - 1;
080 while (pos >= 0 && (line[pos] == ' ' || line[pos] == '/t')) pos--;
081 string temp = line.substr(posLeft, pos-posLeft);
082 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
083
084 if (diffLines.find(line) == diffLines.end())
085 diffLines[line] = diffLines.size(); //use size() as the unique value
086 valsNew.push_back(diffLines[line]);
087 }
088 finNew.close();
089
090 ifstream finOld(argv[2]);
091 if (!finOld)
092 {
093 cout << "failed to open file:" << argv[2] << endl;
094 exit(0);
095 }
096 while (finOld.getline(ch, 10000))
097 {
098 line = ch;
099 //chop off the leading and ending blank chars
100 int pos=0;
101 while (pos < line.size() && (line[pos] == ' ' || line[pos] == '/t')) pos++;
102 int posLeft = pos;
103 pos = line.size() - 1;
104 while (pos >= 0 && (line[pos] == ' ' || line[pos] == '/t')) pos--;
105 string temp = line.substr(posLeft, pos-posLeft);
106 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
107
108 if (diffLines.find(line) == diffLines.end())
109 diffLines[line] = diffLines.size(); //use size() as the unique value
110 valsOld.push_back(make_pair(diffLines[line], valsOld.size()));
111 }
112 finOld.close();
113 diffLines.clear();
114 } //Here diffLines and file handles should be released
115 valsOldBackup = valsOld;
116
117 //use the greedy method, each step we should find the most "RECENT" equal lines
118
119 //Sort the old file values, so we can use bi-search against it when we want to compare
120 sort(valsOld.begin(), valsOld.end(), MyPairCompFirst);
121
122 int posNew = 0;
123
124 //Search for the next
125 vector<int> genLinesNew;
126 vector<int> genLinesOld;
127 int searchedOldLineCount = 0;
128 while (posNew < valsNew.size() && valsOld.size() > 0)
129 {
130 // get the next equal one
131 // Calculate the "distance", (leftHand^2 + rightHand^2)
132 int leftHand = 0;
133 int rightHand = 0;
134 INT64 minValueBar = _I64_MAX;
135
136 bool bFound = false;
137 int lastFoundLeftHand = 0;
138 int lastFoundRightHand = 0;
139 while (leftHand + posNew < valsNew.size())
140 {
141 if (leftHand * leftHand > minValueBar) break; //Found! We can stop now!
142
143 int target = valsNew[posNew + leftHand];
144 int pos = BiSearch(valsOld, target);
145 if (pos < 0)
146 {
147 leftHand++;
148 continue;
149 }
150
151 //found a match
152 //get the most "recent" match
153 int posMin = INT_MAX;
154 int minPos = pos;
155 while (pos >= 0 && valsOld[pos].first == target)
156 {
157 if (valsOld[pos].second < posMin)
158 {
159 posMin = valsOld[pos].second;
160 minPos = pos;
161 }
162 pos--;
163 }
164 pos = minPos;
165
166 //process the current match
167 rightHand = valsOld[pos].second - searchedOldLineCount;
168 if (minValueBar > ((INT64)(leftHand)) * leftHand + rightHand * rightHand)
169 {
170 bFound = true;
171 lastFoundLeftHand = leftHand;
172 lastFoundRightHand = rightHand;
173 minValueBar = ((INT64)(leftHand)) * leftHand + rightHand * rightHand;
174 }
175 leftHand++;
176 }
177
178 if (bFound)
179 {
180 //@@@@@@@
181 leftHand = lastFoundLeftHand;
182 rightHand = lastFoundRightHand;
183 int left1 = posNew;
184 int left2 = left1 + leftHand;
185 int right1 = searchedOldLineCount;
186 int right2 = right1 + rightHand;
187 //@@@@Add?
188 //@@@@Delete?
189 //@@@@Change?
190 // printf("%d,%d,c,%d,%d,/n", left1, left2, right1, right2);
191 posNew += leftHand + 1;
192 //delete the searched old lines from the rest old lines
193 for (int i=searchedOldLineCount; i<searchedOldLineCount+rightHand+1; i++)
194 {
195 int target = valsOldBackup[i].first;
196 int pos = BiSearch(valsOld, target);
197 if (pos < 0)
198 {
199 printf ("ERROR! this should not happen!, something may be wrong in previous analysis!/n");
200 printf ("i=[%d], searchedOldLineCount=[%d]/n", i, searchedOldLineCount);
201 exit(1);
202 }
203
204 int posMin = INT_MAX;
205 int minPos = pos;
206 while (pos >= 0 && valsOld[pos].first == target)
207 {
208 if (valsOld[pos].second < posMin)
209 {
210 posMin = valsOld[pos].second;
211 minPos = pos;
212 }
213 pos--;
214 }
215 pos = minPos;
216
217 valsOld.erase(valsOld.begin() + pos);
218 }
219 searchedOldLineCount += rightHand + 1;
220 } else
221 {
222 //@@@@ should output the rest RightHand and LeftHand values as changed from L->R
223 printf("%d,%d,c,%d,%d,/n", posNew, valsNew.size() - 1, searchedOldLineCount, valsOldBackup.size() - 1);
224 break;
225 }
226 }
227 system("PAUSE");
228
229 //@@@@
230 //print out in batch file format to support console
231 }
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: