您的位置:首页 > 编程语言 > C语言/C++

C++里创建 Trie字典树(中文词典)(三)(联想)

2016-12-05 15:14 465 查看
  萌新做词典第三篇,做得不好,还请指正,谢谢大佬!

  今天把词典的联想做好了,也是比较low的,还改了之前的查询、遍历等代码。 Orz

  一样地先放上运行结果:

1 test1
2 ID : 2    char : 件    word : 编程软件
3 ID : 3    char : 习    word : 编程学习
4 ID : 4    char : 站    word : 编程学习网站
5 ID : 1    char : 门    word : 编程入门
6
7 test2
8 ID : 5    char : 练    word : 编程训练
9 ID : 1    char : 门    word : 编程入门
10 ID : 3    char : 习    word : 编程学习
11 ID : 4    char : 站    word : 编程学习网站
12 ID : 2    char : 件    word : 编程软件
13 find ID : 3    word : 编程学习
14
15 associate "编程" :
16 find!
17 训练
18 入门
19 学习
20 学习网站
21 软件


  测试用的test.cc(注:下面贴出的代码实际上是 Dictionary 类的实现文件,即 Dictionary.cc 的内容)

1 #include "Dictionary.h"
2 #include <iostream>
3 #include <fstream>
4 #include <string>
5 #include <json/json.h>
6
7 namespace ccx{
8
9 using std::endl;
10 using std::cout;
11 using std::pair;
12 using std::ofstream;
13 using std::ifstream;
14
15 Dictionary::Dictionary()
16 : _dictionary(new DictElem)
17 , _conf()
18 {
19     _dictionary->_wordId = 0;
20     _pcur = _dictionary;
21 }
22
23 void Dictionary::splitWord(const string & word, vector<string> & characters)
24 {
25     int num = word.size();
26     int i = 0;
27     while(i < num)
28     {
29         int size = 1;
30         if(word[i] & 0x80)
31         {
32             char temp = word[i];
33             temp <<= 1;
34             do{
35                 temp <<= 1;
36                 ++size;
37             }while(temp & 0x80);
38         }
39         string subWord;
40         subWord = word.substr(i, size);
41         characters.push_back(subWord);
42         i += size;
43     }
44 }
45
46 void Dictionary::AddWord(const string & word, int wordId)
47 {
48     vector<string> characters;
49     splitWord(word, characters);
50
51     vector<string>::iterator it_char;
52     it_char = characters.begin();
53     pDictElem root;
54     root = _dictionary;
55     for(; it_char != characters.end(); ++it_char)
56     {
57         WordIt it_word;
58         it_word = root->_words.find(*it_char);
59
60         if(it_word == root->_words.end())
61         {
62             pair<string, pDictElem> temp;
63             temp.first = *it_char;
64             pDictElem dictemp(new DictElem);
65             dictemp->_word = *it_char;
66             dictemp->_wordId = 0;
67             temp.second = dictemp;
68             root->_words.insert(temp);
69             root = dictemp;
70         }else{
71             root = it_word->second;
72         }
73     }
74     if(!root->_wordId)
75     {
76         root->_wordId = wordId;
77     }
78 }
79
80 void Dictionary::push(const string & word)
81 {
82     ++(_dictionary->_wordId);
83     AddWord(word, _dictionary->_wordId);
84 }
85
86 void Dictionary::push(vector<string> & words)
87 {
88     int size = words.size();
89     for(int i = 0; i < size; ++i)
90     {
91         push(words[i]);
92     }
93 }
94
95 int Dictionary::search(const string & word)
96 {
97     pDictElem root = _dictionary;
98     vector<string> temp;
99     splitWord(word, temp);
100
101     int ret = search(temp, root);
102     int size = temp.size();
103     if(ret != size)
104     {
105         return -1;
106     }
107     return root->_wordId;
108 }
109
110 int Dictionary::search(vector<string> & characters, pDictElem & root)
111 {
112     vector<string>::iterator it_char;
113     it_char = characters.begin();
114     root = _dictionary;
115     int i = 0;
116     for(; it_char != characters.end(); ++it_char, ++i)
117     {
118         WordIt it_word;
119         it_word = root->_words.find(*it_char);
120
121         if(it_word == root->_words.end())
122         {
123             break;
124         }else{
125             root = it_word->second;
126         }
127     }
128     return i;
129 }
130
131 bool Dictionary::associate(const string & word, vector<string> & data)
132 {
133     pDictElem root = _dictionary;
134     vector<string> temp;
135     splitWord(word, temp);
136
137     int ret = search(temp, root);
138     int size = temp.size();
139     if(ret != size)
140     {
141         return false;
142     }
143
144     list<WordIt> stackWord;
145     list<pDictElem> stackDict;
146     next(root, stackWord, stackDict);
147     while(root)
148     {
149         string temp = getCurWord(stackWord);
150         data.push_back(temp);
151         next(root, stackWord, stackDict);
152     }
153
154     if(!data.size())
155     {
156         return false;
157     }
158     return true;
159 }
160
161 //遍历用
162
163 void Dictionary::resetPoint(pDictElem pcur)
164 {
165     _pcur = pcur;
166     if(_stackDict.size())
167     {
168         _stackDict.clear();
169     }
170     if(_stackWord.size())
171     {
172         _stackWord.clear();
173     }
174     next();
175 }
176
177 void Dictionary::resetIt()
178 {
179     resetPoint(_dictionary);
180 }
181
182 void Dictionary::next()
183 {
184     next(_pcur, _stackWord, _stackDict);
185 }
186
187 void Dictionary::next(pDictElem & pcur, list<WordIt> & stackWord, list<pDictElem> & stackDict)
188 {
189     while(pcur)
190     {
191         nextWord(pcur, stackWord, stackDict);
192         if(!pcur || pcur->_wordId)
193         {
194             break;
195         }
196     }
197 }
198
199 void Dictionary::nextWord(pDictElem & pcur, list<WordIt> & stackWord, list<pDictElem> & stackDict)
200 {
201     if(pcur)
202     {
203         if(pcur->_words.size())
204         {
205             stackDict.push_back(pcur);
206             stackWord.push_back(pcur->_words.begin());
207             pcur = stackWord.back()->second;
208         }else{
209             ++(stackWord.back());
210         }
211         while(stackWord.back() == stackDict.back()->_words.end())
212         {
213             stackDict.pop_back();
214             stackWord.pop_back();
215             if(!stackDict.size())
216             {
217                 pcur = NULL;
218             }
219             ++(stackWord.back());
220         }
221         if(pcur)
222         {
223             pcur = stackWord.back()->second;
224         }
225     }
226 }
227
228 string Dictionary::getCurChar()
229 {
230     return _pcur->_word;
231 }
232
233 int Dictionary::getCurWordId()
234 {
235     return _pcur->_wordId;
236 }
237
238 string Dictionary::getCurWord()
239 {
240     return getCurWord(_stackWord);
241 }
242
243 string Dictionary::getCurWord(list<WordIt> & stackWord)
244 {
245     string temp;
246     list<WordIt>::iterator it_word;
247     it_word = stackWord.begin();
248
249     for(; it_word != stackWord.end(); ++it_word)
250     {
251         temp += (*it_word)->first;
252     }
253     return temp;
254 }
255
256 bool Dictionary::isEnd()
257 {
258     return _pcur == NULL;
259 }
260
261 void Dictionary::leading_in()//导入,失败没必要退出程序
262 {
263     ifstream ifs;
264     const char * path = _conf.getDictionaryPath().c_str();
265     ifs.open(path);
266     if(!ifs.good())
267     {
268         cout << "open Dictionary.json error(leading_in)" << endl;
269     }else{
270         Json::Value root;
271         Json::Reader reader;
272
273         if(!reader.parse(ifs, root, false))
274         {
275             cout << "json read Dictionary.json error" << endl;
276         }else{
277             int size = root.size();
278             for(int i = 0; i < size; ++i)
279             {
280                 string word = root[i]["Word"].asString();
281                 int wordId = root[i]["WordId"].asInt();
282                 AddWord(word, wordId);
283                 ++(_dictionary->_wordId);
284             }
285         }
286     }
287 }
288
289 void Dictionary::leading_out()
290 {
291     Json::Value root;
292     Json::FastWriter writer;
293
294     resetIt();
295
296     while(!isEnd())
297     {
298         Json::Value elem;
299         elem["Word"] = getCurWord();
300         elem["WordId"] = getCurWordId();
301         root.append(elem);
302         next();
303     }
304
305     string words;
306     words = writer.write(root);
307
308     ofstream ofs;
309     const char * path = _conf.getDictionaryPath().c_str();
310     ofs.open(path);
311     if(!ofs.good())
312     {
313         cout << "open Dictionary.json error(leading_out)" << endl;
314         ofs.open("Dictionary.tmp");
315         if(!ofs.good())
316         {
317             exit(EXIT_FAILURE);
318         }
319     }
320
321     ofs << words;
322     ofs.close();
323 }
324
325 }


View Code
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: