您的位置:首页 > 职场人生

面试题:一千万条短信中,找到重复次数最多的10条

2017-02-28 14:38 429 查看
1 #include<iostream>
2 #include<fstream>
3 #include <string>
4 #include <unordered_map>
5 #include <vector>
6 #include <algorithm>
7 #include <queue>
8
9 using namespace std;
10
11 typedef unsigned long int uint64_t;
12
13 //获取字符串的hash值
14 uint64_t getHash(const string& s) {
15     uint64_t hashVal = 0;
16     uint64_t a = 3589;
17     uint64_t b = 81369;
18     for (char ch : s) {
19         hashVal = hashVal * a + ch;
20         a = a * b;
21     }
22     return hashVal;
23 }
24
25 struct posAndCount {
26     int pos;
27     int count;
28     posAndCount(int a = 0, int b = 0) :
29             pos(a), count(b) {
30     };
31 };
32
33 struct NameHashPosAndCount {
34     uint64_t namehash;
35     int pos;
36     int count;
37
38     friend bool operator <(const NameHashPosAndCount& a,
39             const NameHashPosAndCount& b) {
40         return a.count > b.count;
41     }
42 };
43
44
45 //从N个数据中找出出现次数最多的N个
46 void MfromN() {
47     unordered_map<uint64_t, posAndCount> names;
48     unordered_map<uint64_t, posAndCount>::iterator it;
50     posAndCount pc;
51     int pos = 0;

52     ifstream fin("message.txt");
53     string l;
54     while (fin >> l) {
55         uint64_t hashVal = getHash(l);
56         it = names.find(hashVal);
57         if (it == names.end()) {
58             pc.pos = pos;
59             pc.count = 0;
60             names[hashVal] = pc;
61         } else {
62             names[hashVal].count++;
63         }
64         pos++;
65     }
66     fin.close();
67
68
69     //优先队列寻找出现次数最多的10条
70     priority_queue<NameHashPosAndCount> name10;
71     NameHashPosAndCount n;
72     for (it = names.begin(); it != names.end(); it++) {
73         if (name10.size() < 10) {
74             n.namehash = it->first;
75             n.pos = it->second.pos;
76             n.count = it->second.count;
77             name10.push(n);
78         } else {
79             if (name10.top().count < it->second.count) {
80                 name10.pop();
81                 n.namehash = it->first;
82                 n.pos = it->second.pos;
83                 n.count = it->second.count;
84                 name10.push(n);
85             }
86         }
87     }
88     vector<int> index;
89     while (!name10.empty()) {
90         index.push_back(name10.top().pos);
91         name10.pop();
92     }
93     sort(index.begin(),index.end());
94
95     ifstream fin2("message.txt");
96     int i = 0;
97     int k = 0;
98     while(k < 10 && fin2 >> l){
99         if(i == index[k]){
100             cout << l << endl;
101             k++;
102         }
103         i++;
104     }
105     fin2.close();
106 }
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐