您的位置:首页 > 其它

mapreduce应用-找出扣扣共同好友

2017-10-16 20:06 459 查看
需求:找出扣扣共同好友

用户:好友,好友2,…

A:B,C,D,F,E,O

B:A,C,E,K

C:F,A,D,I

D:A,E,F,L

E:B,C,D,M,L

F:A,B,C,D,E,O,M

G:A,C,D,E,F

H:A,C,D,E,O

I:A,O

J:B,O

K:A,C,D

L:D,E,F

M:E,F,G

O:A,H,I,J

思路:先找出哪个好友是哪些用户的好友,比如B是A,E,F,J的共同好友,记为 B-->A,E,F,J

写成

<A-E,B> <A-F,B> <A-J,B> ... 即 <用户1-用户2,好友B>


接着传到reduce中为

<用户1-用户2,好友迭代器>
输出成<用户1-用户2,好友1,好友2,好友3...>


程序实现:第一步

/**
 * Step one of the "common friends" job.
 *
 * Input lines look like {@code A:B,C,D,F,E,O} meaning user A has friends B..O.
 * The mapper inverts the relation, emitting {@code <friend, user>} pairs; the
 * reducer then groups by friend and outputs {@code <friend, user1,user2,...,>}
 * (note the trailing comma — step two splits on "," and tolerates it).
 */
public class QQFriendsFindStepOne {

    static class QQFriendsFindStepOneMapper extends Mapper<LongWritable, Text, Text, Text> {
        /**
         * Parses one line "user:friend1,friend2,..." and emits one
         * {@code <friend, user>} pair per friend.
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            // Skip blank or malformed lines (e.g. a trailing newline in the
            // input file) instead of throwing ArrayIndexOutOfBoundsException.
            if (line.isEmpty()) {
                return;
            }
            String[] userFriends = line.split(":");
            if (userFriends.length < 2) {
                return;
            }
            String user = userFriends[0];
            String[] friends = userFriends[1].split(",");
            // Invert the relation: output <friend, user>.
            for (String friend : friends) {
                context.write(new Text(friend), new Text(user));
            }
        }
    }

    static class QQFriendsFindStepOneReducer extends Reducer<Text, Text, Text, Text> {
        /**
         * Receives {@code <friend, users-having-that-friend>} and concatenates
         * the users into a single comma-terminated list,
         * e.g. {@code A \t C,B,D,F,E,O,}.
         */
        @Override
        protected void reduce(Text friend, Iterable<Text> users, Context context)
                throws IOException, InterruptedException {
            // StringBuilder: reduce() runs single-threaded, no need for the
            // synchronized StringBuffer.
            StringBuilder sb = new StringBuilder();
            for (Text user : users) {
                sb.append(user.toString()).append(',');
            }
            context.write(friend, new Text(sb.toString()));
        }
    }

    /**
     * Job driver. args[0] = HDFS input path, args[1] = HDFS output path
     * (must not exist yet).
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Locate the jar containing this class on the cluster.
        job.setJarByClass(QQFriendsFindStepOne.class);

        job.setMapperClass(QQFriendsFindStepOneMapper.class);
        job.setReducerClass(QQFriendsFindStepOneReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Final (reducer) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean ok = job.waitForCompletion(true);
        System.exit(ok ? 0 : 1);
    }
}


测试第一步输出:

将工程打包上传到hadoop集群

hdfs创建文件夹/friends/input

linux编辑文件friends.data,写入好友用户数据

将本地文件上传到/friends/input

运行程序查看输出结果

[root@mini1 ~]# hadoop fs -mkdir -p /friends/input
[root@mini1 ~]# vi friends.data
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
[root@mini1 ~]# hadoop fs -put friends.data /friends/input
[root@mini1 ~]# hadoop jar friends.jar com.scu.hadoop.sharefriends.QQFriendsFindStepOne  /friends/input /friends/output
[root@mini1 ~]# hadoop fs -cat /friends/input/part-r-00000
Java HotSpot(TM) Client VM warning: You have loaded library /root/apps/hadoop-2.6.4/lib/native/libhadoop.so.1.0.0 which might have disabled stack guard. The VM will try to fix the stack guard now.
It's highly recommended that you fix the library with 'execstack -c <libfile>', or link it with '-z noexecstack'.
cat: `/friends/input/part-r-00000': No such file or directory
[root@mini1 ~]# hadoop fs -cat /friends/output/part-r-00000
A       I,K,C,B,G,F,H,O,D,
B       A,F,J,E,
C       A,E,B,H,F,G,K,
D       G,C,K,A,L,F,E,H,
E       G,M,L,H,A,F,B,D,
F       L,M,D,C,G,A,
G       M,
H       O,
I       O,C,
J       O,
K       B,
L       D,E,
M       E,F,
O       A,H,I,J,F,


程序实现:第二步

public class QQFriendsFindStepTwo {
static class QQFriendsFindStepTwoMapper extends Mapper<LongWritable, Text, Text, Text>{
//传入数据为第一个程序输出文件的内容
//<好友,用户1,用户2,...,用户n,><A       C,B,D,F,E,O,>
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
//切割,获得用户和好友
String[] friend_users = line.split("\t");
String friend = friend_users[0];
String[] users = friend_users[1].split(",");
//一定要排序,否则可能出现重复的情况,比如<A-B,好友>和<B-A,好友>应该是一样的
//不排序就做为了不同的key传给了reduce
Arrays.sort(users);
StringBuffer sb = new StringBuffer();
//输出<用户a-用户b,好友>
for(int i=0;i<users.length-2;i++){
for(int j=i+1;j<users.length-1;j++){
context.write(new Text(users[i]+"-"+users[j]), new Text(friend));
}
}
}
}
static class QQFriendsFindStepTwoReducer extends Reducer<Text, Text, Text, Text>{
//传入的数据<用户a-用户b,好友迭代器>
@Override
protected void reduce(Text userTuser, Iterable<Text> friends, Context context)
throws IOException, InterruptedException {
StringBuffer sb = new StringBuffer();
for (Text friend : friends) {
sb.append(friend+",");
}
//写出数据格式<用户1-用户2,好友列表><A-B,C,D,E...>
context.write(userTuser, new Text(sb.toString()));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
//jar包位置
job.setJarByClass(QQFriendsFindStepTwo.class);

job.setMapperClass(QQFriendsFindStepTwoMapper.class);
job.setReducerClass(QQFriendsFindStepTwoReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
//设置最终输出类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

boolean ex = job.waitForCompletion(true);
System.exit(ex?0:1);
}
}


第二步测试:

重新将工程打包上传到hadoop集群

将第一步的输出文件作为该步的输入文件

[root@mini1 ~]# hadoop jar friends.jar com.scu.hadoop.sharefriends.QQFriendsFindStepTwo  /friends/output/part-r-00000 /friends/output2
[root@mini1 ~]# hadoop fs -cat /friends/output2/part-r-00000
A-B     C,E,
A-C     F,D,
A-D     E,F,
A-E     B,C,D,
A-F     C,D,B,E,O,
A-G     D,E,F,C,
A-H     E,O,C,D,
A-I     O,
A-K     D,
A-L     F,E,
B-C     A,
B-D     E,A,
B-E     C,
B-F     E,A,C,
B-G     C,E,A,
B-H     E,C,A,
B-I     A,
B-K     A,
B-L     E,
C-D     F,A,
C-E     D,
C-F     D,A,
C-G     F,A,D,
C-H     A,D,
C-I     A,
C-K     D,A,
C-L     F,
D-F     E,A,
D-G     A,E,F,
D-H     A,E,
D-I     A,
D-K     A,
D-L     F,E,
E-F     C,D,B,
E-G     D,C,
E-H     D,C,
E-K     D,
F-G     C,E,D,A,
F-H     C,A,D,E,O,
F-I     A,O,
F-K     D,A,
F-L     E,
G-H     D,E,C,A,
G-I     A,
G-K     A,D,
G-L     F,E,
H-I     A,O,
H-K     A,D,
H-L     E,
I-K     A,
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: