您的位置:首页 > 其它

MapReduce实现之 查找共同好友

2018-01-07 21:42 501 查看
输入:邻接表

100, 200 300 400 500 600
200, 100 300 400
300, 100 200 400 500
400, 100 200 300
500, 100 300
600, 100

第一列表示用户,后面的表示用户的好友。
需求:查找两两用户的共同好友。

思路:1、key为两两用户,value为其中一个用户的所有好友

            2、求两个用户所有好友的交集

步骤:1、map:取每一行,组合user和其每一个好友为key(key中的两个字段按字典序排列),user的所有好友为value

            2、reduce:求两个用户之间好友的交集

package dabook;

import hadoop.FriendRecom;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Set;
import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that computes the common friends of pairs of users from an adjacency list.
 *
 * <p>Each input line has the form {@code user, friend1 friend2 ...}. For every friend of a user the
 * mapper emits a key naming the (user, friend) pair in lexicographic order, with the user's complete
 * friend list as the value. The reducer intersects the two friend lists that arrive for a pair.
 *
 * <p>A pair only produces output when exactly two friend lists arrive for it (with one input line
 * per user this means both users list each other as friends) and their intersection is non-empty.
 */
public class CommFriend {

    /** Input path used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "dabook/commfriend";

    /** Output path used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "dabook/commfriend-out";

    /**
     * Returns the elements contained in both sets.
     *
     * @param set1 first set, may be {@code null}
     * @param set2 second set, may be {@code null}
     * @return a new sorted set holding the intersection, or {@code null} if either argument is
     *     {@code null}
     */
    public static Set<String> intersect(Set<String> set1, Set<String> set2) {
        if (set1 == null || set2 == null) {
            return null;
        }

        // Iterate over the smaller set and probe the larger one to minimise lookups.
        boolean firstIsSmaller = set1.size() < set2.size();
        Set<String> small = firstIsSmaller ? set1 : set2;
        Set<String> big = firstIsSmaller ? set2 : set1;

        Set<String> result = new TreeSet<String>();
        for (String element : small) {
            if (big.contains(element)) {
                result.add(element);
            }
        }
        return result;
    }

    /**
     * Emits one record per (user, friend) pair: the key is the pair in lexicographic order, the
     * value is the user's whole friend list.
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Per-instance reusable output buffers; deliberately not static so mapper instances
        // running in the same JVM never share mutable state.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Parses one adjacency-list line and writes a record for each friend on it.
         *
         * @param key byte offset of the line (unused)
         * @param value the line, expected as {@code user, friend1 friend2 ...}
         * @param context sink for the emitted records
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            if (fields.length != 2) {
                // Malformed line (no comma, more than one comma, or nothing after the comma).
                return;
            }

            // The sample data has a space after the comma; trimming keeps it from turning into
            // an empty "friend" token and a junk key such as "[, 100]".
            String user = fields[0].trim();
            String friendList = fields[1].trim();
            if (user.isEmpty() || friendList.isEmpty()) {
                return;
            }

            outValue.set(friendList);
            for (String friend : friendList.split("\\s+")) {
                // Order the pair so that both users produce the same key.
                if (user.compareTo(friend) < 0) {
                    outKey.set("[" + user + ", " + friend + "]");
                } else {
                    outKey.set("[" + friend + ", " + user + "]");
                }
                context.write(outKey, outValue);
            }
        }

    }

    /**
     * Intersects the two friend lists received for a user pair and writes the common friends as a
     * {@code ", "}-separated string.
     */
    static class MyReducer extends Reducer<Text, Text, Text, Text> {

        private final Text outValue = new Text();

        /**
         * Writes the common friends of the pair named by {@code key}.
         *
         * <p>Nothing is written unless exactly two friend lists arrive for the key, or if the two
         * lists have no element in common.
         *
         * @param key the user pair, as produced by {@link MyMapper}
         * @param value the friend lists emitted for this pair
         * @param context sink for the result record
         */
        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            String first = null;
            String second = null;
            int count = 0;
            for (Text text : value) {
                // Copy to String right away: only the first two lists are needed, and the
                // iterator's Text object must not be held across iterations.
                if (count == 0) {
                    first = text.toString();
                } else if (count == 1) {
                    second = text.toString();
                }
                count++;
            }

            if (count != 2) {
                return;
            }

            Set<String> common = intersect(toFriendSet(first), toFriendSet(second));
            if (common == null || common.isEmpty()) {
                return;
            }

            StringBuilder joined = new StringBuilder();
            for (String friend : common) {
                if (joined.length() > 0) {
                    joined.append(", ");
                }
                joined.append(friend);
            }

            outValue.set(joined.toString());
            context.write(key, outValue);
        }

        /** Splits a whitespace-separated friend list into a sorted set, ignoring blank input. */
        private static Set<String> toFriendSet(String friendList) {
            Set<String> friends = new TreeSet<String>();
            String trimmed = friendList.trim();
            if (trimmed.isEmpty()) {
                return friends;
            }
            for (String friend : trimmed.split("\\s+")) {
                friends.add(friend);
            }
            return friends;
        }
    }

    /**
     * Configures and runs the job.
     *
     * <p>Generic Hadoop options are consumed by {@link GenericOptionsParser}. If at least two
     * arguments remain, the first is used as the input path and the second as the output path;
     * otherwise the built-in default paths are used. An existing output path is deleted
     * recursively before the job starts.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();

        String in = DEFAULT_INPUT_PATH;
        String out = DEFAULT_OUTPUT_PATH;
        if (otherArgs.length >= 2) {
            in = otherArgs[0];
            out = otherArgs[1];
        }

        // Check the input before touching the output directory, so a missing input neither
        // wipes a previous result nor fails later with an obscure "no input paths" error.
        Path inPath = new Path(in);
        FileSystem inFs = inPath.getFileSystem(conf);
        if (!inFs.exists(inPath)) {
            System.err.println("Input path does not exist: " + inPath);
            System.exit(1);
        }

        // NOTE(review): this constructor is kept for compatibility with the Hadoop version the
        // file was written against; newer releases offer Job.getInstance(conf, name).
        Job job = new Job(conf, "common friend");
        job.setJarByClass(CommFriend.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inPath);

        // The job refuses to start if the output directory already exists, so remove it first.
        Path outPath = new Path(out);
        FileSystem outFs = outPath.getFileSystem(conf);
        outFs.delete(outPath, true);
        FileOutputFormat.setOutputPath(job, outPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

    /** Ad-hoc manual check of {@link #intersect}: prints the elements shared by two sample sets. */
    private void test() {
        Set<String> set1 = new TreeSet<String>();
        Set<String> set2 = new TreeSet<String>();

        set2.add("2");
        set2.add("3");
        set2.add("4");
        set2.add("5");

        set1.add("3");
        set1.add("4");
        set1.add("6");

        Set<String> res = intersect(set1, set2);

        for (String string : res) {
            System.out.println(string);
        }

    }

}
输出:
[100, 200] 300, 400
[100, 300] 200, 400, 500
[100, 400] 200, 300
[100, 500] 300
[200, 300] 100, 400
[200, 400] 100, 300
[300, 400] 100, 200
[300, 500] 100
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: