您的位置:首页 > 其它

使用 MapReduce 查找共同好友。输入数据格式如下:每行以空白字符分隔,第一个字段是用户本人,其后各字段是该用户的好友,例如:A B C D

2015-06-27 15:47 387 查看
import java.io.IOException;

import java.util.Set;

import java.util.StringTokenizer;

import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.Mapper.Context;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that finds the common friends of every pair of people.
 *
 * <p>Input: text lines of whitespace-separated tokens. The first token of a
 * line is a person (the "owner"); the remaining tokens are that person's
 * friends.
 *
 * <p>Idea behind the algorithm (assuming friendship in the input is mutual,
 * i.e. if A lists B then B also lists A):
 * <ol>
 *   <li>If A lists B and C, then A is a friend of both B and C.</li>
 *   <li>If D also lists B and C, then B and C share the friends A and D.</li>
 *   <li>So for every owner we emit each pair of that owner's friends as the
 *       key and the owner as the value; identical pairs from different
 *       owners meet in the same reduce call.</li>
 *   <li>The reducer joins all owners of a pair into one value.</li>
 * </ol>
 */
public class FindFriend {

    /**
     * Turns one "owner friend1 friend2 ..." line into (friend pair, owner)
     * records, one per unordered pair of distinct friends.
     */
    public static class ChangeMapper extends Mapper<Object, Text, Text, Text> {

        /**
         * Emits {@code (friends[i] + friends[j], owner)} for every pair of
         * distinct friends on the line.
         *
         * @param key     input key supplied by the input format; not used
         * @param value   one input line: owner followed by the owner's friends
         * @param context sink for the emitted (pair, owner) records
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the line on whitespace.
            StringTokenizer itr = new StringTokenizer(value.toString());

            // A blank line has no owner; skip it instead of letting
            // nextToken() throw NoSuchElementException and fail the task.
            if (!itr.hasMoreTokens()) {
                return;
            }

            // The first token is the person this line belongs to.
            Text owner = new Text(itr.nextToken());

            // A sorted set removes duplicate friends and fixes the order, so a
            // pair is always written smaller-name-first and the same pair
            // coming from different owners produces the same key.
            Set<String> sortedFriends = new TreeSet<String>();
            while (itr.hasMoreTokens()) {
                sortedFriends.add(itr.nextToken());
            }

            String[] friends = sortedFriends.toArray(new String[sortedFriends.size()]);

            // Every unordered pair (i < j) of this owner's friends has the
            // owner as a common friend.
            for (int i = 0; i < friends.length; i++) {
                for (int j = i + 1; j < friends.length; j++) {
                    // NOTE(review): the two names are concatenated without a
                    // delimiter, so multi-character names can collide
                    // ("AB"+"C" and "A"+"BC" both give "ABC"). Kept as-is to
                    // preserve the existing output format; consider adding a
                    // separator if names are not single characters.
                    String outputkey = friends[i] + friends[j];
                    context.write(new Text(outputkey), owner);
                }
            }
        }
    }

    /**
     * Joins all values received for one friend pair into a single
     * colon-separated string. The same class is also registered as the
     * combiner in {@link #main(String[])}.
     */
    public static class FindReducer extends Reducer<Text, Text, Text, Text> {

        /**
         * Writes {@code (key, v1:v2:...:vn)} where {@code v1..vn} are the
         * incoming values in iteration order.
         *
         * @param key     the friend-pair key
         * @param values  the values grouped under this key
         * @param context sink for the joined record
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder commonfriends = new StringBuilder();
            for (Text val : values) {
                // Separator goes between entries only; checked by length
                // rather than by comparing String references with ==.
                if (commonfriends.length() > 0) {
                    commonfriends.append(':');
                }
                commonfriends.append(val.toString());
            }
            context.write(key, new Text(commonfriends.toString()));
        }
    }

    /**
     * Configures and runs the job.
     *
     * <p>After generic Hadoop options are stripped, the remaining arguments
     * are one or more input paths followed by the output path. The JVM exits
     * with status 2 when fewer than two such arguments are given, 0 when the
     * job succeeds, and 1 when it fails.
     *
     * @param args command-line arguments (generic options, inputs, output)
     * @throws IOException            if job setup or submission fails
     * @throws InterruptedException   if interrupted while waiting for the job
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("args error");
            System.exit(2);
        }

        // Job.getInstance replaces the deprecated Job(Configuration, String)
        // constructor; the name now describes what this job actually does.
        Job job = Job.getInstance(conf, "find common friends");
        job.setJarByClass(FindFriend.class);
        job.setMapperClass(ChangeMapper.class);
        // Joining with ':' can be applied to partial groups and then again to
        // the partial results, so the reducer doubles as the combiner.
        job.setCombinerClass(FindReducer.class);
        job.setReducerClass(FindReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // All arguments except the last are input paths; the last is the output.
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job,
                new Path(otherArgs[otherArgs.length - 1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: