2018-07-29期 MapReduce实现对字符串进行排序
2018-07-30 08:58
525 查看
package cn.sjq.mr.sort.number;
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Test;
/**
 * MapReduce string sort. Words are sorted in dictionary order by the
 * framework's shuffle/sort phase; this class only tokenizes the input and
 * chooses the sort comparator.
 *
 * <p>The mapper and the comparator are implemented as static nested classes.
 * No reducer is configured, so the job runs with Hadoop's default reducer.
 *
 * @author songjq
 */
public class StringSort {

    /** Input file read by both sort jobs. */
    private static final String INPUT_PATH = "D:\\test\\tmp\\sort\\Strings.data";

    /**
     * Mapper that splits each input line into words and emits every word as
     * an output key with a {@link NullWritable} value.
     *
     * @author songjq
     */
    static class StringSortMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

        /** Reused output key; avoids allocating a new Text per word. */
        private final Text tkey = new Text();

        /**
         * Tokenizes one line on spaces and writes each token as a key.
         *
         * @param k1 key of the input record (not used)
         * @param v1 one line of input, e.g. {@code Wait Events Statistics}
         * @param context Hadoop context the words are written to
         * @throws IOException if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable k1, Text v1, Context context)
                throws IOException, InterruptedException {
            // Tokenize; expected line format: "Wait Events Statistics"
            for (String word : StringUtils.split(v1.toString(), " ")) {
                tkey.set(word);
                context.write(tkey, NullWritable.get());
            }
        }
    }

    /**
     * Submits the sort job using MapReduce's default comparator, which sorts
     * the words in ascending order.
     *
     * <p>Sample result:
     * <pre>
     * Activity
     * Advisory
     * Buffer
     * Cache
     * Cache
     * Dictionary
     * Events
     * IO
     * Instance
     * ...
     * </pre>
     *
     * @throws Exception if the job cannot be submitted or does not complete
     *         successfully
     */
    @Test
    public void StringSortJob() throws Exception {
        Job job = newSortJob("D:\\test\\tmp\\sort\\out5");
        // Default MapReduce sort order is used; no comparator is set.
        runToSuccess(job);
    }

    /**
     * Custom comparator that extends {@link Text.Comparator} and overrides
     * {@code compare} to sort strings in descending order.
     *
     * @author songjq
     */
    static class StringSortMyComparator extends Text.Comparator {

        /**
         * Compares two serialized keys in reverse of the default order.
         *
         * <p>The operands are swapped instead of negating the result:
         * negation cannot reverse {@code Integer.MIN_VALUE}, so swapping is
         * the safe way to invert any comparator.
         */
        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            return super.compare(b2, s2, l2, b1, s1, l1);
        }
    }

    /**
     * Submits the sort job using {@link StringSortMyComparator}, which sorts
     * the words in descending order.
     *
     * <p>Sample result:
     * <pre>
     * Latch
     * Instance
     * IO
     * Events
     * Dictionary
     * Cache
     * Cache
     * Buffer
     * Advisory
     * Activity
     * ...
     * </pre>
     *
     * @throws Exception if the job cannot be submitted or does not complete
     *         successfully
     */
    @Test
    public void StringSortUseMyComparatorJob() throws Exception {
        Job job = newSortJob("D:\\test\\tmp\\sort\\out6");
        // Use the custom comparator to sort the strings in descending order.
        job.setSortComparatorClass(StringSortMyComparator.class);
        runToSuccess(job);
    }

    /**
     * Builds a job with the configuration shared by both tests: mapper,
     * key/value classes, the common input file and the given output directory.
     */
    private static Job newSortJob(String outputPath) throws IOException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(StringSort.class);
        job.setMapperClass(StringSortMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        return job;
    }

    /**
     * Runs the job and fails loudly when it does not succeed, so a failed
     * job is not reported as a passing test.
     */
    private static void runToSuccess(Job job) throws Exception {
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("MapReduce job failed: " + job.getJobName());
        }
    }
}
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Test;
/**
 * MapReduce string sort. Words are sorted in dictionary order by the
 * framework's shuffle/sort phase; this class only tokenizes the input and
 * chooses the sort comparator.
 *
 * <p>The mapper and the comparator are implemented as static nested classes.
 * No reducer is configured, so the job runs with Hadoop's default reducer.
 *
 * @author songjq
 */
public class StringSort {

    /** Input file read by both sort jobs. */
    private static final String INPUT_PATH = "D:\\test\\tmp\\sort\\Strings.data";

    /**
     * Mapper that splits each input line into words and emits every word as
     * an output key with a {@link NullWritable} value.
     *
     * @author songjq
     */
    static class StringSortMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

        /** Reused output key; avoids allocating a new Text per word. */
        private final Text tkey = new Text();

        /**
         * Tokenizes one line on spaces and writes each token as a key.
         *
         * @param k1 key of the input record (not used)
         * @param v1 one line of input, e.g. {@code Wait Events Statistics}
         * @param context Hadoop context the words are written to
         * @throws IOException if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable k1, Text v1, Context context)
                throws IOException, InterruptedException {
            // Tokenize; expected line format: "Wait Events Statistics"
            for (String word : StringUtils.split(v1.toString(), " ")) {
                tkey.set(word);
                context.write(tkey, NullWritable.get());
            }
        }
    }

    /**
     * Submits the sort job using MapReduce's default comparator, which sorts
     * the words in ascending order.
     *
     * <p>Sample result:
     * <pre>
     * Activity
     * Advisory
     * Buffer
     * Cache
     * Cache
     * Dictionary
     * Events
     * IO
     * Instance
     * ...
     * </pre>
     *
     * @throws Exception if the job cannot be submitted or does not complete
     *         successfully
     */
    @Test
    public void StringSortJob() throws Exception {
        Job job = newSortJob("D:\\test\\tmp\\sort\\out5");
        // Default MapReduce sort order is used; no comparator is set.
        runToSuccess(job);
    }

    /**
     * Custom comparator that extends {@link Text.Comparator} and overrides
     * {@code compare} to sort strings in descending order.
     *
     * @author songjq
     */
    static class StringSortMyComparator extends Text.Comparator {

        /**
         * Compares two serialized keys in reverse of the default order.
         *
         * <p>The operands are swapped instead of negating the result:
         * negation cannot reverse {@code Integer.MIN_VALUE}, so swapping is
         * the safe way to invert any comparator.
         */
        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            return super.compare(b2, s2, l2, b1, s1, l1);
        }
    }

    /**
     * Submits the sort job using {@link StringSortMyComparator}, which sorts
     * the words in descending order.
     *
     * <p>Sample result:
     * <pre>
     * Latch
     * Instance
     * IO
     * Events
     * Dictionary
     * Cache
     * Cache
     * Buffer
     * Advisory
     * Activity
     * ...
     * </pre>
     *
     * @throws Exception if the job cannot be submitted or does not complete
     *         successfully
     */
    @Test
    public void StringSortUseMyComparatorJob() throws Exception {
        Job job = newSortJob("D:\\test\\tmp\\sort\\out6");
        // Use the custom comparator to sort the strings in descending order.
        job.setSortComparatorClass(StringSortMyComparator.class);
        runToSuccess(job);
    }

    /**
     * Builds a job with the configuration shared by both tests: mapper,
     * key/value classes, the common input file and the given output directory.
     */
    private static Job newSortJob(String outputPath) throws IOException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(StringSort.class);
        job.setMapperClass(StringSortMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        return job;
    }

    /**
     * Runs the job and fails loudly when it does not succeed, so a failed
     * job is not reported as a passing test.
     */
    private static void runToSuccess(Job job) throws Exception {
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("MapReduce job failed: " + job.getJobName());
        }
    }
}
相关文章推荐
- mapreduce 利用InverseMapper.class对key,value进行 交换实现词频排序 .
- java字符串数组进行大小排序的简单实现
- 从主函数输入十个不等长的字符串,编写函数,对这些串进行排序,在主调函数中输出排好序的串,指针实现。
- 3.11 用栈实现对一个字符串按升序进行排序
- Java实现对字符串中的数值进行排序操作示例
- 对多个字符串进行排序,用Java语言实现,不能使用现有的类
- 【Java学习笔记】实现Comparator接口来进行字符串逆向排序
- TreeSet是可以对字符串进行排序 的, 因为字符串已经实现了Comparable接口。
- java实现对一个字符串中的数值进行从小到大的排序
- mapreduce 利用InverseMapper.class对key,value进行 交换实现词频排序
- Java技巧——实现Comparator接口来进行字符串逆向排序
- 一个用VB实现的对任意字符串进行排列并排序的函数
- mapreduce 利用InverseMapper.class对key,value进行 交换实现词频排序
- 对字符串中字符出现的次数进行排序
- C语言-对输入n个字符串进行排序(无导入string.h)
- Java实现对中文字符串的排序功能实例代码
- 【转】SortedSet实现过滤重复字符串并排序
- MapReduce实现排序功能
- 第8周项目2.2-4 试编写算法实现将字符串S中所有字符颠倒过来重新排序 void Trans(SqString *&s, char c1, char c2);
- 基于JavaScript实现Json数据根据某个字段进行排序