Spark排序算法!! 使用java开发 自定义key值 进行二次排序 深入解析!

2016-02-28 20:36 701 查看


2 3

4 1

3 2

4 3

8 7

2 1


2 1

2 3

3 2

4 1

4 3

8 7

【源代码文件】SecondaySortApp.java SecondarySortKey.java

class SecondarySort:


JavaRDD<String> lines =sc.textFile("G://IMFBigDataSpark2016//tesdata//helloSpark.txt");


JavaPairRDD<SecondarySortKey, String> pairs = lines.mapToPair(new PairFunction<String, SecondarySortKey, String>() { ... });


JavaPairRDD<SecondarySortKey,String> sorted = pairs.sortByKey();


JavaRDD<String> SecondaySorted = sorted.map(new Function<Tuple2<SecondarySortKey, String>, String>() {

public String call(Tuple2<SecondarySortKey, String> sortedContent) throws Exception


SecondaySorted.foreach(new VoidFunction<String>() {






package com.dt.spark.SparkApps.cores;

import org.apache.spark.SparkConf;

import org.apache.spark.api.java.JavaPairRDD;

import org.apache.spark.api.java.JavaRDD;

import org.apache.spark.api.java.JavaSparkContext;

import org.apache.spark.api.java.function.Function;

import org.apache.spark.api.java.function.PairFunction;

import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

public class SecondaySortApp {

public static void main(String[] args) {

SparkConf conf = new SparkConf().setAppName("SecondaySortApp").setMaster("local");

JavaSparkContext sc = new JavaSparkContext(conf); //其底层实际上就是Scala的SparkContext

JavaRDD<String> lines = sc.textFile("G://IMFBigDataSpark2016//tesdata//helloSpark.txt");

JavaPairRDD<SecondarySortKey, String> pairs = lines.mapToPair(new PairFunction<String, SecondarySortKey, String>() {

private static final long serialVersionUID = 1L;


public Tuple2<SecondarySortKey, String> call(String line) throws Exception {

String[] splited = line.split(" ");

SecondarySortKey key =new SecondarySortKey(Integer.valueOf(splited[0]),Integer.valueOf(splited[1]));

return new Tuple2<SecondarySortKey,String>(key,line);



JavaPairRDD<SecondarySortKey, String> sorted = pairs.sortByKey();

JavaRDD<String> SecondaySorted=sorted.map(new Function<Tuple2<SecondarySortKey,String>, String>() {




private static final long serialVersionUID = 1L;


public String call(Tuple2<SecondarySortKey, String> sortedContent) throws Exception {

// TODO Auto-generated method stub

System.out.println("sortedContent._1 "+(sortedContent._1).toString());

System.out.println("sortedContent._2 "+sortedContent._2);

return sortedContent._2;



SecondaySorted.foreach(new VoidFunction<String>() {


public void call(String sorted) throws Exception {







sortedContent._2 2 1

2 1


sortedContent._2 2 3

2 3


sortedContent._2 3 2

3 2


sortedContent._2 4 1

4 1


sortedContent._2 4 3

4 3


sortedContent._2 8 7

8 7

