Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1b5926d

Browse files
author
James Lee
committed
improve several pair RDD examples
1 parent87fe8ce commit1b5926d

File tree

6 files changed

+27
-14
lines changed

6 files changed

+27
-14
lines changed

‎src/main/java/com/sparkTutorial/pairRdd/filter/AirportsNotInUsaSolution.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
packagecom.sparkTutorial.pairRdd.filter;
22

3+
importcom.sparkTutorial.rdd.commons.Utils;
34
importorg.apache.spark.SparkConf;
45
importorg.apache.spark.api.java.JavaPairRDD;
56
importorg.apache.spark.api.java.JavaRDD;
@@ -25,7 +26,7 @@ public static void main(String[] args) throws Exception {
2526
}
2627

2728
privatestaticPairFunction<String,String,String>getAirportNameAndCountryNamePair() {
28-
return (PairFunction<String,String,String>)line ->newTuple2<>(line.split(",")[1],
29-
line.split(",")[3]);
29+
return (PairFunction<String,String,String>)line ->newTuple2<>(line.split(Utils.COMMA_DELIMITER)[1],
30+
line.split(Utils.COMMA_DELIMITER)[3]);
3031
}
3132
}

‎src/main/java/com/sparkTutorial/pairRdd/groupbykey/AirportsProblem.javarenamed to‎src/main/java/com/sparkTutorial/pairRdd/groupbykey/AirportsByCountryProblem.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
packagecom.sparkTutorial.pairRdd.groupbykey;
22

3-
publicclassAirportsProblem {
3+
publicclassAirportsByCountryProblem {
44

55
publicstaticvoidmain(String[]args)throwsException {
66

7-
/* TODO: Create a Spark program to read the airport data from in/airports.text, output the the list of the names of the airports located in each country.
7+
/* Create a Spark program to read the airport data from in/airports.text,
8+
output the the list of the names of the airports located in each country.
89
910
Each row of the input file contains the following columns:
10-
11-
Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code,ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
11+
Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code,
12+
ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
1213
1314
Sample output:
1415

‎src/main/java/com/sparkTutorial/pairRdd/groupbykey/AirportsSolution.javarenamed to‎src/main/java/com/sparkTutorial/pairRdd/groupbykey/AirportsByCountrySolution.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
packagecom.sparkTutorial.pairRdd.groupbykey;
22

3+
importcom.sparkTutorial.rdd.commons.Utils;
34
importorg.apache.spark.SparkConf;
45
importorg.apache.spark.api.java.JavaPairRDD;
56
importorg.apache.spark.api.java.JavaRDD;
67
importorg.apache.spark.api.java.JavaSparkContext;
78
importorg.apache.spark.api.java.function.PairFunction;
89
importscala.Tuple2;
910

10-
publicclassAirportsSolution {
11+
publicclassAirportsByCountrySolution {
1112

1213
publicstaticvoidmain(String[]args)throwsException {
1314

@@ -17,7 +18,10 @@ public static void main(String[] args) throws Exception {
1718

1819
JavaRDD<String>lines =sc.textFile("in/airports.text");
1920

20-
JavaPairRDD<String,String>CountryAndAirportNameAndPair =lines.mapToPair((PairFunction<String,String,String>)airport ->newTuple2<>(airport.split(",")[3],airport.split(",")[1]));
21+
JavaPairRDD<String,String>CountryAndAirportNameAndPair =
22+
lines.mapToPair((PairFunction<String,String,String>)airport ->
23+
newTuple2<>(airport.split(Utils.COMMA_DELIMITER)[3],
24+
airport.split(Utils.COMMA_DELIMITER)[1]));
2125

2226
JavaPairRDD<String,Iterable<String>>AirportsByCountry =CountryAndAirportNameAndPair.groupByKey();
2327

‎src/main/java/com/sparkTutorial/pairRdd/mapValues/AirportsUppercaseSolution.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
packagecom.sparkTutorial.pairRdd.mapValues;
22

3+
importcom.sparkTutorial.rdd.commons.Utils;
34
importorg.apache.spark.SparkConf;
45
importorg.apache.spark.api.java.JavaPairRDD;
56
importorg.apache.spark.api.java.JavaRDD;
@@ -25,7 +26,7 @@ public static void main(String[] args) throws Exception {
2526
}
2627

2728
privatestaticPairFunction<String,String,String>getAirportNameAndCountryNamePair() {
28-
return (PairFunction<String,String,String>)line ->newTuple2<>(line.split(",")[1],
29-
line.split(",")[3]);
29+
return (PairFunction<String,String,String>)line ->newTuple2<>(line.split(Utils.COMMA_DELIMITER)[1],
30+
line.split(Utils.COMMA_DELIMITER)[3]);
3031
}
3132
}

‎src/main/java/com/sparkTutorial/pairRdd/sort/sortbykey/SortedWorldCountProblem.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33

44
publicclassSortedWorldCountProblem {
55

6-
/* TODO: Create a Spark program to read the an article from in/word_count.text, output the number of occurrence of each word in descending order.
6+
/* Create a Spark program to read the an article from in/word_count.text,
7+
output the number of occurrence of each word in descending order.
78
89
Sample output:
910

‎src/main/java/com/sparkTutorial/pairRdd/sort/sortbykey/SortedWorldCountSolution.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,20 @@ public static void main(String[] args) throws Exception {
2424
JavaRDD<String>lines =sc.textFile("in/word_count.text");
2525
JavaRDD<String>wordRdd =lines.flatMap(line ->Arrays.asList(line.split(" ")).iterator());
2626

27-
JavaPairRDD<String,Integer>wordPairRdd =wordRdd.mapToPair((PairFunction<String,String,Integer>)word ->newTuple2<>(word,1));
27+
JavaPairRDD<String,Integer>wordPairRdd =wordRdd.mapToPair(
28+
(PairFunction<String,String,Integer>)word ->newTuple2<>(word,1));
2829

2930
JavaPairRDD<String,Integer>wordToCountPairs =wordPairRdd.reduceByKey((Function2<Integer,Integer,Integer>) (x,y) ->x +y);
3031

31-
JavaPairRDD<Integer,String>countToWordParis =wordToCountPairs.mapToPair((PairFunction<Tuple2<String,Integer>,Integer,String>)wordToCount ->newTuple2<>(wordToCount._2(),wordToCount._1()));
32+
JavaPairRDD<Integer,String>countToWordParis =wordToCountPairs.mapToPair(
33+
(PairFunction<Tuple2<String,Integer>,Integer,String>)wordToCount ->newTuple2<>(wordToCount._2(),
34+
wordToCount._1()));
3235

3336
JavaPairRDD<Integer,String>sortedCountToWordParis =countToWordParis.sortByKey(false);
3437

35-
JavaPairRDD<String,Integer>sortedWordToCountPairs =sortedCountToWordParis.mapToPair((PairFunction<Tuple2<Integer,String>,String,Integer>)countToWord ->newTuple2<>(countToWord._2(),countToWord._1()));
38+
JavaPairRDD<String,Integer>sortedWordToCountPairs =sortedCountToWordParis
39+
.mapToPair((PairFunction<Tuple2<Integer,String>,String,Integer>)countToWord ->newTuple2<>(countToWord._2(),
40+
countToWord._1()));
3641

3742
for (Tuple2<String,Integer>wordToCount :sortedWordToCountPairs.collect()) {
3843
System.out.println(wordToCount._1() +" : " +wordToCount._2());

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp