Commit 83216e1

Merge pull request jleetutorial#7 from jleetutorial/pedro-changes-path

Added sys path to guarantee imports | Added SparkConf to all files

2 parents 3ec564f + ac8e586, commit 83216e1

18 files changed (+145, -123 lines changed)
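Every file below gets the same two changes: the project root is pushed onto sys.path so that imports such as commons.Utils resolve when the scripts are launched from the repository root, and the positional SparkContext("local", "<appName>") constructor (plus, where present, the setLogLevel("ERROR") call) is replaced by an explicit SparkConf. A minimal sketch of the new boilerplate, with a placeholder app name (the app name and master string vary per file):

import sys
sys.path.insert(0, '.')  # make commons/, pairRdd/, etc. importable from the repo root

from pyspark import SparkContext, SparkConf

if __name__ == "__main__":
    # replaces sc = SparkContext("local", "<appName>") and sc.setLogLevel("ERROR")
    conf = SparkConf().setAppName("exampleApp").setMaster("local[*]")
    sc = SparkContext(conf=conf)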
Lines changed: 14 additions & 15 deletions
@@ -1,25 +1,24 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
 from commons.Utils import Utils
 
-def filterResponseFromCanada(response, total, missingSalaryMidPoint):
-    splits = Utils.COMMA_DELIMITER.split(response)
-    total.add(1)
-    if not splits[14]:
-        missingSalaryMidPoint.add(1)
-    return splits[2] == "Canada"
-
 if __name__ == "__main__":
-    sc = SparkContext("local", "StackOverFlowSurvey")
-    sc.setLogLevel("ERROR")
-
+    conf = SparkConf().setAppName('StackOverFlowSurvey').setMaster("local[*]")
+    sc = SparkContext(conf=conf)
     total = sc.accumulator(0)
     missingSalaryMidPoint = sc.accumulator(0)
-
     responseRDD = sc.textFile("in/2016-stack-overflow-survey-responses.csv")
 
-    responseFromCanada = responseRDD.filter(lambda response: \
-        filterResponseFromCanada(response, total, missingSalaryMidPoint))
+    def filterResponseFromCanada(response):
+        splits = Utils.COMMA_DELIMITER.split(response)
+        total.add(1)
+        if not splits[14]:
+            missingSalaryMidPoint.add(1)
+        return splits[2] == "Canada"
 
+    responseFromCanada = responseRDD.filter(filterResponseFromCanada)
     print("Count of responses from Canada: {}".format(responseFromCanada.count()))
     print("Total count of responses: {}".format(total.value))
-    print("Count of responses missing salary middle point: {}".format(missingSalaryMidPoint.value))
+    print("Count of responses missing salary middle point: {}" \
+        .format(missingSalaryMidPoint.value))
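The filter helper is now defined inside the __main__ block and takes only the response: the total and missingSalaryMidPoint accumulators are captured by closure, so the wrapping lambda that used to forward them is no longer needed. A small self-contained sketch of the same idea (the app name and input data here are illustrative only):

from pyspark import SparkContext, SparkConf

if __name__ == "__main__":
    conf = SparkConf().setAppName("accumulatorClosureDemo").setMaster("local[*]")
    sc = SparkContext(conf=conf)

    total = sc.accumulator(0)

    def keepPositive(value):
        total.add(1)          # accumulator reached through the enclosing scope
        return value > 0

    kept = sc.parallelize([1, -2, 3]).filter(keepPositive)
    print(kept.count())       # 2
    print(total.value)        # 3, once the count() action has evaluated the filter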

‎advanced/accumulator/StackOverFlowSurveyFollowUp.py

Lines changed: 13 additions & 14 deletions
@@ -1,26 +1,25 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
 from commons.Utils import Utils
 
-def filterResponseFromCanada(response, total, missingSalaryMidPoint, processedBytes):
-    processedBytes.add(len(response.encode('utf-8')))
-    splits = Utils.COMMA_DELIMITER.split(response)
-    total.add(1)
-    if not splits[14]:
-        missingSalaryMidPoint.add(1)
-    return splits[2] == "Canada"
-
 if __name__ == "__main__":
-    sc = SparkContext("local", "StackOverFlowSurvey")
-    sc.setLogLevel("ERROR")
+    conf = SparkConf().setAppName('StackOverFlowSurvey').setMaster("local[*]")
+    sc = SparkContext(conf=conf)
 
     total = sc.accumulator(0)
     missingSalaryMidPoint = sc.accumulator(0)
     processedBytes = sc.accumulator(0)
-
     responseRDD = sc.textFile("in/2016-stack-overflow-survey-responses.csv")
 
-    responseFromCanada = responseRDD.filter(lambda response: \
-        filterResponseFromCanada(response, total, missingSalaryMidPoint, processedBytes))
+    def filterResponseFromCanada(response):
+        processedBytes.add(len(response.encode('utf-8')))
+        splits = Utils.COMMA_DELIMITER.split(response)
+        total.add(1)
+        if not splits[14]:
+            missingSalaryMidPoint.add(1)
+        return splits[2] == "Canada"
+    responseFromCanada = responseRDD.filter(filterResponseFromCanada)
 
     print("Count of responses from Canada: {}".format(responseFromCanada.count()))
     print("Number of bytes processed: {}".format(processedBytes.value))

‎advanced/broadcast/UkMakerSpaces.py

Lines changed: 10 additions & 8 deletions
@@ -1,19 +1,21 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
 from commons.Utils import Utils
 
-def getPostPrefix(line: str):
-    splits = Utils.COMMA_DELIMITER.split(line)
-    postcode = splits[4]
-    return None if not postcode else postcode.split(" ")[0]
-
 def loadPostCodeMap():
     lines = open("in/uk-postcode.csv", "r").read().split("\n")
     splitsForLines = [Utils.COMMA_DELIMITER.split(line) for line in lines if line != ""]
     return {splits[0]: splits[7] for splits in splitsForLines}
 
+def getPostPrefix(line: str):
+    splits = Utils.COMMA_DELIMITER.split(line)
+    postcode = splits[4]
+    return None if not postcode else postcode.split(" ")[0]
+
 if __name__ == "__main__":
-    sc = SparkContext("local", "UkMakerSpaces")
-    sc.setLogLevel("ERROR")
+    conf = SparkConf().setAppName('UkMakerSpaces').setMaster("local[*]")
+    sc = SparkContext(conf=conf)
 
     postCodeMap = sc.broadcast(loadPostCodeMap())
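Only the creation of the broadcast variable is visible in this hunk; the rest of UkMakerSpaces.py presumably reads it on the executors through its .value attribute. A hedged, self-contained sketch of that pattern (the lookup data and app name are made up for illustration):

from pyspark import SparkContext, SparkConf

if __name__ == "__main__":
    conf = SparkConf().setAppName("broadcastDemo").setMaster("local[*]")
    sc = SparkContext(conf=conf)

    # a small lookup table shipped once to every executor instead of with every task
    postCodeMap = sc.broadcast({"AB1": "Aberdeen", "CB4": "Cambridge"})

    prefixes = sc.parallelize(["AB1", "ZZ9", "CB4"])
    regions = prefixes.map(lambda p: postCodeMap.value.get(p, "Unknown"))
    print(regions.collect())  # ['Aberdeen', 'Unknown', 'Cambridge']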

Lines changed: 12 additions & 10 deletions
@@ -1,26 +1,28 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
 from commons.Utils import Utils
 
-def getPostPrefixes(line: str):
-    postcode = Utils.COMMA_DELIMITER.split(line)[4]
-    cleanedPostCode = postcode.replace("\\s+", "")
-    return [cleanedPostCode[0:i] for i in range(0, len(cleanedPostCode) + 1)]
-
 def loadPostCodeMap():
     lines = open("in/uk-postcode.csv", "r").read().split("\n")
     splitsForLines = [Utils.COMMA_DELIMITER.split(line) for line in lines if line != ""]
     return {splits[0]: splits[7] for splits in splitsForLines}
 
+def getPostPrefix(line: str):
+    splits = Utils.COMMA_DELIMITER.split(line)
+    postcode = splits[4]
+    return None if not postcode else postcode.split(" ")[0]
+
 if __name__ == "__main__":
-    sc = SparkContext("local", "UkMakerSpaces")
-    sc.setLogLevel("ERROR")
+    conf = SparkConf().setAppName('UkMakerSpaces').setMaster("local[*]")
+    sc = SparkContext(conf=conf)
     postCodeMap = loadPostCodeMap()
     makerSpaceRdd = sc.textFile("in/uk-makerspaces-identifiable-data.csv")
 
     regions = makerSpaceRdd \
         .filter(lambda line: Utils.COMMA_DELIMITER.split(line)[0] != "Timestamp") \
-        .map(lambda line: next((postCodeMap[prefix] for prefix in getPostPrefixes(line) \
-            if prefix in postCodeMap), "Unknow"))
+        .map(lambda line: postCodeMap[getPostPrefix(line)] \
+            if getPostPrefix(line) in postCodeMap else "Unknow")
 
     for region, count in regions.countByValue().items():
         print("{} : {}".format(region, count))

‎pairRdd/aggregation/combinebykey/AverageHousePriceSolution.py

Lines changed: 3 additions & 4 deletions
@@ -1,9 +1,8 @@
-from pyspark import SparkContext
+from pyspark import SparkContext, SparkConf
 
 if __name__ == "__main__":
-
-    sc = SparkContext("local", "AverageHousePrice")
-    sc.setLogLevel("ERROR")
+    conf = SparkConf().setAppName("AverageHousePrice").setMaster("local")
+    sc = SparkContext(conf=conf)
 
     lines = sc.textFile("in/RealEstate.csv")
     cleanedLines = lines.filter(lambda line: "Bedrooms" not in line)
Lines changed: 11 additions & 9 deletions
@@ -1,24 +1,26 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
+from pairRdd.aggregation.reducebykey.housePrice.AvgCount import AvgCount
 
 if __name__ == "__main__":
-
-    sc = SparkContext("local", "avgHousePrice")
-    sc.setLogLevel("ERROR")
+    conf = SparkConf().setAppName("avgHousePrice").setMaster("local[3]")
+    sc = SparkContext(conf=conf)
 
     lines = sc.textFile("in/RealEstate.csv")
     cleanedLines = lines.filter(lambda line: "Bedrooms" not in line)
 
     housePricePairRdd = cleanedLines.map(lambda line: \
-        (line.split(",")[3], (1, float(line.split(",")[2]))))
+        (line.split(",")[3], AvgCount(1, float(line.split(",")[2]))))
 
     housePriceTotal = housePricePairRdd \
-        .reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1]))
+        .reduceByKey(lambda x, y: AvgCount(x.count + y.count, x.total + y.total))
 
     print("housePriceTotal: ")
-    for bedroom, total in housePriceTotal.collect():
-        print("{} :{}".format(bedroom, total))
+    for bedroom, avgCount in housePriceTotal.collect():
+        print("{} :({}, {})".format(bedroom, avgCount.count, avgCount.total))
 
-    housePriceAvg = housePriceTotal.mapValues(lambda avgCount: avgCount[1] / avgCount[0])
+    housePriceAvg = housePriceTotal.mapValues(lambda avgCount: avgCount.total / avgCount.count)
     print("\nhousePriceAvg: ")
     for bedroom, avg in housePriceAvg.collect():
         print("{} : {}".format(bedroom, avg))
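The AvgCount class imported at the top of this file is not shown in the diff; judging from its use with reduceByKey and mapValues it is a small value holder with count and total attributes, roughly like the sketch below (an assumption, not the repository's actual definition). reduceByKey then merges the per-key pairs and mapValues divides total by count to produce the average price.

class AvgCount():
    def __init__(self, count: int, total: float):
        self.count = count
        self.total = total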

‎pairRdd/filter/AirportsNotInUsaSolution.py

Lines changed: 5 additions & 3 deletions
@@ -1,10 +1,12 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
 from commons.Utils import Utils
 
 if __name__ == "__main__":
 
-    sc = SparkContext("local", "airports")
-    sc.setLogLevel("ERROR")
+    conf = SparkConf().setAppName("airports").setMaster("local[*]")
+    sc = SparkContext(conf=conf)
 
     airportsRDD = sc.textFile("in/airports.text")

Lines changed: 6 additions & 4 deletions
@@ -1,10 +1,12 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
 from commons.Utils import Utils
 
 if __name__ == "__main__":
 
-    sc = SparkContext("local", "airports")
-    sc.setLogLevel("ERROR")
+    conf = SparkConf().setAppName("airports").setMaster("local[*]")
+    sc = SparkContext(conf=conf)
 
     lines = sc.textFile("in/airports.text")
 
@@ -15,4 +17,4 @@
     airportsByCountry = countryAndAirportNameAndPair.groupByKey()
 
     for country, airportName in airportsByCountry.collectAsMap().items():
-        print("{}: {}".format(country, list(airportName)))
+        print("{}: {}".format(country, list(airportName)))

‎pairRdd/mapValues/AirportsUppercaseSolution.py

Lines changed: 5 additions & 4 deletions
@@ -1,10 +1,11 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
 from commons.Utils import Utils
 
 if __name__ == "__main__":
-
-    sc = SparkContext("local", "airports")
-    sc.setLogLevel("ERROR")
+    conf = SparkConf().setAppName("airports").setMaster("local[*]")
+    sc = SparkContext(conf=conf)
 
     airportsRDD = sc.textFile("in/airports.text")

‎pairRdd/sort/AverageHousePriceSolution.py

Lines changed: 5 additions & 5 deletions
@@ -1,11 +1,11 @@
+import sys
+sys.path.insert(0, '.')
 from pairRdd.aggregation.reducebykey.housePrice.AvgCount import AvgCount
-from pyspark import SparkContext
-
+from pyspark import SparkContext, SparkConf
 
 if __name__ == "__main__":
-
-    sc = SparkContext("local", "averageHousePriceSolution")
-    sc.setLogLevel("ERROR")
+    conf = SparkConf().setAppName("averageHousePriceSolution").setMaster("local[*]")
+    sc = SparkContext(conf=conf)
 
     lines = sc.textFile("in/RealEstate.csv")
     cleanedLines = lines.filter(lambda line: "Bedrooms" not in line)

‎rdd/airports/AirportsByLatitudeSolution.py

Lines changed: 5 additions & 2 deletions
@@ -1,12 +1,15 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
 from commons.Utils import Utils
 
 def splitComma(line: str):
     splits = Utils.COMMA_DELIMITER.split(line)
     return "{}, {}".format(splits[1], splits[6])
 
 if __name__ == "__main__":
-    sc = SparkContext("local", "airports")
+    conf = SparkConf().setAppName("airports").setMaster("local[*]")
+    sc = SparkContext(conf=conf)
 
     airports = sc.textFile("in/airports.text")

‎rdd/airports/AirportsInUsaSolution.py

Lines changed: 5 additions & 2 deletions
@@ -1,12 +1,15 @@
-from pyspark import SparkContext
+import sys
+sys.path.insert(0, '.')
+from pyspark import SparkContext, SparkConf
 from commons.Utils import Utils
 
 def splitComma(line: str):
     splits = Utils.COMMA_DELIMITER.split(line)
     return "{}, {}".format(splits[1], splits[2])
 
 if __name__ == "__main__":
-    sc = SparkContext("local", "airports")
+    conf = SparkConf().setAppName("airports").setMaster("local[*]")
+    sc = SparkContext(conf=conf)
 
     airports = sc.textFile("in/airports.text")
     airportsInUSA = airports.filter(lambda line: Utils.COMMA_DELIMITER.split(line)[3] == "\"United States\"")

‎rdd/count/CountExample.py

Lines changed: 3 additions & 0 deletions
@@ -3,9 +3,12 @@
 if __name__ == "__main__":
     conf = SparkConf().setAppName("count").setMaster("local[*]")
     sc = SparkContext(conf=conf)
+
     inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
+
     wordRdd = sc.parallelize(inputWords)
     print("Count: {}".format(wordRdd.count()))
+
     worldCountByValue = wordRdd.countByValue()
     print("CountByValue: ")
     for word, count in worldCountByValue.items():

‎sparkSql/HousePriceProblem.py

Lines changed: 7 additions & 6 deletions
@@ -4,20 +4,21 @@
     Create a Spark program to read the house data from in/RealEstate.csv,
     group by location, aggregate the average price per SQ Ft and sort by average price per SQ Ft.
 
-    The houses dataset contains a collection of recent real estate listings in San Luis Obispo county and
-    around it.
+    The houses dataset contains a collection of recent real estate listings in
+    San Luis Obispo county and around it.
 
     The dataset contains the following fields:
     1. MLS: Multiple listing service number for the house (unique ID).
-    2. Location: city/town where the house is located. Most locations are in San Luis Obispo county and
-    northern Santa Barbara county (Santa Maria-Orcutt, Lompoc, Guadelupe, Los Alamos), but there
-    some out of area locations as well.
+    2. Location: city/town where the house is located. Most locations are in
+    San Luis Obispo county and northern Santa Barbara county (Santa Maria-Orcutt, Lompoc,
+    Guadelupe, Los Alamos), but there some out of area locations as well.
    3. Price: the most recent listing price of the house (in dollars).
     4. Bedrooms: number of bedrooms.
     5. Bathrooms: number of bathrooms.
     6. Size: size of the house in square feet.
     7. Price/SQ.ft: price of the house per square foot.
-    8. Status: type of sale. Thee types are represented in the dataset: Short Sale, Foreclosure and Regular.
+    8. Status: type of sale. Thee types are represented in the dataset: Short Sale,
+    Foreclosure and Regular.
 
     Each field is comma separated.
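The reflowed docstring above only restates the exercise. As a hedged sketch, one way to express it with the DataFrame API would be to group by Location, average the price-per-square-foot column, and sort on that average (the exact column header is assumed here; the docstring calls the field "Price/SQ.ft"):

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

if __name__ == "__main__":
    session = SparkSession.builder.appName("HousePriceProblem").master("local[*]").getOrCreate()

    realEstate = session.read \
        .option("header", "true") \
        .option("inferSchema", value=True) \
        .csv("in/RealEstate.csv")

    # column name assumed; adjust to match the actual CSV header
    avgByLocation = realEstate.groupBy("Location") \
        .agg(F.avg("Price SQ Ft").alias("avgPricePerSqFt")) \
        .orderBy("avgPricePerSqFt")

    avgByLocation.show()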

‎sparkSql/HousePriceSolution.py

Lines changed: 2 additions & 2 deletions
@@ -4,8 +4,8 @@
 
 if __name__ == "__main__":
 
-    session = SparkSession.builder.appName("HousePriceSolution").master("local").getOrCreate()
-    session.sparkContext.setLogLevel("ERROR")
+    session = SparkSession.builder.appName("HousePriceSolution").master("local[*]").getOrCreate()
+
     realEstate = session.read \
         .option("header", "true") \
         .option("inferSchema", value=True) \
