@@ -5,8 +5,8 @@ import org.apache.spark.sql.SparkSession
55
66object TypedDataset {
77
8- val AGE_MIDPOINT = " ageMidpoint "
9- val SALARY_MIDPOINT = " salaryMidPoint "
8+ val AGE_MIDPOINT = " age_midpoint "
9+ val SALARY_MIDPOINT = " salary_midpoint "
1010val SALARY_MIDPOINT_BUCKET = " salaryMidpointBucket"
1111
1212def main (args :Array [String ]) {
@@ -24,9 +24,9 @@ object TypedDataset {
2424
2525val responseWithRenamedColumns = responseWithSelectedColumns
2626 .withColumn(" country" , responses.col(" country" ))
27- .withColumn(AGE_MIDPOINT , responses.col(" age_midpoint " ).cast(" integer" ))
27+ .withColumn(AGE_MIDPOINT , responses.col(AGE_MIDPOINT ).cast(" integer" ))
2828 .withColumn(" occupation" , responses.col(" occupation" ))
29- .withColumn(SALARY_MIDPOINT , responses.col(" salary_midpoint " ).cast(" integer" ))
29+ .withColumn(SALARY_MIDPOINT , responses.col(SALARY_MIDPOINT ).cast(" integer" ))
3030
3131import session .implicits ._
3232val typedDataset = responseWithRenamedColumns.as[Response ]
@@ -44,16 +44,16 @@ object TypedDataset {
4444 typedDataset.groupBy(typedDataset.col(" occupation" )).count().show()
4545
4646System .out.println(" === Print responses with average mid age less than 20 ===" )
47- typedDataset.filter(response=> response.ageMidPoint .isDefined&& response.ageMidPoint .get< 20 ).show()
47+ typedDataset.filter(response=> response.age_midpoint .isDefined&& response.age_midpoint .get< 20 ).show()
4848
4949System .out.println(" === Print the result by salary middle point in descending order ===" )
5050 typedDataset.orderBy(typedDataset.col(SALARY_MIDPOINT ).desc).show()
5151
5252System .out.println(" === Group by country and aggregate by average salary middle point ===" )
53- typedDataset.filter(response=> response.salaryMidPoint .isDefined).groupBy(" country" ).avg(SALARY_MIDPOINT ).show()
53+ typedDataset.filter(response=> response.salary_midpoint .isDefined).groupBy(" country" ).avg(SALARY_MIDPOINT ).show()
5454
5555System .out.println(" === Group by salary bucket ===" )
56- typedDataset.map(response=> response.salaryMidPoint .map(point=> Math .round(point/ 20000 )* 20000 ).orElse(None ))
56+ typedDataset.map(response=> response.salary_midpoint .map(point=> Math .round(point/ 20000 )* 20000 ).orElse(None ))
5757 .withColumnRenamed(" value" ,SALARY_MIDPOINT_BUCKET )
5858 .groupBy(SALARY_MIDPOINT_BUCKET )
5959 .count()