Commit f940c1a

authored

Update TypedDataset.scala

1 parent 0838a86, commit f940c1a (Copy full SHA for f940c1a)

File tree

1 file changed

+7 -7

lines changed

src/main/scala/com/sparkTutorial/sparkSql
- TypedDataset.scala

1 file changed

+7 -7

lines changed

`‎src/main/scala/com/sparkTutorial/sparkSql/TypedDataset.scala‎`

Lines changed: 7 additions & 7 deletions

Original file line number	Diff line number	Diff line change
`@@ -5,8 +5,8 @@ import org.apache.spark.sql.SparkSession`
`5`	`5`
`6`	`6`	`objectTypedDataset {`
`7`	`7`
`8`		`-valAGE_MIDPOINT="ageMidpoint"`
`9`		`-valSALARY_MIDPOINT="salaryMidPoint"`
	`8`	`+valAGE_MIDPOINT="age_midpoint"`
	`9`	`+valSALARY_MIDPOINT="salary_midpoint"`
`10`	`10`	`valSALARY_MIDPOINT_BUCKET="salaryMidpointBucket"`
`11`	`11`
`12`	`12`	`defmain(args:Array[String]) {`
`@@ -24,9 +24,9 @@ object TypedDataset {`
`24`	`24`
`25`	`25`	`valresponseWithRenamedColumns= responseWithSelectedColumns`
`26`	`26`	`.withColumn("country", responses.col("country"))`
`27`		`- .withColumn(AGE_MIDPOINT, responses.col("age_midpoint").cast("integer"))`
	`27`	`+ .withColumn(AGE_MIDPOINT, responses.col(AGE_MIDPOINT).cast("integer"))`
`28`	`28`	`.withColumn("occupation", responses.col("occupation"))`
`29`		`- .withColumn(SALARY_MIDPOINT, responses.col("salary_midpoint").cast("integer"))`
	`29`	`+ .withColumn(SALARY_MIDPOINT, responses.col(SALARY_MIDPOINT).cast("integer"))`
`30`	`30`
`31`	`31`	`importsession.implicits._`
`32`	`32`	`valtypedDataset= responseWithRenamedColumns.as[Response]`
`@@ -44,16 +44,16 @@ object TypedDataset {`
`44`	`44`	`typedDataset.groupBy(typedDataset.col("occupation")).count().show()`
`45`	`45`
`46`	`46`	`System.out.println("=== Print responses with average mid age less than 20 ===")`
`47`		`- typedDataset.filter(response=> response.ageMidPoint.isDefined&& response.ageMidPoint.get<20).show()`
	`47`	`+ typedDataset.filter(response=> response.age_midpoint.isDefined&& response.age_midpoint.get<20).show()`
`48`	`48`
`49`	`49`	`System.out.println("=== Print the result by salary middle point in descending order ===")`
`50`	`50`	`typedDataset.orderBy(typedDataset.col(SALARY_MIDPOINT).desc).show()`
`51`	`51`
`52`	`52`	`System.out.println("=== Group by country and aggregate by average salary middle point ===")`
`53`		`- typedDataset.filter(response=> response.salaryMidPoint.isDefined).groupBy("country").avg(SALARY_MIDPOINT).show()`
	`53`	`+ typedDataset.filter(response=> response.salary_midpoint.isDefined).groupBy("country").avg(SALARY_MIDPOINT).show()`
`54`	`54`
`55`	`55`	`System.out.println("=== Group by salary bucket ===")`
`56`		`- typedDataset.map(response=> response.salaryMidPoint.map(point=>Math.round(point/20000)*20000).orElse(None))`
	`56`	`+ typedDataset.map(response=> response.salary_midpoint.map(point=>Math.round(point/20000)*20000).orElse(None))`
`57`	`57`	`.withColumnRenamed("value",SALARY_MIDPOINT_BUCKET)`
`58`	`58`	`.groupBy(SALARY_MIDPOINT_BUCKET)`
`59`	`59`	`.count()`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit f940c1a

File tree

1 file changed

1 file changed

`‎src/main/scala/com/sparkTutorial/sparkSql/TypedDataset.scala‎`

0 commit comments