Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit10ec52f

Browse files
authored
feat: add bigframes.bigquery.st_regionstats to join raster data from Earth Engine (#2228)
* feat: add bigframes.bigquery.st_regionstats to join raster data from Earth Engine* upgrade sqlglot* address samples lint error* avoid sqlglot rtrim/ltrim bug
1 parent5663d2a commit10ec52f

File tree

16 files changed

+399
-7
lines changed

16 files changed

+399
-7
lines changed

‎bigframes/bigquery/__init__.py‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
st_intersection,
4141
st_isclosed,
4242
st_length,
43+
st_regionstats,
4344
st_simplify,
4445
)
4546
frombigframes.bigquery._operations.jsonimport (
@@ -81,6 +82,7 @@
8182
st_intersection,
8283
st_isclosed,
8384
st_length,
85+
st_regionstats,
8486
st_simplify,
8587
# json ops
8688
json_extract,

‎bigframes/bigquery/_operations/geo.py‎

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414

1515
from __future__importannotations
1616

17-
fromtypingimportUnion
17+
importjson
18+
fromtypingimportMapping,Optional,Union
1819

1920
importshapely# type: ignore
2021

2122
frombigframesimportoperationsasops
23+
importbigframes.dataframe
2224
importbigframes.geopandas
2325
importbigframes.series
2426

@@ -677,6 +679,65 @@ def st_length(
677679
returnseries
678680

679681

682+
defst_regionstats(
683+
geography:Union[bigframes.series.Series,bigframes.geopandas.GeoSeries],
684+
raster_id:str,
685+
band:Optional[str]=None,
686+
include:Optional[str]=None,
687+
options:Optional[Mapping[str,Union[str,int,float]]]=None,
688+
)->bigframes.series.Series:
689+
"""Returns statistics summarizing the pixel values of the raster image
690+
referenced by raster_id that intersect with geography.
691+
692+
The statistics include the count, minimum, maximum, sum, standard
693+
deviation, mean, and area of the valid pixels of the raster band named
694+
band_name. Google Earth Engine computes the results of the function call.
695+
696+
See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats
697+
698+
Args:
699+
geography (bigframes.series.Series | bigframes.geopandas.GeoSeries):
700+
A series of geography objects to intersect with the raster image.
701+
raster_id (str):
702+
A string that identifies a raster image. The following formats are
703+
supported. A URI from an image table provided by Google Earth Engine
704+
in BigQuery sharing (formerly Analytics Hub). A URI for a readable
705+
GeoTIFF raster file. A Google Earth Engine asset path that
706+
references public catalog data or project-owned assets with read
707+
access.
708+
band (Optional[str]):
709+
A string in one of the following formats:
710+
A single band within the raster image specified by raster_id. A
711+
formula to compute a value from the available bands in the raster
712+
image. The formula uses the Google Earth Engine image expression
713+
syntax. Bands can be referenced by their name, band_name, in
714+
expressions. If you don't specify a band, the first band of the
715+
image is used.
716+
include (Optional[str]):
717+
An optional string formula that uses the Google Earth Engine image
718+
expression syntax to compute a pixel weight. The formula should
719+
return values from 0 to 1. Values outside this range are set to the
720+
nearest limit, either 0 or 1. A value of 0 means that the pixel is
721+
invalid and it's excluded from analysis. A positive value means that
722+
a pixel is valid. Values between 0 and 1 represent proportional
723+
weights for calculations, such as weighted means.
724+
options (Mapping[str, Union[str, int, float]], optional):
725+
A dictionary of options to pass to the function. See the BigQuery
726+
documentation for a list of available options.
727+
728+
Returns:
729+
bigframes.pandas.Series:
730+
A STRUCT Series containing the computed statistics.
731+
"""
732+
op=ops.GeoStRegionStatsOp(
733+
raster_id=raster_id,
734+
band=band,
735+
include=include,
736+
options=json.dumps(options)ifoptionselseNone,
737+
)
738+
returngeography._apply_unary_op(op)
739+
740+
680741
defst_simplify(
681742
geography:"bigframes.series.Series",
682743
tolerance_meters:float,

‎bigframes/core/compile/ibis_compiler/operations/geo_ops.py‎

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616

1717
fromtypingimportcast
1818

19+
frombigframes_vendoredimportibis
1920
frombigframes_vendored.ibis.exprimporttypesasibis_types
2021
importbigframes_vendored.ibis.expr.datatypesasibis_dtypes
22+
importbigframes_vendored.ibis.expr.operations.geospatialasibis_geo
2123
importbigframes_vendored.ibis.expr.operations.udfasibis_udf
2224

2325
frombigframes.core.compile.ibis_compilerimportscalar_op_compiler
@@ -101,6 +103,35 @@ def geo_st_isclosed_op_impl(x: ibis_types.Value):
101103
returnst_isclosed(x)
102104

103105

106+
@register_unary_op(ops.GeoStRegionStatsOp,pass_op=True)
107+
defgeo_st_regionstats_op_impl(
108+
geography:ibis_types.Value,
109+
op:ops.GeoStRegionStatsOp,
110+
):
111+
ifop.band:
112+
band=ibis.literal(op.band,type=ibis_dtypes.string())
113+
else:
114+
band=None
115+
116+
ifop.include:
117+
include=ibis.literal(op.include,type=ibis_dtypes.string())
118+
else:
119+
include=None
120+
121+
ifop.options:
122+
options=ibis.literal(op.options,type=ibis_dtypes.json())
123+
else:
124+
options=None
125+
126+
returnibis_geo.GeoRegionStats(
127+
arg=geography,# type: ignore
128+
raster_id=ibis.literal(op.raster_id,type=ibis_dtypes.string()),# type: ignore
129+
band=band,# type: ignore
130+
include=include,# type: ignore
131+
options=options,# type: ignore
132+
).to_expr()
133+
134+
104135
@register_unary_op(ops.GeoStSimplifyOp,pass_op=True)
105136
defst_simplify_op_impl(x:ibis_types.Value,op:ops.GeoStSimplifyOp):
106137
x=cast(ibis_types.GeoSpatialValue,x)

‎bigframes/core/compile/sqlglot/expressions/geo_ops.py‎

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,32 @@ def _(expr: TypedExpr, op: ops.GeoStLengthOp) -> sge.Expression:
7474
returnsge.func("ST_LENGTH",expr.expr)
7575

7676

77+
@register_unary_op(ops.GeoStRegionStatsOp,pass_op=True)
78+
def_(
79+
geography:TypedExpr,
80+
op:ops.GeoStRegionStatsOp,
81+
):
82+
args= [geography.expr,sge.convert(op.raster_id)]
83+
ifop.band:
84+
args.append(sge.Kwarg(this="band",expression=sge.convert(op.band)))
85+
ifop.include:
86+
args.append(sge.Kwarg(this="include",expression=sge.convert(op.include)))
87+
ifop.options:
88+
args.append(
89+
sge.Kwarg(this="options",expression=sge.JSON(this=sge.convert(op.options)))
90+
)
91+
returnsge.func("ST_REGIONSTATS",*args)
92+
93+
94+
@register_unary_op(ops.GeoStSimplifyOp,pass_op=True)
95+
def_(expr:TypedExpr,op:ops.GeoStSimplifyOp)->sge.Expression:
96+
returnsge.func(
97+
"ST_SIMPLIFY",
98+
expr.expr,
99+
sge.convert(op.tolerance_meters),
100+
)
101+
102+
77103
@register_unary_op(ops.geo_x_op)
78104
def_(expr:TypedExpr)->sge.Expression:
79105
returnsge.func("SAFE.ST_X",expr.expr)

‎bigframes/operations/__init__.py‎

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
GeoStBufferOp,
122122
GeoStDistanceOp,
123123
GeoStLengthOp,
124+
GeoStRegionStatsOp,
124125
GeoStSimplifyOp,
125126
)
126127
frombigframes.operations.json_opsimport (
@@ -415,12 +416,13 @@
415416
"geo_st_geogpoint_op",
416417
"geo_st_intersection_op",
417418
"geo_st_isclosed_op",
418-
"GeoStBufferOp",
419-
"GeoStLengthOp",
420-
"GeoStSimplifyOp",
421419
"geo_x_op",
422420
"geo_y_op",
421+
"GeoStBufferOp",
423422
"GeoStDistanceOp",
423+
"GeoStLengthOp",
424+
"GeoStRegionStatsOp",
425+
"GeoStSimplifyOp",
424426
# AI ops
425427
"AIClassify",
426428
"AIGenerate",

‎bigframes/operations/geo_ops.py‎

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
importdataclasses
16+
fromtypingimportOptional
1617

1718
frombigframesimportdtypes
1819
frombigframes.operationsimportbase_ops
@@ -135,6 +136,29 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
135136
returndtypes.FLOAT_DTYPE
136137

137138

139+
@dataclasses.dataclass(frozen=True)
140+
classGeoStRegionStatsOp(base_ops.UnaryOp):
141+
"""See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats"""
142+
143+
name="geo_st_regionstats"
144+
raster_id:str
145+
band:Optional[str]
146+
include:Optional[str]
147+
options:Optional[str]
148+
149+
defoutput_type(self,*input_types:dtypes.ExpressionType)->dtypes.ExpressionType:
150+
returndtypes.struct_type(
151+
[
152+
("min",dtypes.FLOAT_DTYPE),
153+
("max",dtypes.FLOAT_DTYPE),
154+
("sum",dtypes.FLOAT_DTYPE),
155+
("count",dtypes.INT_DTYPE),
156+
("mean",dtypes.FLOAT_DTYPE),
157+
("area",dtypes.FLOAT_DTYPE),
158+
]
159+
)
160+
161+
138162
@dataclasses.dataclass(frozen=True)
139163
classGeoStSimplifyOp(base_ops.UnaryOp):
140164
name="st_simplify"
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Code sample for https://docs.cloud.google.com/bigquery/docs/raster-data#analytics-hub-source"""
16+
17+
18+
deftest_st_regionstats()->None:
19+
project_id="bigframes-dev"
20+
21+
# [START bigquery_dataframes_st_regionstats]
22+
importdatetime
23+
fromtypingimportcast
24+
25+
importbigframes.bigqueryasbbq
26+
importbigframes.pandasasbpd
27+
28+
# TODO: Set the project_id to your Google Cloud project ID.
29+
# project_id = "your-project-id"
30+
bpd.options.bigquery.project=project_id
31+
32+
# TODO: Set the dataset_id to the ID of the dataset that contains the
33+
# `climate` table. This is likely a linked dataset to Earth Engine.
34+
# See: https://cloud.google.com/bigquery/docs/link-earth-engine
35+
linked_dataset="era5_land_daily_aggregated"
36+
37+
# For the best efficiency, use partial ordering mode.
38+
bpd.options.bigquery.ordering_mode="partial"
39+
40+
# Load the table of country boundaries.
41+
countries=bpd.read_gbq("bigquery-public-data.overture_maps.division_area")
42+
43+
# Filter to just the countries.
44+
countries=countries[countries["subtype"]=="country"].copy()
45+
countries["name"]=countries["names"].struct.field("primary")
46+
countries["simplified_geometry"]=bbq.st_simplify(
47+
countries["geometry"],
48+
tolerance_meters=10_000,
49+
)
50+
51+
# Get the reference to the temperature data from a linked dataset.
52+
# Note: This sample assumes you have a linked dataset to Earth Engine.
53+
image_href= (
54+
bpd.read_gbq(f"{project_id}.{linked_dataset}.climate")
55+
.set_index("start_datetime")
56+
.loc[[datetime.datetime(2025,1,1,tzinfo=datetime.timezone.utc)], :]
57+
)
58+
raster_id=image_href["assets"].struct.field("image").struct.field("href")
59+
raster_id=raster_id.item()
60+
stats=bbq.st_regionstats(
61+
countries["simplified_geometry"],
62+
raster_id=cast(str,raster_id),
63+
band="temperature_2m",
64+
)
65+
66+
# Extract the mean and convert from Kelvin to Celsius.
67+
countries["mean_temperature"]=stats.struct.field("mean")-273.15
68+
69+
# Sort by the mean temperature to find the warmest countries.
70+
result=countries[["name","mean_temperature"]].sort_values(
71+
"mean_temperature",ascending=False
72+
)
73+
print(result.head(10))
74+
# [END bigquery_dataframes_st_regionstats]
75+
76+
assertlen(result)>0
77+
78+
79+
if__name__=="__main__":
80+
test_st_regionstats()

‎setup.py‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@
5454
"pydata-google-auth >=1.8.2",
5555
"requests >=2.27.1",
5656
"shapely >=1.8.5",
57-
"sqlglot >=23.6.3",
57+
# 25.20.0 introduces this fix https://github.com/TobikoData/sqlmesh/issues/3095 for rtrim/ltrim.
58+
"sqlglot >=25.20.0",
5859
"tabulate >=0.9",
5960
"ipywidgets >=7.7.1",
6061
"humanize >=4.6.0",

‎testing/constraints-3.9.txt‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ pydata-google-auth==1.8.2
2121
requests==2.27.1
2222
scikit-learn==1.2.2
2323
shapely==1.8.5
24-
sqlglot==23.6.3
24+
sqlglot==25.20.0
2525
tabulate==0.9
2626
ipywidgets==7.7.1
2727
humanize==4.6.0
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
WITH`bfcte_0`AS (
2+
SELECT
3+
*
4+
FROM UNNEST(ARRAY<STRUCT<`bfcol_0` STRING,`bfcol_1` INT64>>[STRUCT('POINT(1 1)',0)])
5+
),`bfcte_1`AS (
6+
SELECT
7+
*,
8+
ST_REGIONSTATS(
9+
`bfcol_0`,
10+
'ee://some/raster/uri',
11+
band=>'band1',
12+
include=>'some equation',
13+
options=> JSON'{"scale": 100}'
14+
)AS`bfcol_2`
15+
FROM`bfcte_0`
16+
),`bfcte_2`AS (
17+
SELECT
18+
*,
19+
`bfcol_2`.`min`AS`bfcol_5`,
20+
`bfcol_2`.`max`AS`bfcol_6`,
21+
`bfcol_2`.`sum`AS`bfcol_7`,
22+
`bfcol_2`.`count`AS`bfcol_8`,
23+
`bfcol_2`.`mean`AS`bfcol_9`,
24+
`bfcol_2`.`area`AS`bfcol_10`
25+
FROM`bfcte_1`
26+
)
27+
SELECT
28+
`bfcol_5`AS`min`,
29+
`bfcol_6`AS`max`,
30+
`bfcol_7`AS`sum`,
31+
`bfcol_8`AS`count`,
32+
`bfcol_9`AS`mean`,
33+
`bfcol_10`AS`area`
34+
FROM`bfcte_2`
35+
ORDER BY
36+
`bfcol_1`ASC NULLS LAST

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp