Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5251b5d

Browse files
authored
feat: support RANGE in queries Part 2: Arrow (#1868)
* feat: support range in queries as dict
* fix sys tests
* lint
* add arrow support
* fix python 3.7 test error
* print dependencies in sys test
* add unit test and docs
* fix unit test
* add func docs
* add sys test for tabledata.list in arrow
* add sys test for tabledata.list as iterator
* lint
* fix docs error
* fix docstring
* fix docstring
* fix docstring
* docs
* docs
* docs
* move dtypes mapping code
* address comment
* address comment
* fix pytest error
* Revert "move dtypes mapping code" (This reverts commit c46c65c.)
* remove commented out assertions
* typo and formats
* add None-check for range_element_type and add unit tests
* change test skip condition
* fix test error
* change test skip condition
* change test skip condition
* change decorator order
* use a different way to construct test data
* fix error message and add warning number check
* add warning number check and comments
1 parent bd0814c, commit 5251b5d

File tree

15 files changed

+516
-25
lines changed

15 files changed

+516
-25
lines changed

‎google/cloud/bigquery/_helpers.py‎

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,8 @@
6666
_UNIVERSE_DOMAIN_ENV="GOOGLE_CLOUD_UNIVERSE_DOMAIN"
6767
"""Environment variable for setting universe domain."""
6868

69+
_SUPPORTED_RANGE_ELEMENTS= {"TIMESTAMP","DATETIME","DATE"}
70+
6971

7072
def_get_client_universe(
7173
client_options:Optional[Union[client_options_lib.ClientOptions,dict]]
@@ -310,17 +312,13 @@ def _json_from_json(value, field):
310312

311313

312314
def_range_element_from_json(value,field):
313-
"""Coerce 'value' to a range element value, if set or not nullable."""
315+
"""Coerce 'value' to a range element value."""
314316
ifvalue=="UNBOUNDED":
315317
returnNone
316-
eliffield.element_type=="DATE":
317-
return_date_from_json(value,None)
318-
eliffield.element_type=="DATETIME":
319-
return_datetime_from_json(value,None)
320-
eliffield.element_type=="TIMESTAMP":
321-
return_timestamp_from_json(value,None)
318+
iffield.element_typein_SUPPORTED_RANGE_ELEMENTS:
319+
return_CELLDATA_FROM_JSON[field.element_type](value,field.element_type)
322320
else:
323-
raiseValueError(f"Unsupported rangefield type:{value}")
321+
raiseValueError(f"Unsupported rangeelement type:{field.element_type}")
324322

325323

326324
def_range_from_json(value,field):
@@ -344,7 +342,7 @@ def _range_from_json(value, field):
344342
end=_range_element_from_json(end,field.range_element_type)
345343
return {"start":start,"end":end}
346344
else:
347-
raiseValueError(f"Unknown rangeformat:{value}")
345+
raiseValueError(f"Unknownformat forrangevalue:{value}")
348346
else:
349347
returnNone
350348

‎google/cloud/bigquery/_pandas_helpers.py‎

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,17 @@ def bq_to_arrow_struct_data_type(field):
142142
returnpyarrow.struct(arrow_fields)
143143

144144

145+
defbq_to_arrow_range_data_type(field):
146+
iffieldisNone:
147+
raiseValueError(
148+
"Range element type cannot be None, must be one of "
149+
"DATE, DATETIME, or TIMESTAMP"
150+
)
151+
element_type=field.element_type.upper()
152+
arrow_element_type=_pyarrow_helpers.bq_to_arrow_scalars(element_type)()
153+
returnpyarrow.struct([("start",arrow_element_type), ("end",arrow_element_type)])
154+
155+
145156
defbq_to_arrow_data_type(field):
146157
"""Return the Arrow data type, corresponding to a given BigQuery column.
147158
@@ -160,6 +171,9 @@ def bq_to_arrow_data_type(field):
160171
iffield_type_upperinschema._STRUCT_TYPES:
161172
returnbq_to_arrow_struct_data_type(field)
162173

174+
iffield_type_upper=="RANGE":
175+
returnbq_to_arrow_range_data_type(field.range_element_type)
176+
163177
data_type_constructor=_pyarrow_helpers.bq_to_arrow_scalars(field_type_upper)
164178
ifdata_type_constructorisNone:
165179
returnNone
@@ -220,6 +234,9 @@ def default_types_mapper(
220234
datetime_dtype:Union[Any,None]=None,
221235
time_dtype:Union[Any,None]=None,
222236
timestamp_dtype:Union[Any,None]=None,
237+
range_date_dtype:Union[Any,None]=None,
238+
range_datetime_dtype:Union[Any,None]=None,
239+
range_timestamp_dtype:Union[Any,None]=None,
223240
):
224241
"""Create a mapping from pyarrow types to pandas types.
225242
@@ -274,6 +291,22 @@ def types_mapper(arrow_data_type):
274291
eliftime_dtypeisnotNoneandpyarrow.types.is_time(arrow_data_type):
275292
returntime_dtype
276293

294+
elifpyarrow.types.is_struct(arrow_data_type):
295+
ifrange_datetime_dtypeisnotNoneandarrow_data_type.equals(
296+
range_datetime_dtype.pyarrow_dtype
297+
):
298+
returnrange_datetime_dtype
299+
300+
elifrange_date_dtypeisnotNoneandarrow_data_type.equals(
301+
range_date_dtype.pyarrow_dtype
302+
):
303+
returnrange_date_dtype
304+
305+
elifrange_timestamp_dtypeisnotNoneandarrow_data_type.equals(
306+
range_timestamp_dtype.pyarrow_dtype
307+
):
308+
returnrange_timestamp_dtype
309+
277310
returntypes_mapper
278311

279312

‎google/cloud/bigquery/dbapi/_helpers.py‎

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -277,12 +277,14 @@ def complex_query_parameter(
277277
param=query.ArrayQueryParameter(
278278
name,
279279
sub_type,
280-
value
281-
ifisinstance(sub_type,query.ScalarQueryParameterType)
282-
else [
283-
complex_query_parameter(None,v,sub_type._complex__src,base)
284-
forvinvalue
285-
],
280+
(
281+
value
282+
ifisinstance(sub_type,query.ScalarQueryParameterType)
283+
else [
284+
complex_query_parameter(None,v,sub_type._complex__src,base)
285+
forvinvalue
286+
]
287+
),
286288
)
287289
eliftype_type==STRUCT:
288290
ifnotisinstance(value,collections_abc.Mapping):

‎google/cloud/bigquery/enums.py‎

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,15 @@ class DefaultPandasDTypes(enum.Enum):
9999
TIME_DTYPE=object()
100100
"""Specifies default time dtype"""
101101

102+
RANGE_DATE_DTYPE=object()
103+
"""Specifies default range date dtype"""
104+
105+
RANGE_DATETIME_DTYPE=object()
106+
"""Specifies default range datetime dtype"""
107+
108+
RANGE_TIMESTAMP_DTYPE=object()
109+
"""Specifies default range timestamp dtype"""
110+
102111

103112
classDestinationFormat(object):
104113
"""The exported file format. The default value is :attr:`CSV`.

‎google/cloud/bigquery/job/query.py‎

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1784,6 +1784,13 @@ def to_dataframe(
17841784
datetime_dtype:Union[Any,None]=None,
17851785
time_dtype:Union[Any,None]=DefaultPandasDTypes.TIME_DTYPE,
17861786
timestamp_dtype:Union[Any,None]=None,
1787+
range_date_dtype:Union[Any,None]=DefaultPandasDTypes.RANGE_DATE_DTYPE,
1788+
range_datetime_dtype:Union[
1789+
Any,None
1790+
]=DefaultPandasDTypes.RANGE_DATETIME_DTYPE,
1791+
range_timestamp_dtype:Union[
1792+
Any,None
1793+
]=DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE,
17871794
)->"pandas.DataFrame":
17881795
"""Return a pandas DataFrame from a QueryJob
17891796
@@ -1919,6 +1926,63 @@ def to_dataframe(
19191926
19201927
.. versionadded:: 3.10.0
19211928
1929+
range_date_dtype (Optional[pandas.Series.dtype, None]):
1930+
If set, indicate a pandas ExtensionDtype, such as:
1931+
1932+
.. code-block:: python
1933+
1934+
pandas.ArrowDtype(pyarrow.struct(
1935+
[("start", pyarrow.date32()), ("end", pyarrow.date32())]
1936+
))
1937+
1938+
to convert BigQuery RANGE<DATE> type, instead of relying on
1939+
the default ``object``. If you explicitly set the value to
1940+
``None``, the data type will be ``object``. BigQuery Range type
1941+
can be found at:
1942+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type
1943+
1944+
.. versionadded:: 3.21.0
1945+
1946+
range_datetime_dtype (Optional[pandas.Series.dtype, None]):
1947+
If set, indicate a pandas ExtensionDtype, such as:
1948+
1949+
.. code-block:: python
1950+
1951+
pandas.ArrowDtype(pyarrow.struct(
1952+
[
1953+
("start", pyarrow.timestamp("us")),
1954+
("end", pyarrow.timestamp("us")),
1955+
]
1956+
))
1957+
1958+
to convert BigQuery RANGE<DATETIME> type, instead of relying on
1959+
the default ``object``. If you explicitly set the value to
1960+
``None``, the data type will be ``object``. BigQuery Range type
1961+
can be found at:
1962+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type
1963+
1964+
.. versionadded:: 3.21.0
1965+
1966+
range_timestamp_dtype (Optional[pandas.Series.dtype, None]):
1967+
If set, indicate a pandas ExtensionDtype, such as:
1968+
1969+
.. code-block:: python
1970+
1971+
pandas.ArrowDtype(pyarrow.struct(
1972+
[
1973+
("start", pyarrow.timestamp("us", tz="UTC")),
1974+
("end", pyarrow.timestamp("us", tz="UTC")),
1975+
]
1976+
))
1977+
1978+
to convert BigQuery RANGE<TIMESTAMP> type, instead of relying
1979+
on the default ``object``. If you explicitly set the value to
1980+
``None``, the data type will be ``object``. BigQuery Range type
1981+
can be found at:
1982+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type
1983+
1984+
.. versionadded:: 3.21.0
1985+
19221986
Returns:
19231987
pandas.DataFrame:
19241988
A :class:`~pandas.DataFrame` populated with row data
@@ -1949,6 +2013,9 @@ def to_dataframe(
19492013
datetime_dtype=datetime_dtype,
19502014
time_dtype=time_dtype,
19512015
timestamp_dtype=timestamp_dtype,
2016+
range_date_dtype=range_date_dtype,
2017+
range_datetime_dtype=range_datetime_dtype,
2018+
range_timestamp_dtype=range_timestamp_dtype,
19522019
)
19532020

19542021
# If changing the signature of this method, make sure to apply the same

‎google/cloud/bigquery/query.py‎

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,14 +24,13 @@
2424
fromgoogle.cloud.bigquery._helpersimport_rows_from_json
2525
fromgoogle.cloud.bigquery._helpersimport_QUERY_PARAMS_FROM_JSON
2626
fromgoogle.cloud.bigquery._helpersimport_SCALAR_VALUE_TO_JSON_PARAM
27+
fromgoogle.cloud.bigquery._helpersimport_SUPPORTED_RANGE_ELEMENTS
2728

2829

2930
_SCALAR_VALUE_TYPE=Optional[
3031
Union[str,int,float,decimal.Decimal,bool,datetime.datetime,datetime.date]
3132
]
3233

33-
_RANGE_ELEMENT_TYPE_STR= {"TIMESTAMP","DATETIME","DATE"}
34-
3534

3635
classConnectionProperty:
3736
"""A connection-level property to customize query behavior.
@@ -388,14 +387,14 @@ def _parse_range_element_type(self, type_):
388387
google.cloud.bigquery.query.ScalarQueryParameterType: Instance
389388
"""
390389
ifisinstance(type_,str):
391-
iftype_notin_RANGE_ELEMENT_TYPE_STR:
390+
iftype_notin_SUPPORTED_RANGE_ELEMENTS:
392391
raiseValueError(
393392
"If given as a string, range element type must be one of "
394393
"'TIMESTAMP', 'DATE', or 'DATETIME'."
395394
)
396395
returnScalarQueryParameterType(type_)
397396
elifisinstance(type_,ScalarQueryParameterType):
398-
iftype_._typenotin_RANGE_ELEMENT_TYPE_STR:
397+
iftype_._typenotin_SUPPORTED_RANGE_ELEMENTS:
399398
raiseValueError(
400399
"If given as a ScalarQueryParameter object, range element "
401400
"type must be one of 'TIMESTAMP', 'DATE', or 'DATETIME' "
@@ -960,14 +959,14 @@ class RangeQueryParameter(_AbstractQueryParameter):
960959
@classmethod
961960
def_parse_range_element_type(self,range_element_type):
962961
ifisinstance(range_element_type,str):
963-
ifrange_element_typenotin_RANGE_ELEMENT_TYPE_STR:
962+
ifrange_element_typenotin_SUPPORTED_RANGE_ELEMENTS:
964963
raiseValueError(
965964
"If given as a string, range_element_type must be one of "
966965
f"'TIMESTAMP', 'DATE', or 'DATETIME'. Got{range_element_type}."
967966
)
968967
returnRangeQueryParameterType(range_element_type)
969968
elifisinstance(range_element_type,RangeQueryParameterType):
970-
ifrange_element_type.type_._typenotin_RANGE_ELEMENT_TYPE_STR:
969+
ifrange_element_type.type_._typenotin_SUPPORTED_RANGE_ELEMENTS:
971970
raiseValueError(
972971
"If given as a RangeQueryParameterType object, "
973972
"range_element_type must be one of 'TIMESTAMP', 'DATE', "

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp