Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ef363e4

Browse files
committed
feat: add date, datetime, time, timestamp dtype to to_dataframe
1 parent 5838fd3 commit ef363e4

File tree

5 files changed

+309
-36
lines changed

5 files changed

+309
-36
lines changed

‎google/cloud/bigquery/_pandas_helpers.py‎

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,10 @@ def default_types_mapper(
290290
int_dtype:Union[Any,None]=None,
291291
float_dtype:Union[Any,None]=None,
292292
string_dtype:Union[Any,None]=None,
293+
date_dtype:Union[Any,None]=None,
294+
datetime_dtype:Union[Any,None]=None,
295+
time_dtype:Union[Any,None]=None,
296+
timestamp_dtype:Union[Any,None]=None,
293297
):
294298
"""Create a mapping from pyarrow types to pandas types.
295299
@@ -321,13 +325,28 @@ def types_mapper(arrow_data_type):
321325
elif (
322326
# If date_as_object is True, we know some DATE columns are
323327
# out-of-bounds of what is supported by pandas.
324-
notdate_as_object
328+
date_dtypeisnotNone
329+
andnotdate_as_object
325330
andpyarrow.types.is_date(arrow_data_type)
326331
):
327-
returndb_dtypes.DateDtype()
332+
returndate_dtype
328333

329-
elifpyarrow.types.is_time(arrow_data_type):
330-
returndb_dtypes.TimeDtype()
334+
elif (
335+
datetime_dtypeisnotNone
336+
andpyarrow.types.is_timestamp(arrow_data_type)
337+
andarrow_data_type.tzisNone
338+
):
339+
returndatetime_dtype
340+
341+
elif (
342+
timestamp_dtypeisnotNone
343+
andpyarrow.types.is_timestamp(arrow_data_type)
344+
andarrow_data_type.tzisnotNone
345+
):
346+
returntimestamp_dtype
347+
348+
eliftime_dtypeisnotNoneandpyarrow.types.is_time(arrow_data_type):
349+
returntime_dtype
331350

332351
returntypes_mapper
333352

‎google/cloud/bigquery/enums.py‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,12 @@ class DefaultPandasDTypes(enum.Enum):
9090
INT_DTYPE=object()
9191
"""Specifies default integer dtype"""
9292

93+
DATE_DTYPE=object()
94+
"""Specifies default date dtype"""
95+
96+
TIME_DTYPE=object()
97+
"""Specifies default time dtype"""
98+
9399

94100
classDestinationFormat(object):
95101
"""The exported file format. The default value is :attr:`CSV`.

‎google/cloud/bigquery/job/query.py‎

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@
5858
exceptImportError:# pragma: NO COVER
5959
pandas=None
6060

61+
try:
62+
importdb_dtypes# type: ignore
63+
exceptImportError:# pragma: NO COVER
64+
db_dtypes=None
65+
6166
iftyping.TYPE_CHECKING:# pragma: NO COVER
6267
# Assumption: type checks are only used by library developers and CI environments
6368
# that have all optional dependencies installed, thus no conditional imports.
@@ -1629,6 +1634,10 @@ def to_dataframe(
16291634
int_dtype:Union[Any,None]=DefaultPandasDTypes.INT_DTYPE,
16301635
float_dtype:Union[Any,None]=None,
16311636
string_dtype:Union[Any,None]=None,
1637+
date_dtype:Union[Any,None]=DefaultPandasDTypes.DATE_DTYPE,
1638+
datetime_dtype:Union[Any,None]=None,
1639+
time_dtype:Union[Any,None]=DefaultPandasDTypes.TIME_DTYPE,
1640+
timestamp_dtype:Union[Any,None]=None,
16321641
)->"pandas.DataFrame":
16331642
"""Return a pandas DataFrame from a QueryJob
16341643
@@ -1689,7 +1698,7 @@ def to_dataframe(
16891698
type can be found at:
16901699
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
16911700
1692-
.. versionadded:: 3.7.1
1701+
.. versionadded:: 3.8.0
16931702
16941703
int_dtype (Optional[pandas.Series.dtype, None]):
16951704
If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
@@ -1699,7 +1708,7 @@ def to_dataframe(
16991708
Integer types can be found at:
17001709
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
17011710
1702-
.. versionadded:: 3.7.1
1711+
.. versionadded:: 3.8.0
17031712
17041713
float_dtype (Optional[pandas.Series.dtype, None]):
17051714
If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
@@ -1709,7 +1718,7 @@ def to_dataframe(
17091718
type can be found at:
17101719
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
17111720
1712-
.. versionadded:: 3.7.1
1721+
.. versionadded:: 3.8.0
17131722
17141723
string_dtype (Optional[pandas.Series.dtype, None]):
17151724
If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
@@ -1719,7 +1728,50 @@ def to_dataframe(
17191728
type can be found at:
17201729
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
17211730
1722-
.. versionadded:: 3.7.1
1731+
.. versionadded:: 3.8.0
1732+
1733+
date_dtype (Optional[pandas.Series.dtype, None]):
1734+
If set, indicate a pandas ExtensionDtype (e.g.
1735+
``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date
1736+
type, instead of relying on the default ``db_dtypes.DateDtype()``.
1737+
If you explicitly set the value to ``None``, then the data type will be
1738+
``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bounds. BigQuery
1739+
Date type can be found at:
1740+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type
1741+
1742+
.. versionadded:: 3.10.0
1743+
1744+
datetime_dtype (Optional[pandas.Series.dtype, None]):
1745+
If set, indicate a pandas ExtensionDtype (e.g.
1746+
``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime
1747+
type, instead of relying on the default ``numpy.dtype("datetime64[ns]")``.
1748+
If you explicitly set the value to ``None``, then the data type will be
1749+
``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bounds. BigQuery
1750+
Datetime type can be found at:
1751+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type
1752+
1753+
.. versionadded:: 3.10.0
1754+
1755+
time_dtype (Optional[pandas.Series.dtype, None]):
1756+
If set, indicate a pandas ExtensionDtype (e.g.
1757+
``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time
1758+
type, instead of relying on the default ``db_dtypes.TimeDtype()``.
1759+
If you explicitly set the value to ``None``, then the data type will be
1760+
``numpy.dtype("object")``. BigQuery Time type can be found at:
1761+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type
1762+
1763+
.. versionadded:: 3.10.0
1764+
1765+
timestamp_dtype (Optional[pandas.Series.dtype, None]):
1766+
If set, indicate a pandas ExtensionDtype (e.g.
1767+
``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp
1768+
type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``.
1769+
If you explicitly set the value to ``None``, then the data type will be
1770+
``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bounds. BigQuery
1771+
Timestamp type can be found at:
1772+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type
1773+
1774+
.. versionadded:: 3.10.0
17231775
17241776
Returns:
17251777
pandas.DataFrame:
@@ -1747,6 +1799,10 @@ def to_dataframe(
17471799
int_dtype=int_dtype,
17481800
float_dtype=float_dtype,
17491801
string_dtype=string_dtype,
1802+
date_dtype=date_dtype,
1803+
datetime_dtype=datetime_dtype,
1804+
time_dtype=time_dtype,
1805+
timestamp_dtype=timestamp_dtype,
17501806
)
17511807

17521808
# If changing the signature of this method, make sure to apply the same

‎google/cloud/bigquery/table.py‎

Lines changed: 106 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,6 +1935,10 @@ def to_dataframe(
19351935
int_dtype:Union[Any,None]=DefaultPandasDTypes.INT_DTYPE,
19361936
float_dtype:Union[Any,None]=None,
19371937
string_dtype:Union[Any,None]=None,
1938+
date_dtype:Union[Any,None]=DefaultPandasDTypes.DATE_DTYPE,
1939+
datetime_dtype:Union[Any,None]=None,
1940+
time_dtype:Union[Any,None]=DefaultPandasDTypes.TIME_DTYPE,
1941+
timestamp_dtype:Union[Any,None]=None,
19381942
)->"pandas.DataFrame":
19391943
"""Create a pandas DataFrame by loading all pages of a query.
19401944
@@ -1999,7 +2003,7 @@ def to_dataframe(
19992003
type can be found at:
20002004
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
20012005
2002-
.. versionadded:: 3.7.1
2006+
.. versionadded:: 3.8.0
20032007
20042008
int_dtype (Optional[pandas.Series.dtype, None]):
20052009
If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
@@ -2009,7 +2013,7 @@ def to_dataframe(
20092013
Integer types can be found at:
20102014
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
20112015
2012-
.. versionadded:: 3.7.1
2016+
.. versionadded:: 3.8.0
20132017
20142018
float_dtype (Optional[pandas.Series.dtype, None]):
20152019
If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
@@ -2019,7 +2023,7 @@ def to_dataframe(
20192023
type can be found at:
20202024
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
20212025
2022-
.. versionadded:: 3.7.1
2026+
.. versionadded:: 3.8.0
20232027
20242028
string_dtype (Optional[pandas.Series.dtype, None]):
20252029
If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
@@ -2029,7 +2033,50 @@ def to_dataframe(
20292033
type can be found at:
20302034
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
20312035
2032-
.. versionadded:: 3.7.1
2036+
.. versionadded:: 3.8.0
2037+
2038+
date_dtype (Optional[pandas.Series.dtype, None]):
2039+
If set, indicate a pandas ExtensionDtype (e.g.
2040+
``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date
2041+
type, instead of relying on the default ``db_dtypes.DateDtype()``.
2042+
If you explicitly set the value to ``None``, then the data type will be
2043+
``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bounds. BigQuery
2044+
Date type can be found at:
2045+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type
2046+
2047+
.. versionadded:: 3.10.0
2048+
2049+
datetime_dtype (Optional[pandas.Series.dtype, None]):
2050+
If set, indicate a pandas ExtensionDtype (e.g.
2051+
``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime
2052+
type, instead of relying on the default ``numpy.dtype("datetime64[ns]")``.
2053+
If you explicitly set the value to ``None``, then the data type will be
2054+
``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bounds. BigQuery
2055+
Datetime type can be found at:
2056+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type
2057+
2058+
.. versionadded:: 3.10.0
2059+
2060+
time_dtype (Optional[pandas.Series.dtype, None]):
2061+
If set, indicate a pandas ExtensionDtype (e.g.
2062+
``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time
2063+
type, instead of relying on the default ``db_dtypes.TimeDtype()``.
2064+
If you explicitly set the value to ``None``, then the data type will be
2065+
``numpy.dtype("object")``. BigQuery Time type can be found at:
2066+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type
2067+
2068+
.. versionadded:: 3.10.0
2069+
2070+
timestamp_dtype (Optional[pandas.Series.dtype, None]):
2071+
If set, indicate a pandas ExtensionDtype (e.g.
2072+
``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp
2073+
type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``.
2074+
If you explicitly set the value to ``None``, then the data type will be
2075+
``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bounds. BigQuery
2076+
Timestamp type can be found at:
2077+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type
2078+
2079+
.. versionadded:: 3.10.0
20332080
20342081
Returns:
20352082
pandas.DataFrame:
@@ -2059,6 +2106,9 @@ def to_dataframe(
20592106
ifint_dtypeisDefaultPandasDTypes.INT_DTYPE:
20602107
int_dtype=pandas.Int64Dtype()
20612108

2109+
iftime_dtypeisDefaultPandasDTypes.TIME_DTYPE:
2110+
time_dtype=db_dtypes.TimeDtype()
2111+
20622112
ifbool_dtypeisnotNoneandnothasattr(bool_dtype,"__from_arrow__"):
20632113
raiseValueError("bool_dtype",_NO_SUPPORTED_DTYPE)
20642114

@@ -2071,6 +2121,24 @@ def to_dataframe(
20712121
ifstring_dtypeisnotNoneandnothasattr(string_dtype,"__from_arrow__"):
20722122
raiseValueError("string_dtype",_NO_SUPPORTED_DTYPE)
20732123

2124+
if (
2125+
date_dtypeisnotNone
2126+
anddate_dtypeisnotDefaultPandasDTypes.DATE_DTYPE
2127+
andnothasattr(date_dtype,"__from_arrow__")
2128+
):
2129+
raiseValueError("date_dtype",_NO_SUPPORTED_DTYPE)
2130+
2131+
ifdatetime_dtypeisnotNoneandnothasattr(datetime_dtype,"__from_arrow__"):
2132+
raiseValueError("datetime_dtype",_NO_SUPPORTED_DTYPE)
2133+
2134+
iftime_dtypeisnotNoneandnothasattr(time_dtype,"__from_arrow__"):
2135+
raiseValueError("time_dtype",_NO_SUPPORTED_DTYPE)
2136+
2137+
iftimestamp_dtypeisnotNoneandnothasattr(
2138+
timestamp_dtype,"__from_arrow__"
2139+
):
2140+
raiseValueError("timestamp_dtype",_NO_SUPPORTED_DTYPE)
2141+
20742142
ifdtypesisNone:
20752143
dtypes= {}
20762144

@@ -2086,25 +2154,29 @@ def to_dataframe(
20862154
create_bqstorage_client=create_bqstorage_client,
20872155
)
20882156

2089-
# When converting date or timestamp values to nanosecond precision, the result
2090-
# can be out of pyarrow bounds. To avoid the error when converting to
2091-
# Pandas, we set the date_as_object or timestamp_as_object parameter to True,
2092-
# if necessary.
2093-
date_as_object=notall(
2094-
self.__can_cast_timestamp_ns(col)
2095-
forcolinrecord_batch
2096-
# Type can be date32 or date64 (plus units).
2097-
# See: https://arrow.apache.org/docs/python/api/datatypes.html
2098-
ifpyarrow.types.is_date(col.type)
2099-
)
2157+
# The default date dtype, `db_dtypes.DateDtype()`, can cause an out-of-bounds error
2158+
# when pyarrow converts date values to nanosecond precision. To avoid the error, we
2159+
# set the date_as_object parameter to True, if necessary.
2160+
date_as_object=False
2161+
ifdate_dtypeisDefaultPandasDTypes.DATE_DTYPE:
2162+
date_dtype=db_dtypes.DateDtype()
2163+
date_as_object=notall(
2164+
self.__can_cast_timestamp_ns(col)
2165+
forcolinrecord_batch
2166+
# Type can be date32 or date64 (plus units).
2167+
# See: https://arrow.apache.org/docs/python/api/datatypes.html
2168+
ifpyarrow.types.is_date(col.type)
2169+
)
21002170

2101-
timestamp_as_object=notall(
2102-
self.__can_cast_timestamp_ns(col)
2103-
forcolinrecord_batch
2104-
# Type can be datetime and timestamp (plus units and time zone).
2105-
# See: https://arrow.apache.org/docs/python/api/datatypes.html
2106-
ifpyarrow.types.is_timestamp(col.type)
2107-
)
2171+
timestamp_as_object=False
2172+
ifdatetime_dtypeisNoneandtimestamp_dtypeisNone:
2173+
timestamp_as_object=notall(
2174+
self.__can_cast_timestamp_ns(col)
2175+
forcolinrecord_batch
2176+
# Type can be datetime and timestamp (plus units and time zone).
2177+
# See: https://arrow.apache.org/docs/python/api/datatypes.html
2178+
ifpyarrow.types.is_timestamp(col.type)
2179+
)
21082180

21092181
iflen(record_batch)>0:
21102182
df=record_batch.to_pandas(
@@ -2117,6 +2189,10 @@ def to_dataframe(
21172189
int_dtype=int_dtype,
21182190
float_dtype=float_dtype,
21192191
string_dtype=string_dtype,
2192+
date_dtype=date_dtype,
2193+
datetime_dtype=datetime_dtype,
2194+
time_dtype=time_dtype,
2195+
timestamp_dtype=timestamp_dtype,
21202196
),
21212197
)
21222198
else:
@@ -2317,6 +2393,10 @@ def to_dataframe(
23172393
int_dtype=None,
23182394
float_dtype=None,
23192395
string_dtype=None,
2396+
date_dtype=None,
2397+
datetime_dtype=None,
2398+
time_dtype=None,
2399+
timestamp_dtype=None,
23202400
)->"pandas.DataFrame":
23212401
"""Create an empty dataframe.
23222402
@@ -2330,6 +2410,10 @@ def to_dataframe(
23302410
int_dtype (Any): Ignored. Added for compatibility with RowIterator.
23312411
float_dtype (Any): Ignored. Added for compatibility with RowIterator.
23322412
string_dtype (Any): Ignored. Added for compatibility with RowIterator.
2413+
date_dtype (Any): Ignored. Added for compatibility with RowIterator.
2414+
datetime_dtype (Any): Ignored. Added for compatibility with RowIterator.
2415+
time_dtype (Any): Ignored. Added for compatibility with RowIterator.
2416+
timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator.
23332417
23342418
Returns:
23352419
pandas.DataFrame: An empty :class:`~pandas.DataFrame`.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp