3434except ImportError :# pragma: NO COVER
3535pyarrow = None
3636
37+ try :
38+ import db_dtypes # type: ignore
39+ except ImportError :# pragma: NO COVER
40+ db_dtypes = None
41+
3742try :
3843import geopandas # type: ignore
3944except ImportError :
5560import google .cloud ._helpers # type: ignore
5661from google .cloud .bigquery import _helpers
5762from google .cloud .bigquery import _pandas_helpers
63+ from google .cloud .bigquery .enums import DefaultPandasDTypes
5864from google .cloud .bigquery .exceptions import LegacyBigQueryStorageError
5965from google .cloud .bigquery .schema import _build_schema_resource
6066from google .cloud .bigquery .schema import _parse_schema_resource
8894
8995_TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"'
9096
97+ _NO_SUPPORTED_DTYPE = (
98+ "The dtype cannot to be converted to a pandas ExtensionArray "
99+ "because the necessary `__from_arrow__` attribute is missing."
100+ )
101+
91102
92103def _reference_getter (table ):
93104"""A :class:`~google.cloud.bigquery.table.TableReference` pointing to
@@ -1920,6 +1931,10 @@ def to_dataframe(
19201931progress_bar_type :str = None ,
19211932create_bqstorage_client :bool = True ,
19221933geography_as_object :bool = False ,
1934+ bool_dtype :Union [Any ,None ]= DefaultPandasDTypes .BOOL_DTYPE ,
1935+ int_dtype :Union [Any ,None ]= DefaultPandasDTypes .INT_DTYPE ,
1936+ float_dtype :Union [Any ,None ]= None ,
1937+ string_dtype :Union [Any ,None ]= None ,
19231938 )-> "pandas.DataFrame" :
19241939"""Create a pandas DataFrame by loading all pages of a query.
19251940
@@ -1958,6 +1973,7 @@ def to_dataframe(
19581973 progress bar as a graphical dialog box.
19591974
19601975 .. versionadded:: 1.11.0
1976+
19611977 create_bqstorage_client (Optional[bool]):
19621978 If ``True`` (default), create a BigQuery Storage API client
19631979 using the default API settings. The BigQuery Storage API
@@ -1975,6 +1991,46 @@ def to_dataframe(
19751991
19761992 .. versionadded:: 2.24.0
19771993
1994+ bool_dtype (Optional[pandas.Series.dtype, None]):
1995+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
1996+ to convert BigQuery Boolean type, instead of relying on the default
1997+ ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,
1998+ then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean
1999+ type can be found at:
2000+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
2001+
2002+ .. versionadded:: 3.7.1
2003+
2004+ int_dtype (Optional[pandas.Series.dtype, None]):
2005+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
2006+ to convert BigQuery Integer types, instead of relying on the default
2007+ ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,
2008+ then the data type will be ``numpy.dtype("int64")``. A list of BigQuery
2009+ Integer types can be found at:
2010+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
2011+
2012+ .. versionadded:: 3.7.1
2013+
2014+ float_dtype (Optional[pandas.Series.dtype, None]):
2015+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
2016+ to convert BigQuery Float type, instead of relying on the default
2017+ ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,
2018+ then the data type will be ``numpy.dtype("float64")``. BigQuery Float
2019+ type can be found at:
2020+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
2021+
2022+ .. versionadded:: 3.7.1
2023+
2024+ string_dtype (Optional[pandas.Series.dtype, None]):
2025+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
2026+ convert BigQuery String type, instead of relying on the default
2027+ ``numpy.dtype("object")``. If you explicitly set the value to ``None``,
2028+ then the data type will be ``numpy.dtype("object")``. BigQuery String
2029+ type can be found at:
2030+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
2031+
2032+ .. versionadded:: 3.7.1
2033+
19782034 Returns:
19792035 pandas.DataFrame:
19802036 A :class:`~pandas.DataFrame` populated with row data and column
@@ -1987,14 +2043,34 @@ def to_dataframe(
19872043 the :mod:`google.cloud.bigquery_storage_v1` module is
19882044 required but cannot be imported. Also if
19892045 `geography_as_object` is `True`, but the
1990- :mod:`shapely` library cannot be imported.
2046+ :mod:`shapely` library cannot be imported. Also if
2047+ `bool_dtype`, `int_dtype`, or any other dtype parameter
2048+ is not a supported dtype.
19912049
19922050 """
19932051_pandas_helpers .verify_pandas_imports ()
19942052
19952053if geography_as_object and shapely is None :
19962054raise ValueError (_NO_SHAPELY_ERROR )
19972055
2056+ if bool_dtype is DefaultPandasDTypes .BOOL_DTYPE :
2057+ bool_dtype = pandas .BooleanDtype ()
2058+
2059+ if int_dtype is DefaultPandasDTypes .INT_DTYPE :
2060+ int_dtype = pandas .Int64Dtype ()
2061+
2062+ if bool_dtype is not None and not hasattr (bool_dtype ,"__from_arrow__" ):
2063+ raise ValueError ("bool_dtype" ,_NO_SUPPORTED_DTYPE )
2064+
2065+ if int_dtype is not None and not hasattr (int_dtype ,"__from_arrow__" ):
2066+ raise ValueError ("int_dtype" ,_NO_SUPPORTED_DTYPE )
2067+
2068+ if float_dtype is not None and not hasattr (float_dtype ,"__from_arrow__" ):
2069+ raise ValueError ("float_dtype" ,_NO_SUPPORTED_DTYPE )
2070+
2071+ if string_dtype is not None and not hasattr (string_dtype ,"__from_arrow__" ):
2072+ raise ValueError ("string_dtype" ,_NO_SUPPORTED_DTYPE )
2073+
19982074if dtypes is None :
19992075dtypes = {}
20002076
@@ -2019,15 +2095,15 @@ def to_dataframe(
20192095for col in record_batch
20202096# Type can be date32 or date64 (plus units).
20212097# See: https://arrow.apache.org/docs/python/api/datatypes.html
2022- if str (col .type ). startswith ( "date" )
2098+ if pyarrow . types . is_date (col .type )
20232099 )
20242100
20252101timestamp_as_object = not all (
20262102self .__can_cast_timestamp_ns (col )
20272103for col in record_batch
2028- # Type can be timestamp (plus units and time zone).
2104+ # Type can be datetime and timestamp (plus units and time zone).
20292105# See: https://arrow.apache.org/docs/python/api/datatypes.html
2030- if str (col .type ). startswith ( "timestamp" )
2106+ if pyarrow . types . is_timestamp (col .type )
20312107 )
20322108
20332109if len (record_batch )> 0 :
@@ -2036,7 +2112,11 @@ def to_dataframe(
20362112timestamp_as_object = timestamp_as_object ,
20372113integer_object_nulls = True ,
20382114types_mapper = _pandas_helpers .default_types_mapper (
2039- date_as_object = date_as_object
2115+ date_as_object = date_as_object ,
2116+ bool_dtype = bool_dtype ,
2117+ int_dtype = int_dtype ,
2118+ float_dtype = float_dtype ,
2119+ string_dtype = string_dtype ,
20402120 ),
20412121 )
20422122else :
@@ -2233,6 +2313,10 @@ def to_dataframe(
22332313progress_bar_type = None ,
22342314create_bqstorage_client = True ,
22352315geography_as_object = False ,
2316+ bool_dtype = None ,
2317+ int_dtype = None ,
2318+ float_dtype = None ,
2319+ string_dtype = None ,
22362320 )-> "pandas.DataFrame" :
22372321"""Create an empty dataframe.
22382322
@@ -2241,6 +2325,11 @@ def to_dataframe(
22412325 dtypes (Any): Ignored. Added for compatibility with RowIterator.
22422326 progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
22432327 create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
2328+ geography_as_object (bool): Ignored. Added for compatibility with RowIterator.
2329+ bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
2330+ int_dtype (Any): Ignored. Added for compatibility with RowIterator.
2331+ float_dtype (Any): Ignored. Added for compatibility with RowIterator.
2332+ string_dtype (Any): Ignored. Added for compatibility with RowIterator.
22442333
22452334 Returns:
22462335 pandas.DataFrame: An empty :class:`~pandas.DataFrame`.