Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 8209203

Browse files
authored
fix: converting to dataframe with out of bounds timestamps (#209)
Fixes #168.

This PR fixes the problem when converting query results to Pandas with `pyarrow` when data contains timestamps that would fall out of `pyarrow`'s nanoseconds precision.

The fix requires `pyarrow>=1.0.0`, thus it only works on Python 3.

### PR checklist
- [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [x] Ensure the tests and linter pass
- [x] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)
1 parent 478597a commit 8209203

File tree

3 files changed

+96
-2
lines changed

3 files changed

+96
-2
lines changed

‎google/cloud/bigquery/table.py‎

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
importfunctools
2222
importlogging
2323
importoperator
24+
importpytz
2425
importwarnings
2526

2627
importsix
@@ -1726,7 +1727,35 @@ def to_dataframe(
17261727
bqstorage_client=bqstorage_client,
17271728
create_bqstorage_client=create_bqstorage_client,
17281729
)
1729-
df=record_batch.to_pandas(date_as_object=date_as_object)
1730+
1731+
# When converting timestamp values to nanosecond precision, the result
1732+
# can be out of pyarrow bounds. To avoid the error when converting to
1733+
# Pandas, we set the timestamp_as_object parameter to True, if necessary.
1734+
#
1735+
# NOTE: Python 3+ only, as timestamp_as_object parameter is only supported
1736+
# in pyarrow>=1.0, but the latter is not compatible with Python 2.
1737+
ifsix.PY2:
1738+
extra_kwargs= {}
1739+
else:
1740+
types_to_check= {
1741+
pyarrow.timestamp("us"),
1742+
pyarrow.timestamp("us",tz=pytz.UTC),
1743+
}
1744+
1745+
forcolumninrecord_batch:
1746+
ifcolumn.typeintypes_to_check:
1747+
try:
1748+
column.cast("timestamp[ns]")
1749+
exceptpyarrow.lib.ArrowInvalid:
1750+
timestamp_as_object=True
1751+
break
1752+
else:
1753+
timestamp_as_object=False
1754+
1755+
extra_kwargs= {"timestamp_as_object":timestamp_as_object}
1756+
1757+
df=record_batch.to_pandas(date_as_object=date_as_object,**extra_kwargs)
1758+
17301759
forcolumnindtypes:
17311760
df[column]=pandas.Series(df[column],dtype=dtypes[column])
17321761
returndf

‎setup.py‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@
4848
"pandas": ["pandas>=0.17.1"],
4949
# Exclude PyArrow dependency from Windows Python 2.7.
5050
'pyarrow: platform_system != "Windows" or python_version >= "3.5"': [
51-
"pyarrow>=0.17.0"
51+
"pyarrow>=1.0.0, <2.0dev; python_version>='3.4'",
52+
# Pyarrow >= 0.17.0 is not compatible with Python 2 anymore.
53+
"pyarrow < 0.17.0; python_version < '3.0'",
5254
],
5355
"tqdm": ["tqdm >= 4.0.0, <5.0.0dev"],
5456
"fastparquet": [

‎tests/unit/test_table.py‎

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
importdatetimeasdt
1516
importitertools
1617
importlogging
1718
importtime
@@ -2271,6 +2272,68 @@ def test_to_dataframe(self):
22712272
self.assertEqual(df.name.dtype.name,"object")
22722273
self.assertEqual(df.age.dtype.name,"int64")
22732274

2275+
@pytest.mark.xfail(
2276+
six.PY2,
2277+
reason=(
2278+
"Requires pyarrow>-1.0 to work, but the latter is not compatible "
2279+
"with Python 2 anymore."
2280+
),
2281+
)
2282+
@unittest.skipIf(pandasisNone,"Requires `pandas`")
2283+
@unittest.skipIf(pyarrowisNone,"Requires `pyarrow`")
2284+
deftest_to_dataframe_timestamp_out_of_pyarrow_bounds(self):
2285+
fromgoogle.cloud.bigquery.schemaimportSchemaField
2286+
2287+
schema= [SchemaField("some_timestamp","TIMESTAMP")]
2288+
rows= [
2289+
{"f": [{"v":"81953424000.0"}]},# 4567-01-01 00:00:00 UTC
2290+
{"f": [{"v":"253402214400.0"}]},# 9999-12-31 00:00:00 UTC
2291+
]
2292+
path="/foo"
2293+
api_request=mock.Mock(return_value={"rows":rows})
2294+
row_iterator=self._make_one(_mock_client(),api_request,path,schema)
2295+
2296+
df=row_iterator.to_dataframe(create_bqstorage_client=False)
2297+
2298+
self.assertIsInstance(df,pandas.DataFrame)
2299+
self.assertEqual(len(df),2)# verify the number of rows
2300+
self.assertEqual(list(df.columns), ["some_timestamp"])
2301+
self.assertEqual(
2302+
list(df["some_timestamp"]),
2303+
[dt.datetime(4567,1,1),dt.datetime(9999,12,31)],
2304+
)
2305+
2306+
@pytest.mark.xfail(
2307+
six.PY2,
2308+
reason=(
2309+
"Requires pyarrow>-1.0 to work, but the latter is not compatible "
2310+
"with Python 2 anymore."
2311+
),
2312+
)
2313+
@unittest.skipIf(pandasisNone,"Requires `pandas`")
2314+
@unittest.skipIf(pyarrowisNone,"Requires `pyarrow`")
2315+
deftest_to_dataframe_datetime_out_of_pyarrow_bounds(self):
2316+
fromgoogle.cloud.bigquery.schemaimportSchemaField
2317+
2318+
schema= [SchemaField("some_datetime","DATETIME")]
2319+
rows= [
2320+
{"f": [{"v":"4567-01-01T00:00:00"}]},
2321+
{"f": [{"v":"9999-12-31T00:00:00"}]},
2322+
]
2323+
path="/foo"
2324+
api_request=mock.Mock(return_value={"rows":rows})
2325+
row_iterator=self._make_one(_mock_client(),api_request,path,schema)
2326+
2327+
df=row_iterator.to_dataframe(create_bqstorage_client=False)
2328+
2329+
self.assertIsInstance(df,pandas.DataFrame)
2330+
self.assertEqual(len(df),2)# verify the number of rows
2331+
self.assertEqual(list(df.columns), ["some_datetime"])
2332+
self.assertEqual(
2333+
list(df["some_datetime"]),
2334+
[dt.datetime(4567,1,1),dt.datetime(9999,12,31)],
2335+
)
2336+
22742337
@unittest.skipIf(pandasisNone,"Requires `pandas`")
22752338
deftest_to_dataframe_warning_wo_pyarrow(self):
22762339
fromgoogle.cloud.bigquery.clientimportPyarrowMissingWarning

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp